Actual source code: vscatcusp.c

petsc-3.5.4 2015-05-23
Report Typos and Errors
  1: #include <petsc-private/isimpl.h>
  2: #include <petsc-private/vecimpl.h>             /*I "petscvec.h" I*/

  4: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesCreate_PtoP(PetscInt, PetscInt*,PetscInt, PetscInt*,PetscCUSPIndices*);

  8: /*@
  9:    VecScatterInitializeForGPU - Initializes a generalized scatter from one vector to
 10:  another for GPU based computation.  Effectively, this function creates all the
 11:  necessary indexing buffers and work vectors needed to move data only those data points
 12:  in a vector which need to be communicated across ranks. This is done at the first time
 13:  this function is called. Currently, this only used in the context of the parallel
 14:  SpMV call in MatMult_MPIAIJCUSP (in mpi/mpicusp/mpiaijcusp.cu) or MatMult_MPIAIJCUSPARSE
 15:  (in mpi/mpicusparse/mpiaijcusparse.cu). This function is executed before the call to
 16:  MatMult. This enables the memory transfers to be overlapped with the MatMult SpMV kernel
 17:  call.

 19:    Input Parameters:
 20: +  inctx - scatter context generated by VecScatterCreate()
 21: .  x - the vector from which we scatter
 22: -  mode - the scattering mode, usually SCATTER_FORWARD.  The available modes are:
 23:     SCATTER_FORWARD or SCATTER_REVERSE

 25:   Level: intermediate

 27: .seealso: VecScatterCreate(), VecScatterEnd()
 28: @*/
 29: PetscErrorCode  VecScatterInitializeForGPU(VecScatter inctx,Vec x,ScatterMode mode)
 30: {
 31:   VecScatter_MPI_General *to,*from;
 32:   PetscErrorCode         ierr;
 33:   PetscInt               i,*indices,*sstartsSends,*sstartsRecvs,nrecvs,nsends,bs;

 36:   if (mode & SCATTER_REVERSE) {
 37:     to     = (VecScatter_MPI_General*)inctx->fromdata;
 38:     from   = (VecScatter_MPI_General*)inctx->todata;
 39:   } else {
 40:     to     = (VecScatter_MPI_General*)inctx->todata;
 41:     from   = (VecScatter_MPI_General*)inctx->fromdata;
 42:   }
 43:   bs           = to->bs;
 44:   nrecvs       = from->n;
 45:   nsends       = to->n;
 46:   indices      = to->indices;
 47:   sstartsSends = to->starts;
 48:   sstartsRecvs = from->starts;
 49:   if (x->valid_GPU_array != PETSC_CUSP_UNALLOCATED && (nsends>0 || nrecvs>0)) {
 50:     if (!inctx->spptr) {
 51:       PetscInt k,*tindicesSends,*sindicesSends,*tindicesRecvs,*sindicesRecvs;
 52:       PetscInt ns = sstartsSends[nsends],nr = sstartsRecvs[nrecvs];
 53:       /* Here we create indices for both the senders and receivers. */
 54:       PetscMalloc1(ns,&tindicesSends);
 55:       PetscMalloc1(nr,&tindicesRecvs);

 57:       PetscMemcpy(tindicesSends,indices,ns*sizeof(PetscInt));
 58:       PetscMemcpy(tindicesRecvs,from->indices,nr*sizeof(PetscInt));

 60:       PetscSortRemoveDupsInt(&ns,tindicesSends);
 61:       PetscSortRemoveDupsInt(&nr,tindicesRecvs);

 63:       PetscMalloc1(bs*ns,&sindicesSends);
 64:       PetscMalloc1(from->bs*nr,&sindicesRecvs);

 66:       /* sender indices */
 67:       for (i=0; i<ns; i++) {
 68:         for (k=0; k<bs; k++) sindicesSends[i*bs+k] = tindicesSends[i]+k;
 69:       }
 70:       PetscFree(tindicesSends);

 72:       /* receiver indices */
 73:       for (i=0; i<nr; i++) {
 74:         for (k=0; k<from->bs; k++) sindicesRecvs[i*from->bs+k] = tindicesRecvs[i]+k;
 75:       }
 76:       PetscFree(tindicesRecvs);

 78:       /* create GPU indices, work vectors, ... */
 79:       VecScatterCUSPIndicesCreate_PtoP(ns*bs,sindicesSends,nr*from->bs,sindicesRecvs,(PetscCUSPIndices*)&inctx->spptr);
 80:       PetscFree(sindicesSends);
 81:       PetscFree(sindicesRecvs);
 82:     }
 83:   }
 84:   return(0);
 85: }

 89: /*@
 90:    VecScatterFinalizeForGPU - Finalizes a generalized scatter from one vector to
 91:  another for GPU based computation. Effectively, this function resets the temporary
 92:  buffer flags. Currently, this only used in the context of the parallel SpMV call in
 93:  in MatMult_MPIAIJCUSP (in mpi/mpicusp/mpiaijcusp.cu) or MatMult_MPIAIJCUSPARSE
 94:  (in mpi/mpicusparse/mpiaijcusparse.cu). Once the MatMultAdd is finished,
 95:  the GPU temporary buffers used for messaging are no longer valid.

 97:    Input Parameters:
 98: +  inctx - scatter context generated by VecScatterCreate()

100:   Level: intermediate

102: @*/
103: PetscErrorCode  VecScatterFinalizeForGPU(VecScatter inctx)
104: {
106:   return(0);
107: }