Actual source code: vscatcusp.c
petsc-3.5.4 2015-05-23
1: #include <petsc-private/isimpl.h>
2: #include <petsc-private/vecimpl.h> /*I "petscvec.h" I*/
4: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesCreate_PtoP(PetscInt, PetscInt*,PetscInt, PetscInt*,PetscCUSPIndices*);
8: /*@
9: VecScatterInitializeForGPU - Initializes a generalized scatter from one vector to
10: another for GPU based computation. Effectively, this function creates all the
11: necessary indexing buffers and work vectors needed to move data only those data points
12: in a vector which need to be communicated across ranks. This is done at the first time
13: this function is called. Currently, this only used in the context of the parallel
14: SpMV call in MatMult_MPIAIJCUSP (in mpi/mpicusp/mpiaijcusp.cu) or MatMult_MPIAIJCUSPARSE
15: (in mpi/mpicusparse/mpiaijcusparse.cu). This function is executed before the call to
16: MatMult. This enables the memory transfers to be overlapped with the MatMult SpMV kernel
17: call.
19: Input Parameters:
20: + inctx - scatter context generated by VecScatterCreate()
21: . x - the vector from which we scatter
22: - mode - the scattering mode, usually SCATTER_FORWARD. The available modes are:
23: SCATTER_FORWARD or SCATTER_REVERSE
25: Level: intermediate
27: .seealso: VecScatterCreate(), VecScatterEnd()
28: @*/
29: PetscErrorCode VecScatterInitializeForGPU(VecScatter inctx,Vec x,ScatterMode mode)
30: {
31: VecScatter_MPI_General *to,*from;
32: PetscErrorCode ierr;
33: PetscInt i,*indices,*sstartsSends,*sstartsRecvs,nrecvs,nsends,bs;
36: if (mode & SCATTER_REVERSE) {
37: to = (VecScatter_MPI_General*)inctx->fromdata;
38: from = (VecScatter_MPI_General*)inctx->todata;
39: } else {
40: to = (VecScatter_MPI_General*)inctx->todata;
41: from = (VecScatter_MPI_General*)inctx->fromdata;
42: }
43: bs = to->bs;
44: nrecvs = from->n;
45: nsends = to->n;
46: indices = to->indices;
47: sstartsSends = to->starts;
48: sstartsRecvs = from->starts;
49: if (x->valid_GPU_array != PETSC_CUSP_UNALLOCATED && (nsends>0 || nrecvs>0)) {
50: if (!inctx->spptr) {
51: PetscInt k,*tindicesSends,*sindicesSends,*tindicesRecvs,*sindicesRecvs;
52: PetscInt ns = sstartsSends[nsends],nr = sstartsRecvs[nrecvs];
53: /* Here we create indices for both the senders and receivers. */
54: PetscMalloc1(ns,&tindicesSends);
55: PetscMalloc1(nr,&tindicesRecvs);
57: PetscMemcpy(tindicesSends,indices,ns*sizeof(PetscInt));
58: PetscMemcpy(tindicesRecvs,from->indices,nr*sizeof(PetscInt));
60: PetscSortRemoveDupsInt(&ns,tindicesSends);
61: PetscSortRemoveDupsInt(&nr,tindicesRecvs);
63: PetscMalloc1(bs*ns,&sindicesSends);
64: PetscMalloc1(from->bs*nr,&sindicesRecvs);
66: /* sender indices */
67: for (i=0; i<ns; i++) {
68: for (k=0; k<bs; k++) sindicesSends[i*bs+k] = tindicesSends[i]+k;
69: }
70: PetscFree(tindicesSends);
72: /* receiver indices */
73: for (i=0; i<nr; i++) {
74: for (k=0; k<from->bs; k++) sindicesRecvs[i*from->bs+k] = tindicesRecvs[i]+k;
75: }
76: PetscFree(tindicesRecvs);
78: /* create GPU indices, work vectors, ... */
79: VecScatterCUSPIndicesCreate_PtoP(ns*bs,sindicesSends,nr*from->bs,sindicesRecvs,(PetscCUSPIndices*)&inctx->spptr);
80: PetscFree(sindicesSends);
81: PetscFree(sindicesRecvs);
82: }
83: }
84: return(0);
85: }
89: /*@
90: VecScatterFinalizeForGPU - Finalizes a generalized scatter from one vector to
91: another for GPU based computation. Effectively, this function resets the temporary
92: buffer flags. Currently, this only used in the context of the parallel SpMV call in
93: in MatMult_MPIAIJCUSP (in mpi/mpicusp/mpiaijcusp.cu) or MatMult_MPIAIJCUSPARSE
94: (in mpi/mpicusparse/mpiaijcusparse.cu). Once the MatMultAdd is finished,
95: the GPU temporary buffers used for messaging are no longer valid.
97: Input Parameters:
98: + inctx - scatter context generated by VecScatterCreate()
100: Level: intermediate
102: @*/
103: PetscErrorCode VecScatterFinalizeForGPU(VecScatter inctx)
104: {
106: return(0);
107: }