Actual source code: veccuda.c

petsc-master 2019-12-13
Report Typos and Errors
  1: /*
  2:  Implementation of the sequential cuda vectors.

  4:  This file contains the code that can be compiled with a C
  5:  compiler.  The companion file veccuda2.cu contains the code that
  6:  must be compiled with nvcc or a C++ compiler.
  7:  */

  9: #define PETSC_SKIP_SPINLOCK

 11: #include <petscconf.h>
 12: #include <petsc/private/vecimpl.h>          /*I <petscvec.h> I*/
 13:  #include <../src/vec/vec/impls/dvecimpl.h>
 14:  #include <../src/vec/vec/impls/seq/seqcuda/cudavecimpl.h>

 16: /*
 17:     Allocates space for the vector array on the Host if it does not exist.
 18:     Does NOT change the PetscCUDAFlag for the vector
 19:     Does NOT zero the CUDA array
 20:  */
 21: PetscErrorCode VecCUDAAllocateCheckHost(Vec v)
 22: {
 24:   PetscScalar    *array;
 25:   Vec_Seq        *s = (Vec_Seq*)v->data;
 26:   PetscInt       n = v->map->n;

 29:   if (!s) {
 30:     PetscNewLog((PetscObject)v,&s);
 31:     v->data = s;
 32:   }
 33:   if (!s->array) {
 34:     PetscMalloc1(n,&array);
 35:     PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));
 36:     s->array           = array;
 37:     s->array_allocated = array;
 38:     if (v->offloadmask == PETSC_OFFLOAD_UNALLOCATED) {
 39:       v->offloadmask = PETSC_OFFLOAD_CPU;
 40:     }
 41:   }
 42:   return(0);
 43: }

 45: PetscErrorCode VecCopy_SeqCUDA_Private(Vec xin,Vec yin)
 46: {
 47:   PetscScalar       *ya;
 48:   const PetscScalar *xa;
 49:   PetscErrorCode    ierr;

 52:   VecCUDAAllocateCheckHost(xin);
 53:   VecCUDAAllocateCheckHost(yin);
 54:   if (xin != yin) {
 55:     VecGetArrayRead(xin,&xa);
 56:     VecGetArray(yin,&ya);
 57:     PetscArraycpy(ya,xa,xin->map->n);
 58:     VecRestoreArrayRead(xin,&xa);
 59:     VecRestoreArray(yin,&ya);
 60:   }
 61:   return(0);
 62: }

 64: PetscErrorCode VecSetRandom_SeqCUDA_Private(Vec xin,PetscRandom r)
 65: {
 67:   PetscInt       n = xin->map->n,i;
 68:   PetscScalar    *xx;

 71:   VecGetArray(xin,&xx);
 72:   for (i=0; i<n; i++) { PetscRandomGetValue(r,&xx[i]); }
 73:   VecRestoreArray(xin,&xx);
 74:   return(0);
 75: }

 77: PetscErrorCode VecDestroy_SeqCUDA_Private(Vec v)
 78: {
 79:   Vec_Seq        *vs = (Vec_Seq*)v->data;

 83:   PetscObjectSAWsViewOff(v);
 84: #if defined(PETSC_USE_LOG)
 85:   PetscLogObjectState((PetscObject)v,"Length=%D",v->map->n);
 86: #endif
 87:   if (vs) {
 88:     if (vs->array_allocated) { PetscFree(vs->array_allocated); }
 89:     PetscFree(vs);
 90:   }
 91:   return(0);
 92: }

 94: PetscErrorCode VecResetArray_SeqCUDA_Private(Vec vin)
 95: {
 96:   Vec_Seq *v = (Vec_Seq*)vin->data;

 99:   v->array         = v->unplacedarray;
100:   v->unplacedarray = 0;
101:   return(0);
102: }

104: PetscErrorCode VecCUDAAllocateCheck_Public(Vec v)
105: {

109:   VecCUDAAllocateCheck(v);
110:   return(0);
111: }

113: PetscErrorCode VecCUDACopyToGPU_Public(Vec v)
114: {

118:   VecCUDACopyToGPU(v);
119:   return(0);
120: }

122: /*
123:     VecCUDACopyToGPUSome_Public - Copies certain entries down to the GPU from the CPU of a vector

125:    Input Parameters:
126:  +  v    - the vector
127:  .  ci   - the requested indices, this should be created with CUDAIndicesCreate()
128:  -  mode - vec scatter mode used in VecScatterBegin/End
129: */
130: PetscErrorCode VecCUDACopyToGPUSome_Public(Vec v,PetscCUDAIndices ci,ScatterMode mode)
131: {

135:   VecCUDACopyToGPUSome(v,ci,mode);
136:   return(0);
137: }

139: /*
140:   VecCUDACopyFromGPUSome_Public - Copies certain entries up to the CPU from the GPU of a vector

142:   Input Parameters:
143:  +  v    - the vector
144:  .  ci   - the requested indices, this should be created with CUDAIndicesCreate()
145:  -  mode - vec scatter mode used in VecScatterBegin/End
146: */
147: PetscErrorCode VecCUDACopyFromGPUSome_Public(Vec v,PetscCUDAIndices ci,ScatterMode mode)
148: {

152:   VecCUDACopyFromGPUSome(v,ci,mode);
153:   return(0);
154: }

156: PetscErrorCode VecSetRandom_SeqCUDA(Vec xin,PetscRandom r)
157: {

161:   VecSetRandom_SeqCUDA_Private(xin,r);
162:   xin->offloadmask = PETSC_OFFLOAD_CPU;
163:   return(0);
164: }

166: PetscErrorCode VecResetArray_SeqCUDA(Vec vin)
167: {

171:   VecCUDACopyFromGPU(vin);
172:   VecResetArray_SeqCUDA_Private(vin);
173:   vin->offloadmask = PETSC_OFFLOAD_CPU;
174:   return(0);
175: }

177: PetscErrorCode VecPlaceArray_SeqCUDA(Vec vin,const PetscScalar *a)
178: {

182:   VecCUDACopyFromGPU(vin);
183:   VecPlaceArray_Seq(vin,a);
184:   vin->offloadmask = PETSC_OFFLOAD_CPU;
185:   return(0);
186: }

188: PetscErrorCode VecReplaceArray_SeqCUDA(Vec vin,const PetscScalar *a)
189: {

193:   VecCUDACopyFromGPU(vin);
194:   VecReplaceArray_Seq(vin,a);
195:   vin->offloadmask = PETSC_OFFLOAD_CPU;
196:   return(0);
197: }

199: /*@
200:  VecCreateSeqCUDA - Creates a standard, sequential array-style vector.

202:  Collective

204:  Input Parameter:
205:  +  comm - the communicator, should be PETSC_COMM_SELF
206:  -  n - the vector length

208:  Output Parameter:
209:  .  v - the vector

211:  Notes:
212:  Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
213:  same type as an existing vector.

215:  Level: intermediate

217:  .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
218:  @*/
219: PetscErrorCode VecCreateSeqCUDA(MPI_Comm comm,PetscInt n,Vec *v)
220: {

224:   VecCreate(comm,v);
225:   VecSetSizes(*v,n,n);
226:   VecSetType(*v,VECSEQCUDA);
227:   return(0);
228: }

230: PetscErrorCode VecDuplicate_SeqCUDA(Vec win,Vec *V)
231: {

235:   VecCreateSeqCUDA(PetscObjectComm((PetscObject)win),win->map->n,V);
236:   PetscLayoutReference(win->map,&(*V)->map);
237:   PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);
238:   PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);
239:   (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
240:   return(0);
241: }

243: PetscErrorCode VecCreate_SeqCUDA(Vec V)
244: {

248:   PetscLayoutSetUp(V->map);
249:   VecCUDAAllocateCheck(V);
250:   VecCreate_SeqCUDA_Private(V,((Vec_CUDA*)V->spptr)->GPUarray_allocated);
251:   VecCUDAAllocateCheckHost(V);
252:   VecSet(V,0.0);
253:   VecSet_Seq(V,0.0);
254:   V->offloadmask = PETSC_OFFLOAD_BOTH;
255:   return(0);
256: }

258: /*@C
259:    VecCreateSeqCUDAWithArray - Creates a CUDA sequential array-style vector,
260:    where the user provides the array space to store the vector values. The array
261:    provided must be a GPU array.

263:    Collective

265:    Input Parameter:
266: +  comm - the communicator, should be PETSC_COMM_SELF
267: .  bs - the block size
268: .  n - the vector length
269: -  array - GPU memory where the vector elements are to be stored.

271:    Output Parameter:
272: .  V - the vector

274:    Notes:
275:    Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
276:    same type as an existing vector.

278:    If the user-provided array is NULL, then VecCUDAPlaceArray() can be used
279:    at a later stage to SET the array for storing the vector values.

281:    PETSc does NOT free the array when the vector is destroyed via VecDestroy().
282:    The user should not free the array until the vector is destroyed.

284:    Level: intermediate

286: .seealso: VecCreateMPICUDAWithArray(), VecCreate(), VecDuplicate(), VecDuplicateVecs(),
287:           VecCreateGhost(), VecCreateSeq(), VecCUDAPlaceArray(), VecCreateSeqWithArray(),
288:           VecCreateMPIWithArray()
289: @*/
290: PetscErrorCode  VecCreateSeqCUDAWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar array[],Vec *V)
291: {
293:   PetscMPIInt    size;

296:   VecCreate(comm,V);
297:   VecSetSizes(*V,n,n);
298:   VecSetBlockSize(*V,bs);
299:   MPI_Comm_size(comm,&size);
300:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQ on more than one process");
301:   VecCreate_SeqCUDA_Private(*V,array);
302:   return(0);
303: }

305: PetscErrorCode VecGetArrayWrite_SeqCUDA(Vec v,PetscScalar **vv)
306: {

310:   VecCUDAAllocateCheckHost(v);
311:   v->offloadmask = PETSC_OFFLOAD_CPU;
312:   *vv = *((PetscScalar**)v->data);
313:   return(0);
314: }

316: PetscErrorCode VecPinToCPU_SeqCUDA(Vec V,PetscBool pin)
317: {

321:   V->pinnedtocpu = pin;
322:   if (pin) {
323:     VecCUDACopyFromGPU(V);
324:     V->offloadmask                 = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
325:     V->ops->dot                    = VecDot_Seq;
326:     V->ops->norm                   = VecNorm_Seq;
327:     V->ops->tdot                   = VecTDot_Seq;
328:     V->ops->scale                  = VecScale_Seq;
329:     V->ops->copy                   = VecCopy_Seq;
330:     V->ops->set                    = VecSet_Seq;
331:     V->ops->swap                   = VecSwap_Seq;
332:     V->ops->axpy                   = VecAXPY_Seq;
333:     V->ops->axpby                  = VecAXPBY_Seq;
334:     V->ops->axpbypcz               = VecAXPBYPCZ_Seq;
335:     V->ops->pointwisemult          = VecPointwiseMult_Seq;
336:     V->ops->pointwisedivide        = VecPointwiseDivide_Seq;
337:     V->ops->setrandom              = VecSetRandom_Seq;
338:     V->ops->dot_local              = VecDot_Seq;
339:     V->ops->tdot_local             = VecTDot_Seq;
340:     V->ops->norm_local             = VecNorm_Seq;
341:     V->ops->mdot_local             = VecMDot_Seq;
342:     V->ops->mtdot_local            = VecMTDot_Seq;
343:     V->ops->maxpy                  = VecMAXPY_Seq;
344:     V->ops->mdot                   = VecMDot_Seq;
345:     V->ops->mtdot                  = VecMTDot_Seq;
346:     V->ops->aypx                   = VecAYPX_Seq;
347:     V->ops->waxpy                  = VecWAXPY_Seq;
348:     V->ops->dotnorm2               = NULL;
349:     V->ops->placearray             = VecPlaceArray_Seq;
350:     V->ops->replacearray           = VecReplaceArray_Seq;
351:     V->ops->resetarray             = VecResetArray_Seq;
352:     V->ops->duplicate              = VecDuplicate_Seq;
353:     V->ops->conjugate              = VecConjugate_Seq;
354:     V->ops->getlocalvector         = NULL;
355:     V->ops->restorelocalvector     = NULL;
356:     V->ops->getlocalvectorread     = NULL;
357:     V->ops->restorelocalvectorread = NULL;
358:     V->ops->getarraywrite          = NULL;
359:   } else {
360:     V->ops->dot                    = VecDot_SeqCUDA;
361:     V->ops->norm                   = VecNorm_SeqCUDA;
362:     V->ops->tdot                   = VecTDot_SeqCUDA;
363:     V->ops->scale                  = VecScale_SeqCUDA;
364:     V->ops->copy                   = VecCopy_SeqCUDA;
365:     V->ops->set                    = VecSet_SeqCUDA;
366:     V->ops->swap                   = VecSwap_SeqCUDA;
367:     V->ops->axpy                   = VecAXPY_SeqCUDA;
368:     V->ops->axpby                  = VecAXPBY_SeqCUDA;
369:     V->ops->axpbypcz               = VecAXPBYPCZ_SeqCUDA;
370:     V->ops->pointwisemult          = VecPointwiseMult_SeqCUDA;
371:     V->ops->pointwisedivide        = VecPointwiseDivide_SeqCUDA;
372:     V->ops->setrandom              = VecSetRandom_SeqCUDA;
373:     V->ops->dot_local              = VecDot_SeqCUDA;
374:     V->ops->tdot_local             = VecTDot_SeqCUDA;
375:     V->ops->norm_local             = VecNorm_SeqCUDA;
376:     V->ops->mdot_local             = VecMDot_SeqCUDA;
377:     V->ops->maxpy                  = VecMAXPY_SeqCUDA;
378:     V->ops->mdot                   = VecMDot_SeqCUDA;
379:     V->ops->aypx                   = VecAYPX_SeqCUDA;
380:     V->ops->waxpy                  = VecWAXPY_SeqCUDA;
381:     V->ops->dotnorm2               = VecDotNorm2_SeqCUDA;
382:     V->ops->placearray             = VecPlaceArray_SeqCUDA;
383:     V->ops->replacearray           = VecReplaceArray_SeqCUDA;
384:     V->ops->resetarray             = VecResetArray_SeqCUDA;
385:     V->ops->destroy                = VecDestroy_SeqCUDA;
386:     V->ops->duplicate              = VecDuplicate_SeqCUDA;
387:     V->ops->conjugate              = VecConjugate_SeqCUDA;
388:     V->ops->getlocalvector         = VecGetLocalVector_SeqCUDA;
389:     V->ops->restorelocalvector     = VecRestoreLocalVector_SeqCUDA;
390:     V->ops->getlocalvectorread     = VecGetLocalVector_SeqCUDA;
391:     V->ops->restorelocalvectorread = VecRestoreLocalVector_SeqCUDA;
392:     V->ops->getarraywrite          = VecGetArrayWrite_SeqCUDA;
393:   }
394:   return(0);
395: }

397: PetscErrorCode VecCreate_SeqCUDA_Private(Vec V,const PetscScalar *array)
398: {
400:   Vec_CUDA       *veccuda;
401:   PetscMPIInt    size;

404:   MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
405:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQCUDA on more than one process");
406:   VecCreate_Seq_Private(V,0);
407:   PetscObjectChangeTypeName((PetscObject)V,VECSEQCUDA);
408:   VecPinToCPU_SeqCUDA(V,PETSC_FALSE);
409:   V->ops->pintocpu = VecPinToCPU_SeqCUDA;

411:   /* Later, functions check for the Vec_CUDA structure existence, so do not create it without array */
412:   if (array) {
413:     if (!V->spptr) {
414:       PetscMalloc(sizeof(Vec_CUDA),&V->spptr);
415:       veccuda = (Vec_CUDA*)V->spptr;
416:       veccuda->stream = 0; /* using default stream */
417:       veccuda->GPUarray_allocated = 0;
418:       veccuda->hostDataRegisteredAsPageLocked = PETSC_FALSE;
419:       V->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
420:     }
421:     veccuda = (Vec_CUDA*)V->spptr;
422:     veccuda->GPUarray = (PetscScalar*)array;
423:   }

425:   return(0);
426: }