Actual source code: vecviennacl.cxx

petsc-master 2020-10-23
Report Typos and Errors
  1: /*
  2:    Implements the sequential ViennaCL vectors.
  3: */

  5: #include <petscconf.h>
  6: #include <petsc/private/vecimpl.h>
  7: #include <../src/vec/vec/impls/dvecimpl.h>
  8: #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>

 10: #include <vector>

 12: #include "viennacl/linalg/inner_prod.hpp"
 13: #include "viennacl/linalg/norm_1.hpp"
 14: #include "viennacl/linalg/norm_2.hpp"
 15: #include "viennacl/linalg/norm_inf.hpp"

 17: #ifdef VIENNACL_WITH_OPENCL
 18: #include "viennacl/ocl/backend.hpp"
 19: #endif


 22: PETSC_EXTERN PetscErrorCode VecViennaCLGetArray(Vec v, ViennaCLVector **a)
 23: {

 28:   *a   = 0;
 29:   VecViennaCLCopyToGPU(v);
 30:   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
 31:   ViennaCLWaitForGPU();
 32:   return(0);
 33: }

 35: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArray(Vec v, ViennaCLVector **a)
 36: {

 41:   v->offloadmask = PETSC_OFFLOAD_GPU;

 43:   PetscObjectStateIncrease((PetscObject)v);
 44:   return(0);
 45: }

 47: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
 48: {

 53:   *a   = 0;
 54:   VecViennaCLCopyToGPU(v);
 55:   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
 56:   ViennaCLWaitForGPU();
 57:   return(0);
 58: }

 60: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
 61: {
 64:   return(0);
 65: }

 67: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
 68: {

 73:   *a   = 0;
 74:   VecViennaCLAllocateCheck(v);
 75:   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
 76:   ViennaCLWaitForGPU();
 77:   return(0);
 78: }

 80: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
 81: {

 86:   v->offloadmask = PETSC_OFFLOAD_GPU;

 88:   PetscObjectStateIncrease((PetscObject)v);
 89:   return(0);
 90: }



 94: PETSC_EXTERN PetscErrorCode PetscViennaCLInit()
 95: {
 96:   PetscErrorCode       ierr;
 97:   char                 string[20];
 98:   PetscBool            flg,flg_cuda,flg_opencl,flg_openmp;

101:   /* ViennaCL backend selection: CUDA, OpenCL, or OpenMP */
102:   PetscOptionsGetString(NULL,NULL,"-viennacl_backend",string,sizeof(string),&flg);
103:   if (flg) {
104:     try {
105:       PetscStrcasecmp(string,"cuda",&flg_cuda);
106:       PetscStrcasecmp(string,"opencl",&flg_opencl);
107:       PetscStrcasecmp(string,"openmp",&flg_openmp);

109:       /* A default (sequential) CPU backend is always available - even if OpenMP is not enabled. */
110:       if (flg_openmp) viennacl::backend::default_memory_type(viennacl::MAIN_MEMORY);
111: #if defined(PETSC_HAVE_CUDA)
112:       else if (flg_cuda) viennacl::backend::default_memory_type(viennacl::CUDA_MEMORY);
113: #endif
114: #if defined(PETSC_HAVE_OPENCL)
115:       else if (flg_opencl) viennacl::backend::default_memory_type(viennacl::OPENCL_MEMORY);
116: #endif
117:       else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Backend not recognized or available: %s.\n Pass -viennacl_view to see available backends for ViennaCL.\n", string);
118:     } catch (std::exception const & ex) {
119:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
120:     }
121:   }

123: #if defined(PETSC_HAVE_OPENCL)
124:   /* ViennaCL OpenCL device type configuration */
125:   PetscOptionsGetString(NULL,NULL,"-viennacl_opencl_device_type",string,sizeof(string),&flg);
126:   if (flg) {
127:     try {
128:       PetscStrcasecmp(string,"cpu",&flg);
129:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);

131:       PetscStrcasecmp(string,"gpu",&flg);
132:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);

134:       PetscStrcasecmp(string,"accelerator",&flg);
135:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
136:     } catch (std::exception const & ex) {
137:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
138:     }
139:   }
140: #endif

142:   /* Print available backends */
143:   PetscOptionsHasName(NULL,NULL,"-viennacl_view",&flg);
144:   if (flg) {
145:     PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backends available: ");
146: #if defined(PETSC_HAVE_CUDA)
147:     PetscPrintf(PETSC_COMM_WORLD, "CUDA, ");
148: #endif
149: #if defined(PETSC_HAVE_OPENCL)
150:     PetscPrintf(PETSC_COMM_WORLD, "OpenCL, ");
151: #endif
152: #if defined(PETSC_HAVE_OPENMP)
153:     PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
154: #else
155:     PetscPrintf(PETSC_COMM_WORLD, "OpenMP (1 thread) ");
156: #endif
157:     PetscPrintf(PETSC_COMM_WORLD, "\n");

159:     /* Print selected backends */
160:     PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backend  selected: ");
161: #if defined(PETSC_HAVE_CUDA)
162:     if (viennacl::backend::default_memory_type() == viennacl::CUDA_MEMORY) {
163:       PetscPrintf(PETSC_COMM_WORLD, "CUDA ");
164:     }
165: #endif
166: #if defined(PETSC_HAVE_OPENCL)
167:     if (viennacl::backend::default_memory_type() == viennacl::OPENCL_MEMORY) {
168:       PetscPrintf(PETSC_COMM_WORLD, "OpenCL ");
169:     }
170: #endif
171: #if defined(PETSC_HAVE_OPENMP)
172:     if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
173:       PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
174:     }
175: #else
176:     if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
177:       PetscPrintf(PETSC_COMM_WORLD, "OpenMP (sequential - consider reconfiguration: --with-openmp=1) ");
178:     }
179: #endif
180:     PetscPrintf(PETSC_COMM_WORLD, "\n");
181:   }
182:   return(0);
183: }

185: /*
186:     Allocates space for the vector array on the Host if it does not exist.
187:     Does NOT change the PetscViennaCLFlag for the vector
188:     Does NOT zero the ViennaCL array
189:  */
190: PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
191: {
193:   PetscScalar    *array;
194:   Vec_Seq        *s;
195:   PetscInt       n = v->map->n;

198:   s    = (Vec_Seq*)v->data;
199:   VecViennaCLAllocateCheck(v);
200:   if (s->array == 0) {
201:     PetscMalloc1(n,&array);
202:     PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));
203:     s->array           = array;
204:     s->array_allocated = array;
205:   }
206:   return(0);
207: }


210: /*
211:     Allocates space for the vector array on the GPU if it does not exist.
212:     Does NOT change the PetscViennaCLFlag for the vector
213:     Does NOT zero the ViennaCL array

215:  */
216: PetscErrorCode VecViennaCLAllocateCheck(Vec v)
217: {
219:   if (!v->spptr) {
220:     try {
221:       v->spptr                            = new Vec_ViennaCL;
222:       ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated = new ViennaCLVector((PetscBLASInt)v->map->n);
223:       ((Vec_ViennaCL*)v->spptr)->GPUarray = ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated;

225:     } catch(std::exception const & ex) {
226:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
227:     }
228:   }
229:   return(0);
230: }


233: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
234: PetscErrorCode VecViennaCLCopyToGPU(Vec v)
235: {

240:   VecViennaCLAllocateCheck(v);
241:   if (v->map->n > 0) {
242:     if (v->offloadmask == PETSC_OFFLOAD_CPU) {
243:       PetscLogEventBegin(VEC_ViennaCLCopyToGPU,v,0,0,0);
244:       try {
245:         ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
246:         viennacl::fast_copy(*(PetscScalar**)v->data, *(PetscScalar**)v->data + v->map->n, vec->begin());
247:         ViennaCLWaitForGPU();
248:       } catch(std::exception const & ex) {
249:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
250:       }
251:       PetscLogCpuToGpu((v->map->n)*sizeof(PetscScalar));
252:       PetscLogEventEnd(VEC_ViennaCLCopyToGPU,v,0,0,0);
253:       v->offloadmask = PETSC_OFFLOAD_BOTH;
254:     }
255:   }
256:   return(0);
257: }



261: /*
262:      VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
263: */
264: PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
265: {

270:   VecViennaCLAllocateCheckHost(v);
271:   if (v->offloadmask == PETSC_OFFLOAD_GPU) {
272:     PetscLogEventBegin(VEC_ViennaCLCopyFromGPU,v,0,0,0);
273:     try {
274:       ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
275:       viennacl::fast_copy(vec->begin(),vec->end(),*(PetscScalar**)v->data);
276:       ViennaCLWaitForGPU();
277:     } catch(std::exception const & ex) {
278:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
279:     }
280:     PetscLogGpuToCpu((v->map->n)*sizeof(PetscScalar));
281:     PetscLogEventEnd(VEC_ViennaCLCopyFromGPU,v,0,0,0);
282:     v->offloadmask = PETSC_OFFLOAD_BOTH;
283:   }
284:   return(0);
285: }


288: /* Copy on CPU */
289: static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin,Vec yin)
290: {
291:   PetscScalar       *ya;
292:   const PetscScalar *xa;
293:   PetscErrorCode    ierr;

296:   VecViennaCLAllocateCheckHost(xin);
297:   VecViennaCLAllocateCheckHost(yin);
298:   if (xin != yin) {
299:     VecGetArrayRead(xin,&xa);
300:     VecGetArray(yin,&ya);
301:     PetscArraycpy(ya,xa,xin->map->n);
302:     VecRestoreArrayRead(xin,&xa);
303:     VecRestoreArray(yin,&ya);
304:   }
305:   return(0);
306: }

308: static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin,PetscRandom r)
309: {
311:   PetscInt       n = xin->map->n,i;
312:   PetscScalar    *xx;

315:   VecGetArray(xin,&xx);
316:   for (i=0; i<n; i++) {PetscRandomGetValue(r,&xx[i]);}
317:   VecRestoreArray(xin,&xx);
318:   return(0);
319: }

321: static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
322: {
323:   Vec_Seq        *vs = (Vec_Seq*)v->data;

327:   PetscObjectSAWsViewOff(v);
328: #if defined(PETSC_USE_LOG)
329:   PetscLogObjectState((PetscObject)v,"Length=%D",v->map->n);
330: #endif
331:   if (vs->array_allocated) { PetscFree(vs->array_allocated); }
332:   PetscFree(vs);
333:   return(0);
334: }

336: static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
337: {
338:   Vec_Seq *v = (Vec_Seq*)vin->data;

341:   v->array         = v->unplacedarray;
342:   v->unplacedarray = 0;
343:   return(0);
344: }


347: /*MC
348:    VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL

350:    Options Database Keys:
351: . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()

353:   Level: beginner

355: .seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateSeqWithArray(), VECMPI, VecType, VecCreateMPI(), VecCreateSeq()
356: M*/


359: PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
360: {
361:   const ViennaCLVector  *xgpu;
362:   ViennaCLVector        *ygpu;
363:   PetscErrorCode        ierr;

366:   VecViennaCLGetArrayRead(xin,&xgpu);
367:   VecViennaCLGetArray(yin,&ygpu);
368:   PetscLogGpuTimeBegin();
369:   try {
370:     if (alpha != 0.0 && xin->map->n > 0) {
371:       *ygpu = *xgpu + alpha * *ygpu;
372:       PetscLogGpuFlops(2.0*yin->map->n);
373:     } else {
374:       *ygpu = *xgpu;
375:     }
376:     ViennaCLWaitForGPU();
377:   } catch(std::exception const & ex) {
378:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
379:   }
380:   PetscLogGpuTimeEnd();
381:   VecViennaCLRestoreArrayRead(xin,&xgpu);
382:   VecViennaCLRestoreArray(yin,&ygpu);
383:   return(0);
384: }


387: PetscErrorCode VecAXPY_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)
388: {
389:   const ViennaCLVector  *xgpu;
390:   ViennaCLVector        *ygpu;
391:   PetscErrorCode        ierr;

394:   if (alpha != 0.0 && xin->map->n > 0) {
395:     VecViennaCLGetArrayRead(xin,&xgpu);
396:     VecViennaCLGetArray(yin,&ygpu);
397:     PetscLogGpuTimeBegin();
398:     try {
399:       *ygpu += alpha * *xgpu;
400:       ViennaCLWaitForGPU();
401:     } catch(std::exception const & ex) {
402:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
403:     }
404:     PetscLogGpuTimeEnd();
405:     VecViennaCLRestoreArrayRead(xin,&xgpu);
406:     VecViennaCLRestoreArray(yin,&ygpu);
407:     PetscLogGpuFlops(2.0*yin->map->n);
408:   }
409:   return(0);
410: }


413: PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
414: {
415:   const ViennaCLVector  *xgpu,*ygpu;
416:   ViennaCLVector        *wgpu;
417:   PetscErrorCode        ierr;

420:   if (xin->map->n > 0) {
421:     VecViennaCLGetArrayRead(xin,&xgpu);
422:     VecViennaCLGetArrayRead(yin,&ygpu);
423:     VecViennaCLGetArrayWrite(win,&wgpu);
424:     PetscLogGpuTimeBegin();
425:     try {
426:       *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
427:       ViennaCLWaitForGPU();
428:     } catch(std::exception const & ex) {
429:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
430:     }
431:     PetscLogGpuTimeEnd();
432:     PetscLogGpuFlops(win->map->n);
433:     VecViennaCLRestoreArrayRead(xin,&xgpu);
434:     VecViennaCLRestoreArrayRead(yin,&ygpu);
435:     VecViennaCLRestoreArrayWrite(win,&wgpu);
436:   }
437:   return(0);
438: }


441: PetscErrorCode VecWAXPY_SeqViennaCL(Vec win,PetscScalar alpha,Vec xin, Vec yin)
442: {
443:   const ViennaCLVector  *xgpu,*ygpu;
444:   ViennaCLVector        *wgpu;
445:   PetscErrorCode        ierr;

448:   if (alpha == 0.0 && xin->map->n > 0) {
449:     VecCopy_SeqViennaCL(yin,win);
450:   } else {
451:     VecViennaCLGetArrayRead(xin,&xgpu);
452:     VecViennaCLGetArrayRead(yin,&ygpu);
453:     VecViennaCLGetArrayWrite(win,&wgpu);
454:     PetscLogGpuTimeBegin();
455:     if (alpha == 1.0) {
456:       try {
457:         *wgpu = *ygpu + *xgpu;
458:       } catch(std::exception const & ex) {
459:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
460:       }
461:       PetscLogGpuFlops(win->map->n);
462:     } else if (alpha == -1.0) {
463:       try {
464:         *wgpu = *ygpu - *xgpu;
465:       } catch(std::exception const & ex) {
466:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
467:       }
468:       PetscLogGpuFlops(win->map->n);
469:     } else {
470:       try {
471:         *wgpu = *ygpu + alpha * *xgpu;
472:       } catch(std::exception const & ex) {
473:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
474:       }
475:       PetscLogGpuFlops(2*win->map->n);
476:     }
477:     ViennaCLWaitForGPU();
478:     PetscLogGpuTimeEnd();
479:     VecViennaCLRestoreArrayRead(xin,&xgpu);
480:     VecViennaCLRestoreArrayRead(yin,&ygpu);
481:     VecViennaCLRestoreArrayWrite(win,&wgpu);
482:   }
483:   return(0);
484: }


487: /*
488:  * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
489:  *
490:  * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
491:  * hence there is an iterated application of these until the final result is obtained
492:  */
493: PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
494: {
496:   PetscInt       j;

499:   for (j = 0; j < nv; ++j) {
500:     if (j+1 < nv) {
501:       VecAXPBYPCZ_SeqViennaCL(xin,alpha[j],alpha[j+1],1.0,y[j],y[j+1]);
502:       ++j;
503:     } else {
504:       VecAXPY_SeqViennaCL(xin,alpha[j],y[j]);
505:     }
506:   }
507:   ViennaCLWaitForGPU();
508:   return(0);
509: }


512: PetscErrorCode VecDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
513: {
514:   const ViennaCLVector  *xgpu,*ygpu;
515:   PetscErrorCode        ierr;

518:   if (xin->map->n > 0) {
519:     VecViennaCLGetArrayRead(xin,&xgpu);
520:     VecViennaCLGetArrayRead(yin,&ygpu);
521:     PetscLogGpuTimeBegin();
522:     try {
523:       *z = viennacl::linalg::inner_prod(*xgpu,*ygpu);
524:     } catch(std::exception const & ex) {
525:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
526:     }
527:     ViennaCLWaitForGPU();
528:     PetscLogGpuTimeEnd();
529:     if (xin->map->n >0) {
530:       PetscLogGpuFlops(2.0*xin->map->n-1);
531:     }
532:     VecViennaCLRestoreArrayRead(xin,&xgpu);
533:     VecViennaCLRestoreArrayRead(yin,&ygpu);
534:   } else *z = 0.0;
535:   return(0);
536: }



540: /*
541:  * Operation z[j] = dot(x, y[j])
542:  *
543:  * We use an iterated application of dot() for each j. For small ranges of j this is still faster than an allocation of extra memory in order to use gemv().
544:  */
545: PetscErrorCode VecMDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
546: {
547:   PetscErrorCode       ierr;
548:   PetscInt             n = xin->map->n,i;
549:   const ViennaCLVector *xgpu,*ygpu;
550:   Vec                  *yyin = (Vec*)yin;
551:   std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);

554:   if (xin->map->n > 0) {
555:     VecViennaCLGetArrayRead(xin,&xgpu);
556:     for (i=0; i<nv; i++) {
557:       VecViennaCLGetArrayRead(yyin[i],&ygpu);
558:       ygpu_array[i] = ygpu;
559:     }
560:     PetscLogGpuTimeBegin();
561:     viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
562:     ViennaCLVector result = viennacl::linalg::inner_prod(*xgpu, y_tuple);
563:     viennacl::copy(result.begin(), result.end(), z);
564:     for (i=0; i<nv; i++) {
565:       VecViennaCLRestoreArrayRead(yyin[i],&ygpu);
566:     }
567:     ViennaCLWaitForGPU();
568:     PetscLogGpuTimeEnd();
569:     VecViennaCLRestoreArrayRead(xin,&xgpu);
570:     PetscLogGpuFlops(PetscMax(nv*(2.0*n-1),0.0));
571:   } else {
572:     for (i=0; i<nv; i++) z[i] = 0.0;
573:   }
574:   return(0);
575: }

577: PetscErrorCode VecMTDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
578: {

582:   /* Since complex case is not supported at the moment, this is the same as VecMDot_SeqViennaCL */
583:   VecMDot_SeqViennaCL(xin,nv,yin,z);
584:   ViennaCLWaitForGPU();
585:   return(0);
586: }


589: PetscErrorCode VecSet_SeqViennaCL(Vec xin,PetscScalar alpha)
590: {
591:   ViennaCLVector *xgpu;

595:   if (xin->map->n > 0) {
596:     VecViennaCLGetArrayWrite(xin,&xgpu);
597:     PetscLogGpuTimeBegin();
598:     try {
599:       *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
600:       ViennaCLWaitForGPU();
601:     } catch(std::exception const & ex) {
602:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
603:     }
604:     PetscLogGpuTimeEnd();
605:     VecViennaCLRestoreArrayWrite(xin,&xgpu);
606:   }
607:   return(0);
608: }

610: PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
611: {
612:   ViennaCLVector *xgpu;

616:   if (alpha == 0.0 && xin->map->n > 0) {
617:     VecSet_SeqViennaCL(xin,alpha);
618:     PetscLogGpuFlops(xin->map->n);
619:   } else if (alpha != 1.0 && xin->map->n > 0) {
620:     VecViennaCLGetArray(xin,&xgpu);
621:     PetscLogGpuTimeBegin();
622:     try {
623:       *xgpu *= alpha;
624:       ViennaCLWaitForGPU();
625:     } catch(std::exception const & ex) {
626:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
627:     }
628:     PetscLogGpuTimeEnd();
629:     VecViennaCLRestoreArray(xin,&xgpu);
630:     PetscLogGpuFlops(xin->map->n);
631:   }
632:   return(0);
633: }


636: PetscErrorCode VecTDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
637: {

641:   /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
642:   VecDot_SeqViennaCL(xin, yin, z);
643:   ViennaCLWaitForGPU();
644:   return(0);
645: }


648: PetscErrorCode VecCopy_SeqViennaCL(Vec xin,Vec yin)
649: {
650:   const ViennaCLVector *xgpu;
651:   ViennaCLVector       *ygpu;
652:   PetscErrorCode       ierr;

655:   if (xin != yin && xin->map->n > 0) {
656:     if (xin->offloadmask == PETSC_OFFLOAD_GPU) {
657:       VecViennaCLGetArrayRead(xin,&xgpu);
658:       VecViennaCLGetArrayWrite(yin,&ygpu);
659:       PetscLogGpuTimeBegin();
660:       try {
661:         *ygpu = *xgpu;
662:         ViennaCLWaitForGPU();
663:       } catch(std::exception const & ex) {
664:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
665:       }
666:       PetscLogGpuTimeEnd();
667:       VecViennaCLRestoreArrayRead(xin,&xgpu);
668:       VecViennaCLRestoreArrayWrite(yin,&ygpu);

670:     } else if (xin->offloadmask == PETSC_OFFLOAD_CPU) {
671:       /* copy in CPU if we are on the CPU*/
672:       VecCopy_SeqViennaCL_Private(xin,yin);
673:       ViennaCLWaitForGPU();
674:     } else if (xin->offloadmask == PETSC_OFFLOAD_BOTH) {
675:       /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
676:       if (yin->offloadmask == PETSC_OFFLOAD_CPU) {
677:         /* copy in CPU */
678:         VecCopy_SeqViennaCL_Private(xin,yin);
679:         ViennaCLWaitForGPU();
680:       } else if (yin->offloadmask == PETSC_OFFLOAD_GPU) {
681:         /* copy in GPU */
682:         VecViennaCLGetArrayRead(xin,&xgpu);
683:         VecViennaCLGetArrayWrite(yin,&ygpu);
684:         PetscLogGpuTimeBegin();
685:         try {
686:           *ygpu = *xgpu;
687:           ViennaCLWaitForGPU();
688:         } catch(std::exception const & ex) {
689:           SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
690:         }
691:         PetscLogGpuTimeEnd();
692:         VecViennaCLRestoreArrayRead(xin,&xgpu);
693:         VecViennaCLRestoreArrayWrite(yin,&ygpu);
694:       } else if (yin->offloadmask == PETSC_OFFLOAD_BOTH) {
695:         /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck
696:            default to copy in GPU (this is an arbitrary choice) */
697:         VecViennaCLGetArrayRead(xin,&xgpu);
698:         VecViennaCLGetArrayWrite(yin,&ygpu);
699:         PetscLogGpuTimeBegin();
700:         try {
701:           *ygpu = *xgpu;
702:           ViennaCLWaitForGPU();
703:         } catch(std::exception const & ex) {
704:           SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
705:         }
706:         PetscLogGpuTimeEnd();
707:         VecViennaCLRestoreArrayRead(xin,&xgpu);
708:         VecViennaCLRestoreArrayWrite(yin,&ygpu);
709:       } else {
710:         VecCopy_SeqViennaCL_Private(xin,yin);
711:         ViennaCLWaitForGPU();
712:       }
713:     }
714:   }
715:   return(0);
716: }


719: PetscErrorCode VecSwap_SeqViennaCL(Vec xin,Vec yin)
720: {
722:   ViennaCLVector *xgpu,*ygpu;

725:   if (xin != yin && xin->map->n > 0) {
726:     VecViennaCLGetArray(xin,&xgpu);
727:     VecViennaCLGetArray(yin,&ygpu);
728:     PetscLogGpuTimeBegin();
729:     try {
730:       viennacl::swap(*xgpu, *ygpu);
731:       ViennaCLWaitForGPU();
732:     } catch(std::exception const & ex) {
733:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
734:     }
735:     PetscLogGpuTimeEnd();
736:     VecViennaCLRestoreArray(xin,&xgpu);
737:     VecViennaCLRestoreArray(yin,&ygpu);
738:   }
739:   return(0);
740: }


743: // y = alpha * x + beta * y
744: PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
745: {
746:   PetscErrorCode       ierr;
747:   PetscScalar          a = alpha,b = beta;
748:   const ViennaCLVector *xgpu;
749:   ViennaCLVector       *ygpu;

752:   if (a == 0.0 && xin->map->n > 0) {
753:     VecScale_SeqViennaCL(yin,beta);
754:   } else if (b == 1.0 && xin->map->n > 0) {
755:     VecAXPY_SeqViennaCL(yin,alpha,xin);
756:   } else if (a == 1.0 && xin->map->n > 0) {
757:     VecAYPX_SeqViennaCL(yin,beta,xin);
758:   } else if (b == 0.0 && xin->map->n > 0) {
759:     VecViennaCLGetArrayRead(xin,&xgpu);
760:     VecViennaCLGetArray(yin,&ygpu);
761:     PetscLogGpuTimeBegin();
762:     try {
763:       *ygpu = *xgpu * alpha;
764:       ViennaCLWaitForGPU();
765:     } catch(std::exception const & ex) {
766:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
767:     }
768:     PetscLogGpuTimeEnd();
769:     PetscLogGpuFlops(xin->map->n);
770:     VecViennaCLRestoreArrayRead(xin,&xgpu);
771:     VecViennaCLRestoreArray(yin,&ygpu);
772:   } else if (xin->map->n > 0) {
773:     VecViennaCLGetArrayRead(xin,&xgpu);
774:     VecViennaCLGetArray(yin,&ygpu);
775:     PetscLogGpuTimeBegin();
776:     try {
777:       *ygpu = *xgpu * alpha + *ygpu * beta;
778:       ViennaCLWaitForGPU();
779:     } catch(std::exception const & ex) {
780:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
781:     }
782:     PetscLogGpuTimeEnd();
783:     VecViennaCLRestoreArrayRead(xin,&xgpu);
784:     VecViennaCLRestoreArray(yin,&ygpu);
785:     PetscLogGpuFlops(3.0*xin->map->n);
786:   }
787:   return(0);
788: }


791: /* operation  z = alpha * x + beta *y + gamma *z*/
792: PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
793: {
794:   PetscErrorCode       ierr;
795:   PetscInt             n = zin->map->n;
796:   const ViennaCLVector *xgpu,*ygpu;
797:   ViennaCLVector       *zgpu;

800:   VecViennaCLGetArrayRead(xin,&xgpu);
801:   VecViennaCLGetArrayRead(yin,&ygpu);
802:   VecViennaCLGetArray(zin,&zgpu);
803:   if (alpha == 0.0 && xin->map->n > 0) {
804:     PetscLogGpuTimeBegin();
805:     try {
806:       if (beta == 0.0) {
807:         *zgpu = gamma * *zgpu;
808:         ViennaCLWaitForGPU();
809:         PetscLogGpuFlops(1.0*n);
810:       } else if (gamma == 0.0) {
811:         *zgpu = beta * *ygpu;
812:         ViennaCLWaitForGPU();
813:         PetscLogGpuFlops(1.0*n);
814:       } else {
815:         *zgpu = beta * *ygpu + gamma * *zgpu;
816:         ViennaCLWaitForGPU();
817:         PetscLogGpuFlops(3.0*n);
818:       }
819:     } catch(std::exception const & ex) {
820:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
821:     }
822:     PetscLogGpuTimeEnd();
823:     PetscLogGpuFlops(3.0*n);
824:   } else if (beta == 0.0 && xin->map->n > 0) {
825:     PetscLogGpuTimeBegin();
826:     try {
827:       if (gamma == 0.0) {
828:         *zgpu = alpha * *xgpu;
829:         ViennaCLWaitForGPU();
830:         PetscLogGpuFlops(1.0*n);
831:       } else {
832:         *zgpu = alpha * *xgpu + gamma * *zgpu;
833:         ViennaCLWaitForGPU();
834:         PetscLogGpuFlops(3.0*n);
835:       }
836:     } catch(std::exception const & ex) {
837:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
838:     }
839:     PetscLogGpuTimeEnd();
840:   } else if (gamma == 0.0 && xin->map->n > 0) {
841:     PetscLogGpuTimeBegin();
842:     try {
843:       *zgpu = alpha * *xgpu + beta * *ygpu;
844:       ViennaCLWaitForGPU();
845:     } catch(std::exception const & ex) {
846:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
847:     }
848:     PetscLogGpuTimeEnd();
849:     PetscLogGpuFlops(3.0*n);
850:   } else if (xin->map->n > 0) {
851:     PetscLogGpuTimeBegin();
852:     try {
853:       /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
854:       if (gamma != 1.0)
855:         *zgpu *= gamma;
856:       *zgpu += alpha * *xgpu + beta * *ygpu;
857:       ViennaCLWaitForGPU();
858:     } catch(std::exception const & ex) {
859:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
860:     }
861:     PetscLogGpuTimeEnd();
862:     VecViennaCLRestoreArray(zin,&zgpu);
863:     VecViennaCLRestoreArrayRead(xin,&xgpu);
864:     VecViennaCLRestoreArrayRead(yin,&ygpu);
865:     PetscLogGpuFlops(5.0*n);
866:   }
867:   return(0);
868: }

870: PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win,Vec xin,Vec yin)
871: {
872:   PetscErrorCode       ierr;
873:   PetscInt             n = win->map->n;
874:   const ViennaCLVector *xgpu,*ygpu;
875:   ViennaCLVector       *wgpu;

878:   if (xin->map->n > 0) {
879:     VecViennaCLGetArrayRead(xin,&xgpu);
880:     VecViennaCLGetArrayRead(yin,&ygpu);
881:     VecViennaCLGetArray(win,&wgpu);
882:     PetscLogGpuTimeBegin();
883:     try {
884:       *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
885:       ViennaCLWaitForGPU();
886:     } catch(std::exception const & ex) {
887:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
888:     }
889:     PetscLogGpuTimeEnd();
890:     VecViennaCLRestoreArrayRead(xin,&xgpu);
891:     VecViennaCLRestoreArrayRead(yin,&ygpu);
892:     VecViennaCLRestoreArray(win,&wgpu);
893:     PetscLogGpuFlops(n);
894:   }
895:   return(0);
896: }


899: PetscErrorCode VecNorm_SeqViennaCL(Vec xin,NormType type,PetscReal *z)
900: {
901:   PetscErrorCode       ierr;
902:   PetscInt             n = xin->map->n;
903:   PetscBLASInt         bn;
904:   const ViennaCLVector *xgpu;

907:   if (xin->map->n > 0) {
908:     PetscBLASIntCast(n,&bn);
909:     VecViennaCLGetArrayRead(xin,&xgpu);
910:     if (type == NORM_2 || type == NORM_FROBENIUS) {
911:       PetscLogGpuTimeBegin();
912:       try {
913:         *z = viennacl::linalg::norm_2(*xgpu);
914:         ViennaCLWaitForGPU();
915:       } catch(std::exception const & ex) {
916:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
917:       }
918:       PetscLogGpuTimeEnd();
919:       PetscLogGpuFlops(PetscMax(2.0*n-1,0.0));
920:     } else if (type == NORM_INFINITY) {
921:       PetscLogGpuTimeBegin();
922:       try {
923:         *z = viennacl::linalg::norm_inf(*xgpu);
924:         ViennaCLWaitForGPU();
925:       } catch(std::exception const & ex) {
926:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
927:       }
928:       PetscLogGpuTimeEnd();
929:     } else if (type == NORM_1) {
930:       PetscLogGpuTimeBegin();
931:       try {
932:         *z = viennacl::linalg::norm_1(*xgpu);
933:         ViennaCLWaitForGPU();
934:       } catch(std::exception const & ex) {
935:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
936:       }
937:       PetscLogGpuTimeEnd();
938:       PetscLogGpuFlops(PetscMax(n-1.0,0.0));
939:     } else if (type == NORM_1_AND_2) {
940:       PetscLogGpuTimeBegin();
941:       try {
942:         *z     = viennacl::linalg::norm_1(*xgpu);
943:         *(z+1) = viennacl::linalg::norm_2(*xgpu);
944:         ViennaCLWaitForGPU();
945:       } catch(std::exception const & ex) {
946:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
947:       }
948:       PetscLogGpuTimeEnd();
949:       PetscLogGpuFlops(PetscMax(2.0*n-1,0.0));
950:       PetscLogGpuFlops(PetscMax(n-1.0,0.0));
951:     }
952:     VecViennaCLRestoreArrayRead(xin,&xgpu);
953:   } else if (type == NORM_1_AND_2) {
954:     *z      = 0.0;
955:     *(z+1)  = 0.0;
956:   } else *z = 0.0;
957:   return(0);
958: }


961: PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin,PetscRandom r)
962: {

966:   VecSetRandom_SeqViennaCL_Private(xin,r);
967:   xin->offloadmask = PETSC_OFFLOAD_CPU;
968:   return(0);
969: }

971: PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
972: {

977:   VecViennaCLCopyFromGPU(vin);
978:   VecResetArray_SeqViennaCL_Private(vin);
979:   vin->offloadmask = PETSC_OFFLOAD_CPU;
980:   return(0);
981: }

983: PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
984: {

989:   VecViennaCLCopyFromGPU(vin);
990:   VecPlaceArray_Seq(vin,a);
991:   vin->offloadmask = PETSC_OFFLOAD_CPU;
992:   return(0);
993: }

995: PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
996: {

1001:   VecViennaCLCopyFromGPU(vin);
1002:   VecReplaceArray_Seq(vin,a);
1003:   vin->offloadmask = PETSC_OFFLOAD_CPU;
1004:   return(0);
1005: }


1008: /*@C
1009:    VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.

1011:    Collective

1013:    Input Parameter:
1014: +  comm - the communicator, should be PETSC_COMM_SELF
1015: -  n - the vector length

1017:    Output Parameter:
1018: .  V - the vector

1020:    Notes:
1021:    Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
1022:    same type as an existing vector.

1024:    Level: intermediate

1026: .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
1027: @*/
1028: PetscErrorCode VecCreateSeqViennaCL(MPI_Comm comm,PetscInt n,Vec *v)
1029: {

1033:   VecCreate(comm,v);
1034:   VecSetSizes(*v,n,n);
1035:   VecSetType(*v,VECSEQVIENNACL);
1036:   return(0);
1037: }


1040: /*@C
1041:    VecCreateSeqViennaCLWithArray - Creates a viennacl sequential array-style vector,
1042:    where the user provides the array space to store the vector values.

1044:    Collective

1046:    Input Parameter:
1047: +  comm - the communicator, should be PETSC_COMM_SELF
1048: .  bs - the block size
1049: .  n - the vector length
1050: -  array - viennacl array where the vector elements are to be stored.

1052:    Output Parameter:
1053: .  V - the vector

1055:    Notes:
1056:    Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
1057:    same type as an existing vector.

1059:    If the user-provided array is NULL, then VecViennaCLPlaceArray() can be used
1060:    at a later stage to SET the array for storing the vector values.

1062:    PETSc does NOT free the array when the vector is destroyed via VecDestroy().
1063:    The user should not free the array until the vector is destroyed.

1065:    Level: intermediate

1067: .seealso: VecCreateMPIViennaCLWithArray(), VecCreate(), VecDuplicate(), VecDuplicateVecs(),
1068:           VecCreateGhost(), VecCreateSeq(), VecCUDAPlaceArray(), VecCreateSeqWithArray(),
1069:           VecCreateMPIWithArray()
1070: @*/
1071: PETSC_EXTERN PetscErrorCode  VecCreateSeqViennaCLWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,const ViennaCLVector* array,Vec *V)
1072: {
1074:   PetscMPIInt    size;

1077:   VecCreate(comm,V);
1078:   VecSetSizes(*V,n,n);
1079:   VecSetBlockSize(*V,bs);
1080:   MPI_Comm_size(comm,&size);
1081:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQ on more than one process");
1082:   VecCreate_SeqViennaCL_Private(*V,array);
1083:   return(0);
1084: }

1086: /*@C
1087:    VecCreateSeqViennaCLWithArrays - Creates a ViennaCL sequential vector, where
1088:    the user provides the array space to store the vector values.

1090:    Collective

1092:    Input Parameter:
1093: +  comm - the communicator, should be PETSC_COMM_SELF
1094: .  bs - the block size
1095: .  n - the vector length
1096: -  cpuarray - CPU memory where the vector elements are to be stored.
1097: -  viennaclvec - ViennaCL vector where the Vec entries are to be stored on the device.

1099:    Output Parameter:
1100: .  V - the vector

1102:    Notes:
1103:    If both cpuarray and viennaclvec are provided, the caller must ensure that
1104:    the provided arrays have identical values.

1106:    PETSc does NOT free the provided arrays when the vector is destroyed via
1107:    VecDestroy(). The user should not free the array until the vector is
1108:    destroyed.

1110:    Level: intermediate

1112: .seealso: VecCreateMPIViennaCLWithArrays(), VecCreate(), VecCreateSeqWithArray(),
1113:           VecViennaCLPlaceArray(), VecPlaceArray(), VecCreateSeqCUDAWithArrays(),
1114:           VecViennaCLAllocateCheckHost()
1115: @*/
1116: PetscErrorCode  VecCreateSeqViennaCLWithArrays(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar cpuarray[],const ViennaCLVector* viennaclvec,Vec *V)
1117: {
1119:   PetscMPIInt    size;


1123:   MPI_Comm_size(comm,&size);
1124:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQ on more than one process");

1126:   // set V's viennaclvec to be viennaclvec, do not allocate memory on host yet.
1127:   VecCreateSeqViennaCLWithArray(comm,bs,n,viennaclvec,V);

1129:   if (cpuarray && viennaclvec) {
1130:     Vec_Seq *s = (Vec_Seq*)((*V)->data);
1131:     s->array = (PetscScalar*)cpuarray;
1132:     (*V)->offloadmask = PETSC_OFFLOAD_BOTH;
1133:   } else if (cpuarray) {
1134:     Vec_Seq *s = (Vec_Seq*)((*V)->data);
1135:     s->array = (PetscScalar*)cpuarray;
1136:     (*V)->offloadmask = PETSC_OFFLOAD_CPU;
1137:   } else if (viennaclvec) {
1138:     (*V)->offloadmask = PETSC_OFFLOAD_GPU;
1139:   } else {
1140:     (*V)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1141:   }

1143:   return(0);
1144: }

1146: /*@C
1147:    VecViennaCLPlaceArray - Replace the viennacl vector in a Vec with
1148:    the one provided by the user. This is useful to avoid a copy.

1150:    Not Collective

1152:    Input Parameters:
1153: +  vec - the vector
1154: -  array - the ViennaCL vector

1156:    Notes:
1157:    You can return to the original viennacl vector with a call to
1158:    VecViennaCLResetArray() It is not possible to use VecViennaCLPlaceArray()
1159:    and VecPlaceArray() at the same time on the same vector.

1161:    Level: intermediate

1163: .seealso: VecPlaceArray(), VecSetValues(), VecViennaCLResetArray(),
1164:           VecCUDAPlaceArray(),

1166: @*/
1167: PETSC_EXTERN PetscErrorCode VecViennaCLPlaceArray(Vec vin,const ViennaCLVector* a)
1168: {

1173:   VecViennaCLCopyToGPU(vin);
1174:   if (((Vec_Seq*)vin->data)->unplacedarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"VecViennaCLPlaceArray()/VecPlaceArray() was already called on this vector, without a call to VecViennaCLResetArray()/VecResetArray()");
1175:   ((Vec_Seq*)vin->data)->unplacedarray  = (PetscScalar *) ((Vec_ViennaCL*)vin->spptr)->GPUarray; /* save previous GPU array so reset can bring it back */
1176:   ((Vec_ViennaCL*)vin->spptr)->GPUarray = (ViennaCLVector*)a;
1177:   vin->offloadmask = PETSC_OFFLOAD_GPU;
1178:   PetscObjectStateIncrease((PetscObject)vin);
1179:   return(0);
1180: }

1182: /*@C
1183:    VecViennaCLResetArray - Resets a vector to use its default memory. Call this
1184:    after the use of VecViennaCLPlaceArray().

1186:    Not Collective

1188:    Input Parameters:
1189: .  vec - the vector

1191:    Level: developer

1193: .seealso: VecViennaCLPlaceArray(), VecResetArray(), VecCUDAResetArray(), VecPlaceArray()
1194: @*/
1195: PETSC_EXTERN PetscErrorCode VecViennaCLResetArray(Vec vin)
1196: {

1201:   VecViennaCLCopyToGPU(vin);
1202:   ((Vec_ViennaCL*)vin->spptr)->GPUarray = (ViennaCLVector *) ((Vec_Seq*)vin->data)->unplacedarray;
1203:   ((Vec_Seq*)vin->data)->unplacedarray = 0;
1204:   vin->offloadmask = PETSC_OFFLOAD_GPU;
1205:   PetscObjectStateIncrease((PetscObject)vin);
1206:   return(0);
1207: }


1210: /*  VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1211:  *
1212:  *  Simply reuses VecDot() and VecNorm(). Performance improvement through custom kernel (kernel generator) possible.
1213:  */
1214: PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1215: {
1216:   PetscErrorCode                         ierr;

1219:   VecDot_SeqViennaCL(s,t,dp);
1220:   VecNorm_SeqViennaCL(t,NORM_2,nm);
1221:   *nm *= *nm; //squared norm required
1222:   return(0);
1223: }

1225: PetscErrorCode VecDuplicate_SeqViennaCL(Vec win,Vec *V)
1226: {

1230:   VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win),win->map->n,V);
1231:   PetscLayoutReference(win->map,&(*V)->map);
1232:   PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);
1233:   PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);
1234:   (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1235:   return(0);
1236: }

1238: PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1239: {

1243:   try {
1244:     if (v->spptr) {
1245:       delete ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated;
1246:       delete (Vec_ViennaCL*) v->spptr;
1247:     }
1248:   } catch(char *ex) {
1249:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
1250:   }
1251:   VecDestroy_SeqViennaCL_Private(v);
1252:   return(0);
1253: }

1255: PetscErrorCode VecGetArray_SeqViennaCL(Vec v,PetscScalar **a)
1256: {

1260:   if (v->offloadmask == PETSC_OFFLOAD_GPU) {
1261:     VecViennaCLCopyFromGPU(v);
1262:   } else {
1263:     VecViennaCLAllocateCheckHost(v);
1264:   }
1265:   *a = *((PetscScalar**)v->data);
1266:   return(0);
1267: }

1269: PetscErrorCode VecRestoreArray_SeqViennaCL(Vec v,PetscScalar **a)
1270: {
1272:   v->offloadmask = PETSC_OFFLOAD_CPU;
1273:   return(0);
1274: }

1276: PetscErrorCode VecGetArrayWrite_SeqViennaCL(Vec v,PetscScalar **a)
1277: {

1281:   VecViennaCLAllocateCheckHost(v);
1282:   *a   = *((PetscScalar**)v->data);
1283:   return(0);
1284: }

1286: static PetscErrorCode VecBindToCPU_SeqAIJViennaCL(Vec V,PetscBool flg)
1287: {

1291:   V->boundtocpu = flg;
1292:   if (flg) {
1293:     VecViennaCLCopyFromGPU(V);
1294:     V->offloadmask = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
1295:     V->ops->dot             = VecDot_Seq;
1296:     V->ops->norm            = VecNorm_Seq;
1297:     V->ops->tdot            = VecTDot_Seq;
1298:     V->ops->scale           = VecScale_Seq;
1299:     V->ops->copy            = VecCopy_Seq;
1300:     V->ops->set             = VecSet_Seq;
1301:     V->ops->swap            = VecSwap_Seq;
1302:     V->ops->axpy            = VecAXPY_Seq;
1303:     V->ops->axpby           = VecAXPBY_Seq;
1304:     V->ops->axpbypcz        = VecAXPBYPCZ_Seq;
1305:     V->ops->pointwisemult   = VecPointwiseMult_Seq;
1306:     V->ops->pointwisedivide = VecPointwiseDivide_Seq;
1307:     V->ops->setrandom       = VecSetRandom_Seq;
1308:     V->ops->dot_local       = VecDot_Seq;
1309:     V->ops->tdot_local      = VecTDot_Seq;
1310:     V->ops->norm_local      = VecNorm_Seq;
1311:     V->ops->mdot_local      = VecMDot_Seq;
1312:     V->ops->mtdot_local     = VecMTDot_Seq;
1313:     V->ops->maxpy           = VecMAXPY_Seq;
1314:     V->ops->mdot            = VecMDot_Seq;
1315:     V->ops->mtdot           = VecMTDot_Seq;
1316:     V->ops->aypx            = VecAYPX_Seq;
1317:     V->ops->waxpy           = VecWAXPY_Seq;
1318:     V->ops->dotnorm2        = NULL;
1319:     V->ops->placearray      = VecPlaceArray_Seq;
1320:     V->ops->replacearray    = VecReplaceArray_Seq;
1321:     V->ops->resetarray      = VecResetArray_Seq;
1322:     V->ops->duplicate       = VecDuplicate_Seq;
1323:   } else {
1324:     V->ops->dot             = VecDot_SeqViennaCL;
1325:     V->ops->norm            = VecNorm_SeqViennaCL;
1326:     V->ops->tdot            = VecTDot_SeqViennaCL;
1327:     V->ops->scale           = VecScale_SeqViennaCL;
1328:     V->ops->copy            = VecCopy_SeqViennaCL;
1329:     V->ops->set             = VecSet_SeqViennaCL;
1330:     V->ops->swap            = VecSwap_SeqViennaCL;
1331:     V->ops->axpy            = VecAXPY_SeqViennaCL;
1332:     V->ops->axpby           = VecAXPBY_SeqViennaCL;
1333:     V->ops->axpbypcz        = VecAXPBYPCZ_SeqViennaCL;
1334:     V->ops->pointwisemult   = VecPointwiseMult_SeqViennaCL;
1335:     V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1336:     V->ops->setrandom       = VecSetRandom_SeqViennaCL;
1337:     V->ops->dot_local       = VecDot_SeqViennaCL;
1338:     V->ops->tdot_local      = VecTDot_SeqViennaCL;
1339:     V->ops->norm_local      = VecNorm_SeqViennaCL;
1340:     V->ops->mdot_local      = VecMDot_SeqViennaCL;
1341:     V->ops->mtdot_local     = VecMTDot_SeqViennaCL;
1342:     V->ops->maxpy           = VecMAXPY_SeqViennaCL;
1343:     V->ops->mdot            = VecMDot_SeqViennaCL;
1344:     V->ops->mtdot           = VecMTDot_SeqViennaCL;
1345:     V->ops->aypx            = VecAYPX_SeqViennaCL;
1346:     V->ops->waxpy           = VecWAXPY_SeqViennaCL;
1347:     V->ops->dotnorm2        = VecDotNorm2_SeqViennaCL;
1348:     V->ops->placearray      = VecPlaceArray_SeqViennaCL;
1349:     V->ops->replacearray    = VecReplaceArray_SeqViennaCL;
1350:     V->ops->resetarray      = VecResetArray_SeqViennaCL;
1351:     V->ops->destroy         = VecDestroy_SeqViennaCL;
1352:     V->ops->duplicate       = VecDuplicate_SeqViennaCL;
1353:     V->ops->getarraywrite   = VecGetArrayWrite_SeqViennaCL;
1354:     V->ops->getarray        = VecGetArray_SeqViennaCL;
1355:     V->ops->restorearray    = VecRestoreArray_SeqViennaCL;
1356:   }
1357:   return(0);
1358: }

1360: PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1361: {
1363:   PetscMPIInt    size;

1366:   MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
1367:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQVIENNACL on more than one process");
1368:   VecCreate_Seq_Private(V,0);
1369:   PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);

1371:   VecBindToCPU_SeqAIJViennaCL(V,PETSC_FALSE);
1372:   V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;

1374:   VecViennaCLAllocateCheck(V);
1375:   VecViennaCLAllocateCheckHost(V);
1376:   VecSet(V,0.0);
1377:   VecSet_Seq(V,0.0);
1378:   V->offloadmask = PETSC_OFFLOAD_BOTH;
1379:   return(0);
1380: }

1382: /*@C
1383:   VecViennaCLGetCLContext - Get the OpenCL context in which the Vec resides.

1385:   Caller should cast (*ctx) to (const cl_context). Caller is responsible for
1386:   invoking clReleaseContext().


1389:   Input Parameters:
1390: .  v    - the vector

1392:   Output Parameters:
1393: .  ctx - pointer to the underlying CL context

1395:   Level: intermediate

1397: .seealso: VecViennaCLGetCLQueue(), VecViennaCLGetCLMemRead()
1398: @*/
1399: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLContext(Vec v, PETSC_UINTPTR_T* ctx)
1400: {
1401: #if !defined(PETSC_HAVE_OPENCL)
1402:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get the associated cl_context.");
1403: #else


1409:   const ViennaCLVector *v_vcl;
1410:   VecViennaCLGetArrayRead(v, &v_vcl);
1411:   try{
1412:     viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1413:     const cl_context ocl_ctx = vcl_ctx.handle().get();
1414:     clRetainContext(ocl_ctx);
1415:     *ctx = (PETSC_UINTPTR_T)(ocl_ctx);
1416:   } catch (std::exception const & ex) {
1417:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1418:   }

1420:   return(0);
1421: #endif
1422: }

1424: /*@C
1425:   VecViennaCLGetCLQueue - Get the OpenCL command queue to which all
1426:   operations of the Vec are enqueued.

1428:   Caller should cast (*queue) to (const cl_command_queue). Caller is
1429:   responsible for invoking clReleaseCommandQueue().

1431:   Input Parameters:
1432: .  v    - the vector

1434:   Output Parameters:
1435: .  ctx - pointer to the CL command queue

1437:   Level: intermediate

1439: .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMemRead()
1440: @*/
1441: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLQueue(Vec v, PETSC_UINTPTR_T* queue)
1442: {
1443: #if !defined(PETSC_HAVE_OPENCL)
1444:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get the associated cl_command_queue.");
1445: #else

1450:   const ViennaCLVector *v_vcl;
1451:   VecViennaCLGetArrayRead(v, &v_vcl);
1452:   try{
1453:     viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1454:     const viennacl::ocl::command_queue& vcl_queue = vcl_ctx.current_queue();
1455:     const cl_command_queue ocl_queue = vcl_queue.handle().get();
1456:     clRetainCommandQueue(ocl_queue);
1457:     *queue = (PETSC_UINTPTR_T)(ocl_queue);
1458:   } catch (std::exception const & ex) {
1459:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1460:   }

1462:   return(0);
1463: #endif
1464: }

1466: /*@C
1467:   VecViennaCLGetCLMemRead - Provides access to the the CL buffer inside a Vec.

1469:   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1470:   invoking clReleaseMemObject().

1472:   Input Parameters:
1473: .  v    - the vector

1475:   Output Parameters:
1476: .  mem - pointer to the device buffer

1478:   Level: intermediate

1480: .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMemWrite()
1481: @*/
1482: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemRead(Vec v, PETSC_UINTPTR_T* mem)
1483: {
1484: #if !defined(PETSC_HAVE_OPENCL)
1485:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1486: #else

1491:   const ViennaCLVector *v_vcl;
1492:   VecViennaCLGetArrayRead(v, &v_vcl);
1493:   try{
1494:     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1495:     clRetainMemObject(ocl_mem);
1496:     *mem = (PETSC_UINTPTR_T)(ocl_mem);
1497:   } catch (std::exception const & ex) {
1498:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1499:   }
1500:   return(0);
1501: #endif
1502: }

1504: /*@C
1505:   VecViennaCLGetCLMemWrite - Provides access to the the CL buffer inside a Vec.

1507:   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1508:   invoking clReleaseMemObject().

1510:   The device pointer has to be released by calling
1511:   VecViennaCLRestoreCLMemWrite().  Upon restoring the vector data the data on
1512:   the host will be marked as out of date.  A subsequent access of the host data
1513:   will thus incur a data transfer from the device to the host.

1515:   Input Parameters:
1516: .  v    - the vector

1518:   Output Parameters:
1519: .  mem - pointer to the device buffer

1521:   Level: intermediate

1523: .seealso: VecViennaCLGetCLContext(), VecViennaCLRestoreCLMemWrite()
1524: @*/
1525: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemWrite(Vec v, PETSC_UINTPTR_T* mem)
1526: {
1527: #if !defined(PETSC_HAVE_OPENCL)
1528:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1529: #else

1534:   ViennaCLVector *v_vcl;
1535:   VecViennaCLGetArrayWrite(v, &v_vcl);
1536:   try{
1537:     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1538:     clRetainMemObject(ocl_mem);
1539:     *mem = (PETSC_UINTPTR_T)(ocl_mem);
1540:   } catch (std::exception const & ex) {
1541:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1542:   }

1544:   return(0);
1545: #endif
1546: }

1548: /*@C
1549:   VecViennaCLRestoreCLMemWrite - Restores a CL buffer pointer previously
1550:   acquired with VecViennaCLGetCLMemWrite().

1552:    This marks the host data as out of date.  Subsequent access to the
1553:    vector data on the host side with for instance VecGetArray() incurs a
1554:    data transfer.

1556:   Input Parameters:
1557: .  v    - the vector

1559:   Level: intermediate

1561: .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMemWrite()
1562: @*/
1563: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMemWrite(Vec v)
1564: {
1565: #if !defined(PETSC_HAVE_OPENCL)
1566:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1567: #else

1572:   VecViennaCLRestoreArrayWrite(v, PETSC_NULL);

1574:   return(0);
1575: #endif
1576: }


1579: /*@C
1580:   VecViennaCLGetCLMem - Provides access to the the CL buffer inside a Vec.

1582:   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1583:   invoking clReleaseMemObject().

1585:   The device pointer has to be released by calling VecViennaCLRestoreCLMem().
1586:   Upon restoring the vector data the data on the host will be marked as out of
1587:   date.  A subsequent access of the host data will thus incur a data transfer
1588:   from the device to the host.

1590:   Input Parameters:
1591: .  v    - the vector

1593:   Output Parameters:
1594: .  mem - pointer to the device buffer

1596:   Level: intermediate

1598: .seealso: VecViennaCLGetCLContext(), VecViennaCLRestoreCLMem()
1599: @*/
1600: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMem(Vec v, PETSC_UINTPTR_T* mem)
1601: {
1602: #if !defined(PETSC_HAVE_OPENCL)
1603:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1604: #else

1609:   ViennaCLVector *v_vcl;
1610:   VecViennaCLGetArray(v, &v_vcl);
1611:   try{
1612:     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1613:     clRetainMemObject(ocl_mem);
1614:     *mem = (PETSC_UINTPTR_T)(ocl_mem);
1615:   } catch (std::exception const & ex) {
1616:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1617:   }

1619:   return(0);
1620: #endif
1621: }

1623: /*@C
1624:   VecViennaCLRestoreCLMem - Restores a CL buffer pointer previously
1625:   acquired with VecViennaCLGetCLMem().

1627:    This marks the host data as out of date. Subsequent access to the vector
1628:    data on the host side with for instance VecGetArray() incurs a data
1629:    transfer.

1631:   Input Parameters:
1632: .  v    - the vector

1634:   Level: intermediate

1636: .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMem()
1637: @*/
1638: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMem(Vec v)
1639: {
1640: #if !defined(PETSC_HAVE_OPENCL)
1641:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1642: #else

1647:   VecViennaCLRestoreArray(v, PETSC_NULL);

1649:   return(0);
1650: #endif
1651: }

1653: PetscErrorCode VecCreate_SeqViennaCL_Private(Vec V,const ViennaCLVector *array)
1654: {
1656:   Vec_ViennaCL   *vecviennacl;
1657:   PetscMPIInt    size;

1660:   MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
1661:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQVIENNACL on more than one process");
1662:   VecCreate_Seq_Private(V,0);
1663:   PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);
1664:   VecBindToCPU_SeqAIJViennaCL(V,PETSC_FALSE);
1665:   V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;

1667:   if (array) {
1668:     if (!V->spptr)
1669:       V->spptr = new Vec_ViennaCL;
1670:     vecviennacl = (Vec_ViennaCL*)V->spptr;
1671:     vecviennacl->GPUarray_allocated = 0;
1672:     vecviennacl->GPUarray           = (ViennaCLVector*)array;
1673:     V->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1674:   }

1676:   return(0);
1677: }