Actual source code: vecviennacl.cxx

petsc-3.11.3 2019-06-26
Report Typos and Errors
  1: /*
  2:    Implements the sequential ViennaCL vectors.
  3: */

  5: #include <petscconf.h>
  6:  #include <petsc/private/vecimpl.h>
  7:  #include <../src/vec/vec/impls/dvecimpl.h>
  8:  #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>

 10: #include <vector>

 12: #include "viennacl/linalg/inner_prod.hpp"
 13: #include "viennacl/linalg/norm_1.hpp"
 14: #include "viennacl/linalg/norm_2.hpp"
 15: #include "viennacl/linalg/norm_inf.hpp"

 17: #ifdef VIENNACL_WITH_OPENCL
 18: #include "viennacl/ocl/backend.hpp"
 19: #endif


 22: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayReadWrite(Vec v, ViennaCLVector **a)
 23: {

 28:   *a   = 0;
 29:   VecViennaCLCopyToGPU(v);
 30:   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
 31:   ViennaCLWaitForGPU();
 32:   return(0);
 33: }

 35: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayReadWrite(Vec v, ViennaCLVector **a)
 36: {

 41:   v->valid_GPU_array = PETSC_OFFLOAD_GPU;

 43:   PetscObjectStateIncrease((PetscObject)v);
 44:   return(0);
 45: }

 47: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
 48: {

 53:   *a   = 0;
 54:   VecViennaCLCopyToGPU(v);
 55:   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
 56:   ViennaCLWaitForGPU();
 57:   return(0);
 58: }

 60: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
 61: {
 64:   return(0);
 65: }

 67: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
 68: {

 73:   *a   = 0;
 74:   VecViennaCLAllocateCheck(v);
 75:   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
 76:   ViennaCLWaitForGPU();
 77:   return(0);
 78: }

 80: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
 81: {

 86:   v->valid_GPU_array = PETSC_OFFLOAD_GPU;

 88:   PetscObjectStateIncrease((PetscObject)v);
 89:   return(0);
 90: }



 94: PETSC_EXTERN PetscErrorCode PetscViennaCLInit()
 95: {
 96:   PetscErrorCode       ierr;
 97:   char                 string[20];
 98:   PetscBool            flg,flg_cuda,flg_opencl,flg_openmp;

101:   /* ViennaCL backend selection: CUDA, OpenCL, or OpenMP */
102:   PetscOptionsGetString(NULL,NULL,"-viennacl_backend",string,12,&flg);
103:   if (flg) {
104:     try {
105:       PetscStrcasecmp(string,"cuda",&flg_cuda);
106:       PetscStrcasecmp(string,"opencl",&flg_opencl);
107:       PetscStrcasecmp(string,"openmp",&flg_openmp);

109:       /* A default (sequential) CPU backend is always available - even if OpenMP is not enabled. */
110:       if (flg_openmp) viennacl::backend::default_memory_type(viennacl::MAIN_MEMORY);
111: #if defined(PETSC_HAVE_CUDA)
112:       else if (flg_cuda) viennacl::backend::default_memory_type(viennacl::CUDA_MEMORY);
113: #endif
114: #if defined(PETSC_HAVE_OPENCL)
115:       else if (flg_opencl) viennacl::backend::default_memory_type(viennacl::OPENCL_MEMORY);
116: #endif
117:       else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Backend not recognized or available: %s.\n Pass -viennacl_view to see available backends for ViennaCL.\n", string);
118:     } catch (std::exception const & ex) {
119:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
120:     }
121:   }

123: #if defined(PETSC_HAVE_OPENCL)
124:   /* ViennaCL OpenCL device type configuration */
125:   PetscOptionsGetString(NULL,NULL,"-viennacl_opencl_device_type",string,12,&flg);
126:   if (flg) {
127:     try {
128:       PetscStrcasecmp(string,"cpu",&flg);
129:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);

131:       PetscStrcasecmp(string,"gpu",&flg);
132:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);

134:       PetscStrcasecmp(string,"accelerator",&flg);
135:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
136:     } catch (std::exception const & ex) {
137:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
138:     }
139:   }
140: #endif

142:   /* Print available backends */
143:   PetscOptionsHasName(NULL,NULL,"-viennacl_view",&flg);
144:   if (flg) {
145:     PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backends available: ");
146: #if defined(PETSC_HAVE_CUDA)
147:     PetscPrintf(PETSC_COMM_WORLD, "CUDA, ");
148: #endif
149: #if defined(PETSC_HAVE_OPENCL)
150:     PetscPrintf(PETSC_COMM_WORLD, "OpenCL, ");
151: #endif
152: #if defined(PETSC_HAVE_OPENMP)
153:     PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
154: #else
155:     PetscPrintf(PETSC_COMM_WORLD, "OpenMP (1 thread) ");
156: #endif
157:     PetscPrintf(PETSC_COMM_WORLD, "\n");

159:     /* Print selected backends */
160:     PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backend  selected: ");
161: #if defined(PETSC_HAVE_CUDA)
162:     if (viennacl::backend::default_memory_type() == viennacl::CUDA_MEMORY) {
163:       PetscPrintf(PETSC_COMM_WORLD, "CUDA ");
164:     }
165: #endif
166: #if defined(PETSC_HAVE_OPENCL)
167:     if (viennacl::backend::default_memory_type() == viennacl::OPENCL_MEMORY) {
168:       PetscPrintf(PETSC_COMM_WORLD, "OpenCL ");
169:     }
170: #endif
171: #if defined(PETSC_HAVE_OPENMP)
172:     if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
173:       PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
174:     }
175: #else
176:     if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
177:       PetscPrintf(PETSC_COMM_WORLD, "OpenMP (sequential - consider reconfiguration: --with-openmp=1) ");
178:     }
179: #endif
180:     PetscPrintf(PETSC_COMM_WORLD, "\n");
181:   }
182:   return(0);
183: }

185: /*
186:     Allocates space for the vector array on the Host if it does not exist.
187:     Does NOT change the PetscViennaCLFlag for the vector
188:     Does NOT zero the ViennaCL array
189:  */
190: PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
191: {
193:   PetscScalar    *array;
194:   Vec_Seq        *s;
195:   PetscInt       n = v->map->n;

198:   s    = (Vec_Seq*)v->data;
199:   VecViennaCLAllocateCheck(v);
200:   if (s->array == 0) {
201:     PetscMalloc1(n,&array);
202:     PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));
203:     s->array           = array;
204:     s->array_allocated = array;
205:   }
206:   return(0);
207: }


210: /*
211:     Allocates space for the vector array on the GPU if it does not exist.
212:     Does NOT change the PetscViennaCLFlag for the vector
213:     Does NOT zero the ViennaCL array

215:  */
216: PetscErrorCode VecViennaCLAllocateCheck(Vec v)
217: {
219:   int            rank;

222:   MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
223:   // First allocate memory on the GPU if needed
224:   if (!v->spptr) {
225:     try {
226:       v->spptr                            = new Vec_ViennaCL;
227:       ((Vec_ViennaCL*)v->spptr)->GPUarray = new ViennaCLVector((PetscBLASInt)v->map->n);

229:     } catch(std::exception const & ex) {
230:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
231:     }
232:   }
233:   return(0);
234: }


237: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
238: PetscErrorCode VecViennaCLCopyToGPU(Vec v)
239: {

244:   VecViennaCLAllocateCheck(v);
245:   if (v->map->n > 0) {
246:     if (v->valid_GPU_array == PETSC_OFFLOAD_CPU) {
247:       PetscLogEventBegin(VEC_ViennaCLCopyToGPU,v,0,0,0);
248:       try {
249:         ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
250:         viennacl::fast_copy(*(PetscScalar**)v->data, *(PetscScalar**)v->data + v->map->n, vec->begin());
251:         ViennaCLWaitForGPU();
252:       } catch(std::exception const & ex) {
253:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
254:       }
255:       PetscLogEventEnd(VEC_ViennaCLCopyToGPU,v,0,0,0);
256:       v->valid_GPU_array = PETSC_OFFLOAD_BOTH;
257:     }
258:   }
259:   return(0);
260: }



264: /*
265:      VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
266: */
267: PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
268: {

273:   VecViennaCLAllocateCheckHost(v);
274:   if (v->valid_GPU_array == PETSC_OFFLOAD_GPU) {
275:     PetscLogEventBegin(VEC_ViennaCLCopyFromGPU,v,0,0,0);
276:     try {
277:       ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
278:       viennacl::fast_copy(vec->begin(),vec->end(),*(PetscScalar**)v->data);
279:       ViennaCLWaitForGPU();
280:     } catch(std::exception const & ex) {
281:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
282:     }
283:     PetscLogEventEnd(VEC_ViennaCLCopyFromGPU,v,0,0,0);
284:     v->valid_GPU_array = PETSC_OFFLOAD_BOTH;
285:   }
286:   return(0);
287: }


290: /* Copy on CPU */
291: static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin,Vec yin)
292: {
293:   PetscScalar       *ya;
294:   const PetscScalar *xa;
295:   PetscErrorCode    ierr;

298:   VecViennaCLAllocateCheckHost(xin);
299:   VecViennaCLAllocateCheckHost(yin);
300:   if (xin != yin) {
301:     VecGetArrayRead(xin,&xa);
302:     VecGetArray(yin,&ya);
303:     PetscMemcpy(ya,xa,xin->map->n*sizeof(PetscScalar));
304:     VecRestoreArrayRead(xin,&xa);
305:     VecRestoreArray(yin,&ya);
306:   }
307:   return(0);
308: }

310: static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin,PetscRandom r)
311: {
313:   PetscInt       n = xin->map->n,i;
314:   PetscScalar    *xx;

317:   VecGetArray(xin,&xx);
318:   for (i=0; i<n; i++) {PetscRandomGetValue(r,&xx[i]);}
319:   VecRestoreArray(xin,&xx);
320:   return(0);
321: }

323: static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
324: {
325:   Vec_Seq        *vs = (Vec_Seq*)v->data;

329:   PetscObjectSAWsViewOff(v);
330: #if defined(PETSC_USE_LOG)
331:   PetscLogObjectState((PetscObject)v,"Length=%D",v->map->n);
332: #endif
333:   if (vs->array_allocated) PetscFree(vs->array_allocated);
334:   PetscFree(vs);
335:   return(0);
336: }

338: static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
339: {
340:   Vec_Seq *v = (Vec_Seq*)vin->data;

343:   v->array         = v->unplacedarray;
344:   v->unplacedarray = 0;
345:   return(0);
346: }


349: /*MC
350:    VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL

352:    Options Database Keys:
353: . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()

355:   Level: beginner

357: .seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateSeqWithArray(), VECMPI, VecType, VecCreateMPI(), VecCreateSeq()
358: M*/


361: PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
362: {
363:   const ViennaCLVector  *xgpu;
364:   ViennaCLVector        *ygpu;
365:   PetscErrorCode        ierr;

368:   VecViennaCLGetArrayRead(xin,&xgpu);
369:   VecViennaCLGetArrayReadWrite(yin,&ygpu);
370:   try {
371:     if (alpha != 0.0 && xin->map->n > 0) {
372:       *ygpu = *xgpu + alpha * *ygpu;
373:       PetscLogFlops(2.0*yin->map->n);
374:     } else {
375:       *ygpu = *xgpu;
376:     }
377:     ViennaCLWaitForGPU();
378:   } catch(std::exception const & ex) {
379:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
380:   }
381:   VecViennaCLRestoreArrayRead(xin,&xgpu);
382:   VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
383:   return(0);
384: }


387: PetscErrorCode VecAXPY_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)
388: {
389:   const ViennaCLVector  *xgpu;
390:   ViennaCLVector        *ygpu;
391:   PetscErrorCode        ierr;

394:   if (alpha != 0.0 && xin->map->n > 0) {
395:     VecViennaCLGetArrayRead(xin,&xgpu);
396:     VecViennaCLGetArrayReadWrite(yin,&ygpu);
397:     try {
398:       *ygpu += alpha * *xgpu;
399:       ViennaCLWaitForGPU();
400:     } catch(std::exception const & ex) {
401:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
402:     }
403:     VecViennaCLRestoreArrayRead(xin,&xgpu);
404:     VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
405:     PetscLogFlops(2.0*yin->map->n);
406:   }
407:   return(0);
408: }


411: PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
412: {
413:   const ViennaCLVector  *xgpu,*ygpu;
414:   ViennaCLVector        *wgpu;
415:   PetscErrorCode        ierr;

418:   if (xin->map->n > 0) {
419:     VecViennaCLGetArrayRead(xin,&xgpu);
420:     VecViennaCLGetArrayRead(yin,&ygpu);
421:     VecViennaCLGetArrayWrite(win,&wgpu);
422:     try {
423:       *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
424:       ViennaCLWaitForGPU();
425:     } catch(std::exception const & ex) {
426:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
427:     }
428:     PetscLogFlops(win->map->n);
429:     VecViennaCLRestoreArrayRead(xin,&xgpu);
430:     VecViennaCLRestoreArrayRead(yin,&ygpu);
431:     VecViennaCLRestoreArrayWrite(win,&wgpu);
432:   }
433:   return(0);
434: }


437: PetscErrorCode VecWAXPY_SeqViennaCL(Vec win,PetscScalar alpha,Vec xin, Vec yin)
438: {
439:   const ViennaCLVector  *xgpu,*ygpu;
440:   ViennaCLVector        *wgpu;
441:   PetscErrorCode        ierr;

444:   if (alpha == 0.0 && xin->map->n > 0) {
445:     VecCopy_SeqViennaCL(yin,win);
446:   } else {
447:     VecViennaCLGetArrayRead(xin,&xgpu);
448:     VecViennaCLGetArrayRead(yin,&ygpu);
449:     VecViennaCLGetArrayWrite(win,&wgpu);
450:     if (alpha == 1.0) {
451:       try {
452:         *wgpu = *ygpu + *xgpu;
453:       } catch(std::exception const & ex) {
454:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
455:       }
456:       PetscLogFlops(win->map->n);
457:     } else if (alpha == -1.0) {
458:       try {
459:         *wgpu = *ygpu - *xgpu;
460:       } catch(std::exception const & ex) {
461:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
462:       }
463:       PetscLogFlops(win->map->n);
464:     } else {
465:       try {
466:         *wgpu = *ygpu + alpha * *xgpu;
467:       } catch(std::exception const & ex) {
468:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
469:       }
470:       PetscLogFlops(2*win->map->n);
471:     }
472:     ViennaCLWaitForGPU();
473:     VecViennaCLRestoreArrayRead(xin,&xgpu);
474:     VecViennaCLRestoreArrayRead(yin,&ygpu);
475:     VecViennaCLRestoreArrayWrite(win,&wgpu);
476:   }
477:   return(0);
478: }


481: /*
482:  * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
483:  *
484:  * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
485:  * hence there is an iterated application of these until the final result is obtained
486:  */
487: PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
488: {
490:   PetscInt       j;

493:   for (j = 0; j < nv; ++j) {
494:     if (j+1 < nv) {
495:       VecAXPBYPCZ_SeqViennaCL(xin,alpha[j],alpha[j+1],1.0,y[j],y[j+1]);
496:       ++j;
497:     } else {
498:       VecAXPY_SeqViennaCL(xin,alpha[j],y[j]);
499:     }
500:   }
501:   ViennaCLWaitForGPU();
502:   return(0);
503: }


506: PetscErrorCode VecDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
507: {
508:   const ViennaCLVector  *xgpu,*ygpu;
509:   PetscErrorCode        ierr;

512:   if (xin->map->n > 0) {
513:     VecViennaCLGetArrayRead(xin,&xgpu);
514:     VecViennaCLGetArrayRead(yin,&ygpu);
515:     try {
516:       *z = viennacl::linalg::inner_prod(*xgpu,*ygpu);
517:     } catch(std::exception const & ex) {
518:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
519:     }
520:     if (xin->map->n >0) {
521:       PetscLogFlops(2.0*xin->map->n-1);
522:     }
523:     ViennaCLWaitForGPU();
524:     VecViennaCLRestoreArrayRead(xin,&xgpu);
525:     VecViennaCLRestoreArrayRead(yin,&ygpu);
526:   } else *z = 0.0;
527:   return(0);
528: }



532: /*
533:  * Operation z[j] = dot(x, y[j])
534:  *
535:  * We use an iterated application of dot() for each j. For small ranges of j this is still faster than an allocation of extra memory in order to use gemv().
536:  */
537: PetscErrorCode VecMDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
538: {
539:   PetscErrorCode       ierr;
540:   PetscInt             n = xin->map->n,i;
541:   const ViennaCLVector *xgpu,*ygpu;
542:   Vec                  *yyin = (Vec*)yin;
543:   std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);

546:   if (xin->map->n > 0) {
547:     VecViennaCLGetArrayRead(xin,&xgpu);
548:     for (i=0; i<nv; i++) {
549:       VecViennaCLGetArrayRead(yyin[i],&ygpu);
550:       ygpu_array[i] = ygpu;
551:     }

553:     viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
554:     ViennaCLVector result = viennacl::linalg::inner_prod(*xgpu, y_tuple);

556:     viennacl::copy(result.begin(), result.end(), z);

558:     for (i=0; i<nv; i++) {
559:       VecViennaCLRestoreArrayRead(yyin[i],&ygpu);
560:     }

562:     ViennaCLWaitForGPU();
563:     VecViennaCLRestoreArrayRead(xin,&xgpu);
564:     PetscLogFlops(PetscMax(nv*(2.0*n-1),0.0));
565:   } else {
566:     for (i=0; i<nv; i++) z[i] = 0.0;
567:   }
568:   return(0);
569: }

571: PetscErrorCode VecMTDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
572: {

576:   /* Since complex case is not supported at the moment, this is the same as VecMDot_SeqViennaCL */
577:   VecMDot_SeqViennaCL(xin,nv,yin,z);
578:   ViennaCLWaitForGPU();
579:   return(0);
580: }


583: PetscErrorCode VecSet_SeqViennaCL(Vec xin,PetscScalar alpha)
584: {
585:   ViennaCLVector *xgpu;

589:   if (xin->map->n > 0) {
590:     VecViennaCLGetArrayWrite(xin,&xgpu);
591:     try {
592:       *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
593:       ViennaCLWaitForGPU();
594:     } catch(std::exception const & ex) {
595:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
596:     }
597:     VecViennaCLRestoreArrayWrite(xin,&xgpu);
598:   }
599:   return(0);
600: }

602: PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
603: {
604:   ViennaCLVector *xgpu;

608:   if (alpha == 0.0 && xin->map->n > 0) {
609:     VecSet_SeqViennaCL(xin,alpha);
610:     PetscLogFlops(xin->map->n);
611:   } else if (alpha != 1.0 && xin->map->n > 0) {
612:     VecViennaCLGetArrayReadWrite(xin,&xgpu);
613:     try {
614:       *xgpu *= alpha;
615:       ViennaCLWaitForGPU();
616:     } catch(std::exception const & ex) {
617:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
618:     }
619:     VecViennaCLRestoreArrayReadWrite(xin,&xgpu);
620:     PetscLogFlops(xin->map->n);
621:   }
622:   return(0);
623: }


626: PetscErrorCode VecTDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
627: {

631:   /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
632:   VecDot_SeqViennaCL(xin, yin, z);
633:   ViennaCLWaitForGPU();
634:   return(0);
635: }


638: PetscErrorCode VecCopy_SeqViennaCL(Vec xin,Vec yin)
639: {
640:   const ViennaCLVector *xgpu;
641:   ViennaCLVector       *ygpu;
642:   PetscErrorCode       ierr;

645:   if (xin != yin && xin->map->n > 0) {
646:     if (xin->valid_GPU_array == PETSC_OFFLOAD_GPU) {
647:       VecViennaCLGetArrayRead(xin,&xgpu);
648:       VecViennaCLGetArrayWrite(yin,&ygpu);
649:       try {
650:         *ygpu = *xgpu;
651:         ViennaCLWaitForGPU();
652:       } catch(std::exception const & ex) {
653:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
654:       }
655:       VecViennaCLRestoreArrayRead(xin,&xgpu);
656:       VecViennaCLRestoreArrayWrite(yin,&ygpu);

658:     } else if (xin->valid_GPU_array == PETSC_OFFLOAD_CPU) {
659:       /* copy in CPU if we are on the CPU*/
660:       VecCopy_SeqViennaCL_Private(xin,yin);
661:       ViennaCLWaitForGPU();
662:     } else if (xin->valid_GPU_array == PETSC_OFFLOAD_BOTH) {
663:       /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
664:       if (yin->valid_GPU_array == PETSC_OFFLOAD_CPU) {
665:         /* copy in CPU */
666:         VecCopy_SeqViennaCL_Private(xin,yin);
667:         ViennaCLWaitForGPU();
668:       } else if (yin->valid_GPU_array == PETSC_OFFLOAD_GPU) {
669:         /* copy in GPU */
670:         VecViennaCLGetArrayRead(xin,&xgpu);
671:         VecViennaCLGetArrayWrite(yin,&ygpu);
672:         try {
673:           *ygpu = *xgpu;
674:           ViennaCLWaitForGPU();
675:         } catch(std::exception const & ex) {
676:           SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
677:         }
678:         VecViennaCLRestoreArrayRead(xin,&xgpu);
679:         VecViennaCLRestoreArrayWrite(yin,&ygpu);
680:       } else if (yin->valid_GPU_array == PETSC_OFFLOAD_BOTH) {
681:         /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck
682:            default to copy in GPU (this is an arbitrary choice) */
683:         VecViennaCLGetArrayRead(xin,&xgpu);
684:         VecViennaCLGetArrayWrite(yin,&ygpu);
685:         try {
686:           *ygpu = *xgpu;
687:           ViennaCLWaitForGPU();
688:         } catch(std::exception const & ex) {
689:           SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
690:         }
691:         VecViennaCLRestoreArrayRead(xin,&xgpu);
692:         VecViennaCLRestoreArrayWrite(yin,&ygpu);
693:       } else {
694:         VecCopy_SeqViennaCL_Private(xin,yin);
695:         ViennaCLWaitForGPU();
696:       }
697:     }
698:   }
699:   return(0);
700: }


703: PetscErrorCode VecSwap_SeqViennaCL(Vec xin,Vec yin)
704: {
706:   ViennaCLVector *xgpu,*ygpu;

709:   if (xin != yin && xin->map->n > 0) {
710:     VecViennaCLGetArrayReadWrite(xin,&xgpu);
711:     VecViennaCLGetArrayReadWrite(yin,&ygpu);

713:     try {
714:       viennacl::swap(*xgpu, *ygpu);
715:       ViennaCLWaitForGPU();
716:     } catch(std::exception const & ex) {
717:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
718:     }
719:     VecViennaCLRestoreArrayReadWrite(xin,&xgpu);
720:     VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
721:   }
722:   return(0);
723: }


726: // y = alpha * x + beta * y
727: PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
728: {
729:   PetscErrorCode       ierr;
730:   PetscScalar          a = alpha,b = beta;
731:   const ViennaCLVector *xgpu;
732:   ViennaCLVector       *ygpu;

735:   if (a == 0.0 && xin->map->n > 0) {
736:     VecScale_SeqViennaCL(yin,beta);
737:   } else if (b == 1.0 && xin->map->n > 0) {
738:     VecAXPY_SeqViennaCL(yin,alpha,xin);
739:   } else if (a == 1.0 && xin->map->n > 0) {
740:     VecAYPX_SeqViennaCL(yin,beta,xin);
741:   } else if (b == 0.0 && xin->map->n > 0) {
742:     VecViennaCLGetArrayRead(xin,&xgpu);
743:     VecViennaCLGetArrayReadWrite(yin,&ygpu);
744:     try {
745:       *ygpu = *xgpu * alpha;
746:       ViennaCLWaitForGPU();
747:     } catch(std::exception const & ex) {
748:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
749:     }
750:     PetscLogFlops(xin->map->n);
751:     VecViennaCLRestoreArrayRead(xin,&xgpu);
752:     VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
753:   } else if (xin->map->n > 0) {
754:     VecViennaCLGetArrayRead(xin,&xgpu);
755:     VecViennaCLGetArrayReadWrite(yin,&ygpu);
756:     try {
757:       *ygpu = *xgpu * alpha + *ygpu * beta;
758:       ViennaCLWaitForGPU();
759:     } catch(std::exception const & ex) {
760:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
761:     }
762:     VecViennaCLRestoreArrayRead(xin,&xgpu);
763:     VecViennaCLRestoreArrayReadWrite(yin,&ygpu);
764:     PetscLogFlops(3.0*xin->map->n);
765:   }
766:   return(0);
767: }


770: /* operation  z = alpha * x + beta *y + gamma *z*/
771: PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
772: {
773:   PetscErrorCode       ierr;
774:   PetscInt             n = zin->map->n;
775:   const ViennaCLVector *xgpu,*ygpu;
776:   ViennaCLVector       *zgpu;

779:   VecViennaCLGetArrayRead(xin,&xgpu);
780:   VecViennaCLGetArrayRead(yin,&ygpu);
781:   VecViennaCLGetArrayReadWrite(zin,&zgpu);
782:   if (alpha == 0.0 && xin->map->n > 0) {
783:     try {
784:       if (beta == 0.0) {
785:         *zgpu = gamma * *zgpu;
786:         ViennaCLWaitForGPU();
787:         PetscLogFlops(1.0*n);
788:       } else if (gamma == 0.0) {
789:         *zgpu = beta * *ygpu;
790:         ViennaCLWaitForGPU();
791:         PetscLogFlops(1.0*n);
792:       } else {
793:         *zgpu = beta * *ygpu + gamma * *zgpu;
794:         ViennaCLWaitForGPU();
795:         PetscLogFlops(3.0*n);
796:       }
797:     } catch(std::exception const & ex) {
798:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
799:     }
800:     PetscLogFlops(3.0*n);
801:   } else if (beta == 0.0 && xin->map->n > 0) {
802:     try {
803:       if (gamma == 0.0) {
804:         *zgpu = alpha * *xgpu;
805:         ViennaCLWaitForGPU();
806:         PetscLogFlops(1.0*n);
807:       } else {
808:         *zgpu = alpha * *xgpu + gamma * *zgpu;
809:         ViennaCLWaitForGPU();
810:         PetscLogFlops(3.0*n);
811:       }
812:     } catch(std::exception const & ex) {
813:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
814:     }
815:   } else if (gamma == 0.0 && xin->map->n > 0) {
816:     try {
817:       *zgpu = alpha * *xgpu + beta * *ygpu;
818:       ViennaCLWaitForGPU();
819:     } catch(std::exception const & ex) {
820:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
821:     }
822:     PetscLogFlops(3.0*n);
823:   } else if (xin->map->n > 0) {
824:     try {
825:       /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
826:       if (gamma != 1.0)
827:         *zgpu *= gamma;
828:       *zgpu += alpha * *xgpu + beta * *ygpu;
829:       ViennaCLWaitForGPU();
830:     } catch(std::exception const & ex) {
831:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
832:     }
833:     VecViennaCLRestoreArrayReadWrite(zin,&zgpu);
834:     VecViennaCLRestoreArrayRead(xin,&xgpu);
835:     VecViennaCLRestoreArrayRead(yin,&ygpu);
836:     PetscLogFlops(5.0*n);
837:   }
838:   return(0);
839: }

841: PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win,Vec xin,Vec yin)
842: {
843:   PetscErrorCode       ierr;
844:   PetscInt             n = win->map->n;
845:   const ViennaCLVector *xgpu,*ygpu;
846:   ViennaCLVector       *wgpu;

849:   if (xin->map->n > 0) {
850:     VecViennaCLGetArrayRead(xin,&xgpu);
851:     VecViennaCLGetArrayRead(yin,&ygpu);
852:     VecViennaCLGetArrayReadWrite(win,&wgpu);
853:     try {
854:       *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
855:       ViennaCLWaitForGPU();
856:     } catch(std::exception const & ex) {
857:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
858:     }
859:     VecViennaCLRestoreArrayRead(xin,&xgpu);
860:     VecViennaCLRestoreArrayRead(yin,&ygpu);
861:     VecViennaCLRestoreArrayReadWrite(win,&wgpu);
862:     PetscLogFlops(n);
863:   }
864:   return(0);
865: }


868: PetscErrorCode VecNorm_SeqViennaCL(Vec xin,NormType type,PetscReal *z)
869: {
870:   PetscErrorCode       ierr;
871:   PetscInt             n = xin->map->n;
872:   PetscBLASInt         bn;
873:   const ViennaCLVector *xgpu;

876:   if (xin->map->n > 0) {
877:     PetscBLASIntCast(n,&bn);
878:     VecViennaCLGetArrayRead(xin,&xgpu);
879:     if (type == NORM_2 || type == NORM_FROBENIUS) {
880:       try {
881:         *z = viennacl::linalg::norm_2(*xgpu);
882:         ViennaCLWaitForGPU();
883:       } catch(std::exception const & ex) {
884:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
885:       }
886:       PetscLogFlops(PetscMax(2.0*n-1,0.0));
887:     } else if (type == NORM_INFINITY) {
888:       VecViennaCLGetArrayRead(xin,&xgpu);
889:       try {
890:         *z = viennacl::linalg::norm_inf(*xgpu);
891:         ViennaCLWaitForGPU();
892:       } catch(std::exception const & ex) {
893:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
894:       }
895:       VecViennaCLRestoreArrayRead(xin,&xgpu);
896:     } else if (type == NORM_1) {
897:       try {
898:         *z = viennacl::linalg::norm_1(*xgpu);
899:         ViennaCLWaitForGPU();
900:       } catch(std::exception const & ex) {
901:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
902:       }
903:       PetscLogFlops(PetscMax(n-1.0,0.0));
904:     } else if (type == NORM_1_AND_2) {
905:       try {
906:         *z     = viennacl::linalg::norm_1(*xgpu);
907:         *(z+1) = viennacl::linalg::norm_2(*xgpu);
908:         ViennaCLWaitForGPU();
909:       } catch(std::exception const & ex) {
910:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
911:       }
912:       PetscLogFlops(PetscMax(2.0*n-1,0.0));
913:       PetscLogFlops(PetscMax(n-1.0,0.0));
914:     }
915:     VecViennaCLRestoreArrayRead(xin,&xgpu);
916:   } else if (type == NORM_1_AND_2) {
917:     *z      = 0.0;
918:     *(z+1)  = 0.0;
919:   } else *z = 0.0;
920:   return(0);
921: }


924: PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin,PetscRandom r)
925: {

929:   VecSetRandom_SeqViennaCL_Private(xin,r);
930:   xin->valid_GPU_array = PETSC_OFFLOAD_CPU;
931:   return(0);
932: }

934: PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
935: {

940:   VecViennaCLCopyFromGPU(vin);
941:   VecResetArray_SeqViennaCL_Private(vin);
942:   vin->valid_GPU_array = PETSC_OFFLOAD_CPU;
943:   return(0);
944: }

946: PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
947: {

952:   VecViennaCLCopyFromGPU(vin);
953:   VecPlaceArray_Seq(vin,a);
954:   vin->valid_GPU_array = PETSC_OFFLOAD_CPU;
955:   return(0);
956: }

958: PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
959: {

964:   VecViennaCLCopyFromGPU(vin);
965:   VecReplaceArray_Seq(vin,a);
966:   vin->valid_GPU_array = PETSC_OFFLOAD_CPU;
967:   return(0);
968: }


971: /*@
972:    VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.

974:    Collective on MPI_Comm

976:    Input Parameter:
977: +  comm - the communicator, should be PETSC_COMM_SELF
978: -  n - the vector length

980:    Output Parameter:
981: .  V - the vector

983:    Notes:
984:    Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
985:    same type as an existing vector.

987:    Level: intermediate

989:    Concepts: vectors^creating sequential

991: .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
992: @*/
993: PetscErrorCode  VecCreateSeqViennaCL(MPI_Comm comm,PetscInt n,Vec *v)
994: {

998:   VecCreate(comm,v);
999:   VecSetSizes(*v,n,n);
1000:   VecSetType(*v,VECSEQVIENNACL);
1001:   return(0);
1002: }


1005: /*  VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1006:  *
1007:  *  Simply reuses VecDot() and VecNorm(). Performance improvement through custom kernel (kernel generator) possible.
1008:  */
1009: PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1010: {
1011:   PetscErrorCode                         ierr;

1014:   VecDot_SeqViennaCL(s,t,dp);
1015:   VecNorm_SeqViennaCL(t,NORM_2,nm);
1016:   *nm *= *nm; //squared norm required
1017:   return(0);
1018: }

1020: PetscErrorCode VecDuplicate_SeqViennaCL(Vec win,Vec *V)
1021: {

1025:   VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win),win->map->n,V);
1026:   PetscLayoutReference(win->map,&(*V)->map);
1027:   PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);
1028:   PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);
1029:   (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1030:   return(0);
1031: }

1033: PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1034: {

1038:   try {
1039:     if (v->spptr) {
1040:       delete ((Vec_ViennaCL*)v->spptr)->GPUarray;
1041:       delete (Vec_ViennaCL*) v->spptr;
1042:     }
1043:   } catch(char *ex) {
1044:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
1045:   }
1046:   VecDestroy_SeqViennaCL_Private(v);
1047:   return(0);
1048: }


1051: PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1052: {
1054:   PetscMPIInt    size;

1057:   MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
1058:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQVIENNACL on more than one process");
1059:   VecCreate_Seq_Private(V,0);
1060:   PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);

1062:   V->ops->dot             = VecDot_SeqViennaCL;
1063:   V->ops->norm            = VecNorm_SeqViennaCL;
1064:   V->ops->tdot            = VecTDot_SeqViennaCL;
1065:   V->ops->scale           = VecScale_SeqViennaCL;
1066:   V->ops->copy            = VecCopy_SeqViennaCL;
1067:   V->ops->set             = VecSet_SeqViennaCL;
1068:   V->ops->swap            = VecSwap_SeqViennaCL;
1069:   V->ops->axpy            = VecAXPY_SeqViennaCL;
1070:   V->ops->axpby           = VecAXPBY_SeqViennaCL;
1071:   V->ops->axpbypcz        = VecAXPBYPCZ_SeqViennaCL;
1072:   V->ops->pointwisemult   = VecPointwiseMult_SeqViennaCL;
1073:   V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1074:   V->ops->setrandom       = VecSetRandom_SeqViennaCL;
1075:   V->ops->dot_local       = VecDot_SeqViennaCL;
1076:   V->ops->tdot_local      = VecTDot_SeqViennaCL;
1077:   V->ops->norm_local      = VecNorm_SeqViennaCL;
1078:   V->ops->mdot_local      = VecMDot_SeqViennaCL;
1079:   V->ops->mtdot_local     = VecMTDot_SeqViennaCL;
1080:   V->ops->maxpy           = VecMAXPY_SeqViennaCL;
1081:   V->ops->mdot            = VecMDot_SeqViennaCL;
1082:   V->ops->mtdot           = VecMTDot_SeqViennaCL;
1083:   V->ops->aypx            = VecAYPX_SeqViennaCL;
1084:   V->ops->waxpy           = VecWAXPY_SeqViennaCL;
1085:   V->ops->dotnorm2        = VecDotNorm2_SeqViennaCL;
1086:   V->ops->placearray      = VecPlaceArray_SeqViennaCL;
1087:   V->ops->replacearray    = VecReplaceArray_SeqViennaCL;
1088:   V->ops->resetarray      = VecResetArray_SeqViennaCL;
1089:   V->ops->destroy         = VecDestroy_SeqViennaCL;
1090:   V->ops->duplicate       = VecDuplicate_SeqViennaCL;

1092:   VecViennaCLAllocateCheck(V);
1093:   V->valid_GPU_array      = PETSC_OFFLOAD_GPU;
1094:   VecSet(V,0.0);
1095:   return(0);
1096: }