Actual source code: vecviennacl.cxx

petsc-master 2020-08-09
Report Typos and Errors
  1: /*
  2:    Implements the sequential ViennaCL vectors.
  3: */

  5: #include <petscconf.h>
  6:  #include <petsc/private/vecimpl.h>
  7:  #include <../src/vec/vec/impls/dvecimpl.h>
  8:  #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>

 10: #include <vector>

 12: #include "viennacl/linalg/inner_prod.hpp"
 13: #include "viennacl/linalg/norm_1.hpp"
 14: #include "viennacl/linalg/norm_2.hpp"
 15: #include "viennacl/linalg/norm_inf.hpp"

 17: #ifdef VIENNACL_WITH_OPENCL
 18: #include "viennacl/ocl/backend.hpp"
 19: #endif


 22: PETSC_EXTERN PetscErrorCode VecViennaCLGetArray(Vec v, ViennaCLVector **a)
 23: {

 28:   *a   = 0;
 29:   VecViennaCLCopyToGPU(v);
 30:   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
 31:   ViennaCLWaitForGPU();
 32:   return(0);
 33: }

 35: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArray(Vec v, ViennaCLVector **a)
 36: {

 41:   v->offloadmask = PETSC_OFFLOAD_GPU;

 43:   PetscObjectStateIncrease((PetscObject)v);
 44:   return(0);
 45: }

 47: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
 48: {

 53:   *a   = 0;
 54:   VecViennaCLCopyToGPU(v);
 55:   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
 56:   ViennaCLWaitForGPU();
 57:   return(0);
 58: }

 60: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
 61: {
 64:   return(0);
 65: }

 67: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
 68: {

 73:   *a   = 0;
 74:   VecViennaCLAllocateCheck(v);
 75:   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
 76:   ViennaCLWaitForGPU();
 77:   return(0);
 78: }

 80: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
 81: {

 86:   v->offloadmask = PETSC_OFFLOAD_GPU;

 88:   PetscObjectStateIncrease((PetscObject)v);
 89:   return(0);
 90: }



 94: PETSC_EXTERN PetscErrorCode PetscViennaCLInit()
 95: {
 96:   PetscErrorCode       ierr;
 97:   char                 string[20];
 98:   PetscBool            flg,flg_cuda,flg_opencl,flg_openmp;

101:   /* ViennaCL backend selection: CUDA, OpenCL, or OpenMP */
102:   PetscOptionsGetString(NULL,NULL,"-viennacl_backend",string,sizeof(string),&flg);
103:   if (flg) {
104:     try {
105:       PetscStrcasecmp(string,"cuda",&flg_cuda);
106:       PetscStrcasecmp(string,"opencl",&flg_opencl);
107:       PetscStrcasecmp(string,"openmp",&flg_openmp);

109:       /* A default (sequential) CPU backend is always available - even if OpenMP is not enabled. */
110:       if (flg_openmp) viennacl::backend::default_memory_type(viennacl::MAIN_MEMORY);
111: #if defined(PETSC_HAVE_CUDA)
112:       else if (flg_cuda) viennacl::backend::default_memory_type(viennacl::CUDA_MEMORY);
113: #endif
114: #if defined(PETSC_HAVE_OPENCL)
115:       else if (flg_opencl) viennacl::backend::default_memory_type(viennacl::OPENCL_MEMORY);
116: #endif
117:       else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Backend not recognized or available: %s.\n Pass -viennacl_view to see available backends for ViennaCL.\n", string);
118:     } catch (std::exception const & ex) {
119:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
120:     }
121:   }

123: #if defined(PETSC_HAVE_OPENCL)
124:   /* ViennaCL OpenCL device type configuration */
125:   PetscOptionsGetString(NULL,NULL,"-viennacl_opencl_device_type",string,sizeof(string),&flg);
126:   if (flg) {
127:     try {
128:       PetscStrcasecmp(string,"cpu",&flg);
129:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);

131:       PetscStrcasecmp(string,"gpu",&flg);
132:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);

134:       PetscStrcasecmp(string,"accelerator",&flg);
135:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
136:     } catch (std::exception const & ex) {
137:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
138:     }
139:   }
140: #endif

142:   /* Print available backends */
143:   PetscOptionsHasName(NULL,NULL,"-viennacl_view",&flg);
144:   if (flg) {
145:     PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backends available: ");
146: #if defined(PETSC_HAVE_CUDA)
147:     PetscPrintf(PETSC_COMM_WORLD, "CUDA, ");
148: #endif
149: #if defined(PETSC_HAVE_OPENCL)
150:     PetscPrintf(PETSC_COMM_WORLD, "OpenCL, ");
151: #endif
152: #if defined(PETSC_HAVE_OPENMP)
153:     PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
154: #else
155:     PetscPrintf(PETSC_COMM_WORLD, "OpenMP (1 thread) ");
156: #endif
157:     PetscPrintf(PETSC_COMM_WORLD, "\n");

159:     /* Print selected backends */
160:     PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backend  selected: ");
161: #if defined(PETSC_HAVE_CUDA)
162:     if (viennacl::backend::default_memory_type() == viennacl::CUDA_MEMORY) {
163:       PetscPrintf(PETSC_COMM_WORLD, "CUDA ");
164:     }
165: #endif
166: #if defined(PETSC_HAVE_OPENCL)
167:     if (viennacl::backend::default_memory_type() == viennacl::OPENCL_MEMORY) {
168:       PetscPrintf(PETSC_COMM_WORLD, "OpenCL ");
169:     }
170: #endif
171: #if defined(PETSC_HAVE_OPENMP)
172:     if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
173:       PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");
174:     }
175: #else
176:     if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
177:       PetscPrintf(PETSC_COMM_WORLD, "OpenMP (sequential - consider reconfiguration: --with-openmp=1) ");
178:     }
179: #endif
180:     PetscPrintf(PETSC_COMM_WORLD, "\n");
181:   }
182:   return(0);
183: }

185: /*
186:     Allocates space for the vector array on the Host if it does not exist.
187:     Does NOT change the PetscViennaCLFlag for the vector
188:     Does NOT zero the ViennaCL array
189:  */
190: PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
191: {
193:   PetscScalar    *array;
194:   Vec_Seq        *s;
195:   PetscInt       n = v->map->n;

198:   s    = (Vec_Seq*)v->data;
199:   VecViennaCLAllocateCheck(v);
200:   if (s->array == 0) {
201:     PetscMalloc1(n,&array);
202:     PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));
203:     s->array           = array;
204:     s->array_allocated = array;
205:   }
206:   return(0);
207: }


210: /*
211:     Allocates space for the vector array on the GPU if it does not exist.
212:     Does NOT change the PetscViennaCLFlag for the vector
213:     Does NOT zero the ViennaCL array

215:  */
216: PetscErrorCode VecViennaCLAllocateCheck(Vec v)
217: {
219:   if (!v->spptr) {
220:     try {
221:       v->spptr                            = new Vec_ViennaCL;
222:       ((Vec_ViennaCL*)v->spptr)->GPUarray = new ViennaCLVector((PetscBLASInt)v->map->n);

224:     } catch(std::exception const & ex) {
225:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
226:     }
227:   }
228:   return(0);
229: }


232: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
233: PetscErrorCode VecViennaCLCopyToGPU(Vec v)
234: {

239:   VecViennaCLAllocateCheck(v);
240:   if (v->map->n > 0) {
241:     if (v->offloadmask == PETSC_OFFLOAD_CPU) {
242:       PetscLogEventBegin(VEC_ViennaCLCopyToGPU,v,0,0,0);
243:       try {
244:         ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
245:         viennacl::fast_copy(*(PetscScalar**)v->data, *(PetscScalar**)v->data + v->map->n, vec->begin());
246:         ViennaCLWaitForGPU();
247:       } catch(std::exception const & ex) {
248:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
249:       }
250:       PetscLogCpuToGpu((v->map->n)*sizeof(PetscScalar));
251:       PetscLogEventEnd(VEC_ViennaCLCopyToGPU,v,0,0,0);
252:       v->offloadmask = PETSC_OFFLOAD_BOTH;
253:     }
254:   }
255:   return(0);
256: }



260: /*
261:      VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
262: */
263: PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
264: {

269:   VecViennaCLAllocateCheckHost(v);
270:   if (v->offloadmask == PETSC_OFFLOAD_GPU) {
271:     PetscLogEventBegin(VEC_ViennaCLCopyFromGPU,v,0,0,0);
272:     try {
273:       ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
274:       viennacl::fast_copy(vec->begin(),vec->end(),*(PetscScalar**)v->data);
275:       ViennaCLWaitForGPU();
276:     } catch(std::exception const & ex) {
277:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
278:     }
279:     PetscLogGpuToCpu((v->map->n)*sizeof(PetscScalar));
280:     PetscLogEventEnd(VEC_ViennaCLCopyFromGPU,v,0,0,0);
281:     v->offloadmask = PETSC_OFFLOAD_BOTH;
282:   }
283:   return(0);
284: }


287: /* Copy on CPU */
288: static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin,Vec yin)
289: {
290:   PetscScalar       *ya;
291:   const PetscScalar *xa;
292:   PetscErrorCode    ierr;

295:   VecViennaCLAllocateCheckHost(xin);
296:   VecViennaCLAllocateCheckHost(yin);
297:   if (xin != yin) {
298:     VecGetArrayRead(xin,&xa);
299:     VecGetArray(yin,&ya);
300:     PetscArraycpy(ya,xa,xin->map->n);
301:     VecRestoreArrayRead(xin,&xa);
302:     VecRestoreArray(yin,&ya);
303:   }
304:   return(0);
305: }

307: static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin,PetscRandom r)
308: {
310:   PetscInt       n = xin->map->n,i;
311:   PetscScalar    *xx;

314:   VecGetArray(xin,&xx);
315:   for (i=0; i<n; i++) {PetscRandomGetValue(r,&xx[i]);}
316:   VecRestoreArray(xin,&xx);
317:   return(0);
318: }

320: static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
321: {
322:   Vec_Seq        *vs = (Vec_Seq*)v->data;

326:   PetscObjectSAWsViewOff(v);
327: #if defined(PETSC_USE_LOG)
328:   PetscLogObjectState((PetscObject)v,"Length=%D",v->map->n);
329: #endif
330:   if (vs->array_allocated) { PetscFree(vs->array_allocated); }
331:   PetscFree(vs);
332:   return(0);
333: }

335: static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
336: {
337:   Vec_Seq *v = (Vec_Seq*)vin->data;

340:   v->array         = v->unplacedarray;
341:   v->unplacedarray = 0;
342:   return(0);
343: }


346: /*MC
347:    VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL

349:    Options Database Keys:
350: . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()

352:   Level: beginner

354: .seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateSeqWithArray(), VECMPI, VecType, VecCreateMPI(), VecCreateSeq()
355: M*/


358: PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
359: {
360:   const ViennaCLVector  *xgpu;
361:   ViennaCLVector        *ygpu;
362:   PetscErrorCode        ierr;

365:   VecViennaCLGetArrayRead(xin,&xgpu);
366:   VecViennaCLGetArray(yin,&ygpu);
367:   PetscLogGpuTimeBegin();
368:   try {
369:     if (alpha != 0.0 && xin->map->n > 0) {
370:       *ygpu = *xgpu + alpha * *ygpu;
371:       PetscLogGpuFlops(2.0*yin->map->n);
372:     } else {
373:       *ygpu = *xgpu;
374:     }
375:     ViennaCLWaitForGPU();
376:   } catch(std::exception const & ex) {
377:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
378:   }
379:   PetscLogGpuTimeEnd();
380:   VecViennaCLRestoreArrayRead(xin,&xgpu);
381:   VecViennaCLRestoreArray(yin,&ygpu);
382:   return(0);
383: }


386: PetscErrorCode VecAXPY_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)
387: {
388:   const ViennaCLVector  *xgpu;
389:   ViennaCLVector        *ygpu;
390:   PetscErrorCode        ierr;

393:   if (alpha != 0.0 && xin->map->n > 0) {
394:     VecViennaCLGetArrayRead(xin,&xgpu);
395:     VecViennaCLGetArray(yin,&ygpu);
396:     PetscLogGpuTimeBegin();
397:     try {
398:       *ygpu += alpha * *xgpu;
399:       ViennaCLWaitForGPU();
400:     } catch(std::exception const & ex) {
401:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
402:     }
403:     PetscLogGpuTimeEnd();
404:     VecViennaCLRestoreArrayRead(xin,&xgpu);
405:     VecViennaCLRestoreArray(yin,&ygpu);
406:     PetscLogGpuFlops(2.0*yin->map->n);
407:   }
408:   return(0);
409: }


412: PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
413: {
414:   const ViennaCLVector  *xgpu,*ygpu;
415:   ViennaCLVector        *wgpu;
416:   PetscErrorCode        ierr;

419:   if (xin->map->n > 0) {
420:     VecViennaCLGetArrayRead(xin,&xgpu);
421:     VecViennaCLGetArrayRead(yin,&ygpu);
422:     VecViennaCLGetArrayWrite(win,&wgpu);
423:     PetscLogGpuTimeBegin();
424:     try {
425:       *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
426:       ViennaCLWaitForGPU();
427:     } catch(std::exception const & ex) {
428:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
429:     }
430:     PetscLogGpuTimeEnd();
431:     PetscLogGpuFlops(win->map->n);
432:     VecViennaCLRestoreArrayRead(xin,&xgpu);
433:     VecViennaCLRestoreArrayRead(yin,&ygpu);
434:     VecViennaCLRestoreArrayWrite(win,&wgpu);
435:   }
436:   return(0);
437: }


440: PetscErrorCode VecWAXPY_SeqViennaCL(Vec win,PetscScalar alpha,Vec xin, Vec yin)
441: {
442:   const ViennaCLVector  *xgpu,*ygpu;
443:   ViennaCLVector        *wgpu;
444:   PetscErrorCode        ierr;

447:   if (alpha == 0.0 && xin->map->n > 0) {
448:     VecCopy_SeqViennaCL(yin,win);
449:   } else {
450:     VecViennaCLGetArrayRead(xin,&xgpu);
451:     VecViennaCLGetArrayRead(yin,&ygpu);
452:     VecViennaCLGetArrayWrite(win,&wgpu);
453:     PetscLogGpuTimeBegin();
454:     if (alpha == 1.0) {
455:       try {
456:         *wgpu = *ygpu + *xgpu;
457:       } catch(std::exception const & ex) {
458:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
459:       }
460:       PetscLogGpuFlops(win->map->n);
461:     } else if (alpha == -1.0) {
462:       try {
463:         *wgpu = *ygpu - *xgpu;
464:       } catch(std::exception const & ex) {
465:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
466:       }
467:       PetscLogGpuFlops(win->map->n);
468:     } else {
469:       try {
470:         *wgpu = *ygpu + alpha * *xgpu;
471:       } catch(std::exception const & ex) {
472:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
473:       }
474:       PetscLogGpuFlops(2*win->map->n);
475:     }
476:     ViennaCLWaitForGPU();
477:     PetscLogGpuTimeEnd();
478:     VecViennaCLRestoreArrayRead(xin,&xgpu);
479:     VecViennaCLRestoreArrayRead(yin,&ygpu);
480:     VecViennaCLRestoreArrayWrite(win,&wgpu);
481:   }
482:   return(0);
483: }


486: /*
487:  * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
488:  *
489:  * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
490:  * hence there is an iterated Section 1.5 Writing Application Codes with PETSc of these until the final result is obtained
491:  */
492: PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
493: {
495:   PetscInt       j;

498:   for (j = 0; j < nv; ++j) {
499:     if (j+1 < nv) {
500:       VecAXPBYPCZ_SeqViennaCL(xin,alpha[j],alpha[j+1],1.0,y[j],y[j+1]);
501:       ++j;
502:     } else {
503:       VecAXPY_SeqViennaCL(xin,alpha[j],y[j]);
504:     }
505:   }
506:   ViennaCLWaitForGPU();
507:   return(0);
508: }


511: PetscErrorCode VecDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
512: {
513:   const ViennaCLVector  *xgpu,*ygpu;
514:   PetscErrorCode        ierr;

517:   if (xin->map->n > 0) {
518:     VecViennaCLGetArrayRead(xin,&xgpu);
519:     VecViennaCLGetArrayRead(yin,&ygpu);
520:     PetscLogGpuTimeBegin();
521:     try {
522:       *z = viennacl::linalg::inner_prod(*xgpu,*ygpu);
523:     } catch(std::exception const & ex) {
524:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
525:     }
526:     ViennaCLWaitForGPU();
527:     PetscLogGpuTimeEnd();
528:     if (xin->map->n >0) {
529:       PetscLogGpuFlops(2.0*xin->map->n-1);
530:     }
531:     VecViennaCLRestoreArrayRead(xin,&xgpu);
532:     VecViennaCLRestoreArrayRead(yin,&ygpu);
533:   } else *z = 0.0;
534:   return(0);
535: }



539: /*
540:  * Operation z[j] = dot(x, y[j])
541:  *
542:  * We use an iterated Section 1.5 Writing Application Codes with PETSc of dot() for each j. For small ranges of j this is still faster than an allocation of extra memory in order to use gemv().
543:  */
544: PetscErrorCode VecMDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
545: {
546:   PetscErrorCode       ierr;
547:   PetscInt             n = xin->map->n,i;
548:   const ViennaCLVector *xgpu,*ygpu;
549:   Vec                  *yyin = (Vec*)yin;
550:   std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);

553:   if (xin->map->n > 0) {
554:     VecViennaCLGetArrayRead(xin,&xgpu);
555:     for (i=0; i<nv; i++) {
556:       VecViennaCLGetArrayRead(yyin[i],&ygpu);
557:       ygpu_array[i] = ygpu;
558:     }
559:     PetscLogGpuTimeBegin();
560:     viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
561:     ViennaCLVector result = viennacl::linalg::inner_prod(*xgpu, y_tuple);
562:     viennacl::copy(result.begin(), result.end(), z);
563:     for (i=0; i<nv; i++) {
564:       VecViennaCLRestoreArrayRead(yyin[i],&ygpu);
565:     }
566:     ViennaCLWaitForGPU();
567:     PetscLogGpuTimeEnd();
568:     VecViennaCLRestoreArrayRead(xin,&xgpu);
569:     PetscLogGpuFlops(PetscMax(nv*(2.0*n-1),0.0));
570:   } else {
571:     for (i=0; i<nv; i++) z[i] = 0.0;
572:   }
573:   return(0);
574: }

576: PetscErrorCode VecMTDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
577: {

581:   /* Since complex case is not supported at the moment, this is the same as VecMDot_SeqViennaCL */
582:   VecMDot_SeqViennaCL(xin,nv,yin,z);
583:   ViennaCLWaitForGPU();
584:   return(0);
585: }


588: PetscErrorCode VecSet_SeqViennaCL(Vec xin,PetscScalar alpha)
589: {
590:   ViennaCLVector *xgpu;

594:   if (xin->map->n > 0) {
595:     VecViennaCLGetArrayWrite(xin,&xgpu);
596:     PetscLogGpuTimeBegin();
597:     try {
598:       *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
599:       ViennaCLWaitForGPU();
600:     } catch(std::exception const & ex) {
601:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
602:     }
603:     PetscLogGpuTimeEnd();
604:     VecViennaCLRestoreArrayWrite(xin,&xgpu);
605:   }
606:   return(0);
607: }

609: PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
610: {
611:   ViennaCLVector *xgpu;

615:   if (alpha == 0.0 && xin->map->n > 0) {
616:     VecSet_SeqViennaCL(xin,alpha);
617:     PetscLogGpuFlops(xin->map->n);
618:   } else if (alpha != 1.0 && xin->map->n > 0) {
619:     VecViennaCLGetArray(xin,&xgpu);
620:     PetscLogGpuTimeBegin();
621:     try {
622:       *xgpu *= alpha;
623:       ViennaCLWaitForGPU();
624:     } catch(std::exception const & ex) {
625:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
626:     }
627:     PetscLogGpuTimeEnd();
628:     VecViennaCLRestoreArray(xin,&xgpu);
629:     PetscLogGpuFlops(xin->map->n);
630:   }
631:   return(0);
632: }


635: PetscErrorCode VecTDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
636: {

640:   /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
641:   VecDot_SeqViennaCL(xin, yin, z);
642:   ViennaCLWaitForGPU();
643:   return(0);
644: }


647: PetscErrorCode VecCopy_SeqViennaCL(Vec xin,Vec yin)
648: {
649:   const ViennaCLVector *xgpu;
650:   ViennaCLVector       *ygpu;
651:   PetscErrorCode       ierr;

654:   if (xin != yin && xin->map->n > 0) {
655:     if (xin->offloadmask == PETSC_OFFLOAD_GPU) {
656:       VecViennaCLGetArrayRead(xin,&xgpu);
657:       VecViennaCLGetArrayWrite(yin,&ygpu);
658:       PetscLogGpuTimeBegin();
659:       try {
660:         *ygpu = *xgpu;
661:         ViennaCLWaitForGPU();
662:       } catch(std::exception const & ex) {
663:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
664:       }
665:       PetscLogGpuTimeEnd();
666:       VecViennaCLRestoreArrayRead(xin,&xgpu);
667:       VecViennaCLRestoreArrayWrite(yin,&ygpu);

669:     } else if (xin->offloadmask == PETSC_OFFLOAD_CPU) {
670:       /* copy in CPU if we are on the CPU*/
671:       VecCopy_SeqViennaCL_Private(xin,yin);
672:       ViennaCLWaitForGPU();
673:     } else if (xin->offloadmask == PETSC_OFFLOAD_BOTH) {
674:       /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
675:       if (yin->offloadmask == PETSC_OFFLOAD_CPU) {
676:         /* copy in CPU */
677:         VecCopy_SeqViennaCL_Private(xin,yin);
678:         ViennaCLWaitForGPU();
679:       } else if (yin->offloadmask == PETSC_OFFLOAD_GPU) {
680:         /* copy in GPU */
681:         VecViennaCLGetArrayRead(xin,&xgpu);
682:         VecViennaCLGetArrayWrite(yin,&ygpu);
683:         PetscLogGpuTimeBegin();
684:         try {
685:           *ygpu = *xgpu;
686:           ViennaCLWaitForGPU();
687:         } catch(std::exception const & ex) {
688:           SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
689:         }
690:         PetscLogGpuTimeEnd();
691:         VecViennaCLRestoreArrayRead(xin,&xgpu);
692:         VecViennaCLRestoreArrayWrite(yin,&ygpu);
693:       } else if (yin->offloadmask == PETSC_OFFLOAD_BOTH) {
694:         /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck
695:            default to copy in GPU (this is an arbitrary choice) */
696:         VecViennaCLGetArrayRead(xin,&xgpu);
697:         VecViennaCLGetArrayWrite(yin,&ygpu);
698:         PetscLogGpuTimeBegin();
699:         try {
700:           *ygpu = *xgpu;
701:           ViennaCLWaitForGPU();
702:         } catch(std::exception const & ex) {
703:           SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
704:         }
705:         PetscLogGpuTimeEnd();
706:         VecViennaCLRestoreArrayRead(xin,&xgpu);
707:         VecViennaCLRestoreArrayWrite(yin,&ygpu);
708:       } else {
709:         VecCopy_SeqViennaCL_Private(xin,yin);
710:         ViennaCLWaitForGPU();
711:       }
712:     }
713:   }
714:   return(0);
715: }


718: PetscErrorCode VecSwap_SeqViennaCL(Vec xin,Vec yin)
719: {
721:   ViennaCLVector *xgpu,*ygpu;

724:   if (xin != yin && xin->map->n > 0) {
725:     VecViennaCLGetArray(xin,&xgpu);
726:     VecViennaCLGetArray(yin,&ygpu);
727:     PetscLogGpuTimeBegin();
728:     try {
729:       viennacl::swap(*xgpu, *ygpu);
730:       ViennaCLWaitForGPU();
731:     } catch(std::exception const & ex) {
732:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
733:     }
734:     PetscLogGpuTimeEnd();
735:     VecViennaCLRestoreArray(xin,&xgpu);
736:     VecViennaCLRestoreArray(yin,&ygpu);
737:   }
738:   return(0);
739: }


742: // y = alpha * x + beta * y
743: PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
744: {
745:   PetscErrorCode       ierr;
746:   PetscScalar          a = alpha,b = beta;
747:   const ViennaCLVector *xgpu;
748:   ViennaCLVector       *ygpu;

751:   if (a == 0.0 && xin->map->n > 0) {
752:     VecScale_SeqViennaCL(yin,beta);
753:   } else if (b == 1.0 && xin->map->n > 0) {
754:     VecAXPY_SeqViennaCL(yin,alpha,xin);
755:   } else if (a == 1.0 && xin->map->n > 0) {
756:     VecAYPX_SeqViennaCL(yin,beta,xin);
757:   } else if (b == 0.0 && xin->map->n > 0) {
758:     VecViennaCLGetArrayRead(xin,&xgpu);
759:     VecViennaCLGetArray(yin,&ygpu);
760:     PetscLogGpuTimeBegin();
761:     try {
762:       *ygpu = *xgpu * alpha;
763:       ViennaCLWaitForGPU();
764:     } catch(std::exception const & ex) {
765:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
766:     }
767:     PetscLogGpuTimeEnd();
768:     PetscLogGpuFlops(xin->map->n);
769:     VecViennaCLRestoreArrayRead(xin,&xgpu);
770:     VecViennaCLRestoreArray(yin,&ygpu);
771:   } else if (xin->map->n > 0) {
772:     VecViennaCLGetArrayRead(xin,&xgpu);
773:     VecViennaCLGetArray(yin,&ygpu);
774:     PetscLogGpuTimeBegin();
775:     try {
776:       *ygpu = *xgpu * alpha + *ygpu * beta;
777:       ViennaCLWaitForGPU();
778:     } catch(std::exception const & ex) {
779:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
780:     }
781:     PetscLogGpuTimeEnd();
782:     VecViennaCLRestoreArrayRead(xin,&xgpu);
783:     VecViennaCLRestoreArray(yin,&ygpu);
784:     PetscLogGpuFlops(3.0*xin->map->n);
785:   }
786:   return(0);
787: }


790: /* operation  z = alpha * x + beta *y + gamma *z*/
791: PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
792: {
793:   PetscErrorCode       ierr;
794:   PetscInt             n = zin->map->n;
795:   const ViennaCLVector *xgpu,*ygpu;
796:   ViennaCLVector       *zgpu;

799:   VecViennaCLGetArrayRead(xin,&xgpu);
800:   VecViennaCLGetArrayRead(yin,&ygpu);
801:   VecViennaCLGetArray(zin,&zgpu);
802:   if (alpha == 0.0 && xin->map->n > 0) {
803:     PetscLogGpuTimeBegin();
804:     try {
805:       if (beta == 0.0) {
806:         *zgpu = gamma * *zgpu;
807:         ViennaCLWaitForGPU();
808:         PetscLogGpuFlops(1.0*n);
809:       } else if (gamma == 0.0) {
810:         *zgpu = beta * *ygpu;
811:         ViennaCLWaitForGPU();
812:         PetscLogGpuFlops(1.0*n);
813:       } else {
814:         *zgpu = beta * *ygpu + gamma * *zgpu;
815:         ViennaCLWaitForGPU();
816:         PetscLogGpuFlops(3.0*n);
817:       }
818:     } catch(std::exception const & ex) {
819:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
820:     }
821:     PetscLogGpuTimeEnd();
822:     PetscLogGpuFlops(3.0*n);
823:   } else if (beta == 0.0 && xin->map->n > 0) {
824:     PetscLogGpuTimeBegin();
825:     try {
826:       if (gamma == 0.0) {
827:         *zgpu = alpha * *xgpu;
828:         ViennaCLWaitForGPU();
829:         PetscLogGpuFlops(1.0*n);
830:       } else {
831:         *zgpu = alpha * *xgpu + gamma * *zgpu;
832:         ViennaCLWaitForGPU();
833:         PetscLogGpuFlops(3.0*n);
834:       }
835:     } catch(std::exception const & ex) {
836:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
837:     }
838:     PetscLogGpuTimeEnd();
839:   } else if (gamma == 0.0 && xin->map->n > 0) {
840:     PetscLogGpuTimeBegin();
841:     try {
842:       *zgpu = alpha * *xgpu + beta * *ygpu;
843:       ViennaCLWaitForGPU();
844:     } catch(std::exception const & ex) {
845:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
846:     }
847:     PetscLogGpuTimeEnd();
848:     PetscLogGpuFlops(3.0*n);
849:   } else if (xin->map->n > 0) {
850:     PetscLogGpuTimeBegin();
851:     try {
852:       /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
853:       if (gamma != 1.0)
854:         *zgpu *= gamma;
855:       *zgpu += alpha * *xgpu + beta * *ygpu;
856:       ViennaCLWaitForGPU();
857:     } catch(std::exception const & ex) {
858:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
859:     }
860:     PetscLogGpuTimeEnd();
861:     VecViennaCLRestoreArray(zin,&zgpu);
862:     VecViennaCLRestoreArrayRead(xin,&xgpu);
863:     VecViennaCLRestoreArrayRead(yin,&ygpu);
864:     PetscLogGpuFlops(5.0*n);
865:   }
866:   return(0);
867: }

869: PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win,Vec xin,Vec yin)
870: {
871:   PetscErrorCode       ierr;
872:   PetscInt             n = win->map->n;
873:   const ViennaCLVector *xgpu,*ygpu;
874:   ViennaCLVector       *wgpu;

877:   if (xin->map->n > 0) {
878:     VecViennaCLGetArrayRead(xin,&xgpu);
879:     VecViennaCLGetArrayRead(yin,&ygpu);
880:     VecViennaCLGetArray(win,&wgpu);
881:     PetscLogGpuTimeBegin();
882:     try {
883:       *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
884:       ViennaCLWaitForGPU();
885:     } catch(std::exception const & ex) {
886:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
887:     }
888:     PetscLogGpuTimeEnd();
889:     VecViennaCLRestoreArrayRead(xin,&xgpu);
890:     VecViennaCLRestoreArrayRead(yin,&ygpu);
891:     VecViennaCLRestoreArray(win,&wgpu);
892:     PetscLogGpuFlops(n);
893:   }
894:   return(0);
895: }


898: PetscErrorCode VecNorm_SeqViennaCL(Vec xin,NormType type,PetscReal *z)
899: {
900:   PetscErrorCode       ierr;
901:   PetscInt             n = xin->map->n;
902:   PetscBLASInt         bn;
903:   const ViennaCLVector *xgpu;

906:   if (xin->map->n > 0) {
907:     PetscBLASIntCast(n,&bn);
908:     VecViennaCLGetArrayRead(xin,&xgpu);
909:     if (type == NORM_2 || type == NORM_FROBENIUS) {
910:       PetscLogGpuTimeBegin();
911:       try {
912:         *z = viennacl::linalg::norm_2(*xgpu);
913:         ViennaCLWaitForGPU();
914:       } catch(std::exception const & ex) {
915:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
916:       }
917:       PetscLogGpuTimeEnd();
918:       PetscLogGpuFlops(PetscMax(2.0*n-1,0.0));
919:     } else if (type == NORM_INFINITY) {
920:       PetscLogGpuTimeBegin();
921:       try {
922:         *z = viennacl::linalg::norm_inf(*xgpu);
923:         ViennaCLWaitForGPU();
924:       } catch(std::exception const & ex) {
925:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
926:       }
927:       PetscLogGpuTimeEnd();
928:     } else if (type == NORM_1) {
929:       PetscLogGpuTimeBegin();
930:       try {
931:         *z = viennacl::linalg::norm_1(*xgpu);
932:         ViennaCLWaitForGPU();
933:       } catch(std::exception const & ex) {
934:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
935:       }
936:       PetscLogGpuTimeEnd();
937:       PetscLogGpuFlops(PetscMax(n-1.0,0.0));
938:     } else if (type == NORM_1_AND_2) {
939:       PetscLogGpuTimeBegin();
940:       try {
941:         *z     = viennacl::linalg::norm_1(*xgpu);
942:         *(z+1) = viennacl::linalg::norm_2(*xgpu);
943:         ViennaCLWaitForGPU();
944:       } catch(std::exception const & ex) {
945:         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
946:       }
947:       PetscLogGpuTimeEnd();
948:       PetscLogGpuFlops(PetscMax(2.0*n-1,0.0));
949:       PetscLogGpuFlops(PetscMax(n-1.0,0.0));
950:     }
951:     VecViennaCLRestoreArrayRead(xin,&xgpu);
952:   } else if (type == NORM_1_AND_2) {
953:     *z      = 0.0;
954:     *(z+1)  = 0.0;
955:   } else *z = 0.0;
956:   return(0);
957: }


960: PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin,PetscRandom r)
961: {

965:   VecSetRandom_SeqViennaCL_Private(xin,r);
966:   xin->offloadmask = PETSC_OFFLOAD_CPU;
967:   return(0);
968: }

970: PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
971: {

976:   VecViennaCLCopyFromGPU(vin);
977:   VecResetArray_SeqViennaCL_Private(vin);
978:   vin->offloadmask = PETSC_OFFLOAD_CPU;
979:   return(0);
980: }

982: PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
983: {

988:   VecViennaCLCopyFromGPU(vin);
989:   VecPlaceArray_Seq(vin,a);
990:   vin->offloadmask = PETSC_OFFLOAD_CPU;
991:   return(0);
992: }

994: PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
995: {

1000:   VecViennaCLCopyFromGPU(vin);
1001:   VecReplaceArray_Seq(vin,a);
1002:   vin->offloadmask = PETSC_OFFLOAD_CPU;
1003:   return(0);
1004: }


1007: /*@C
1008:    VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.

1010:    Collective

1012:    Input Parameter:
1013: +  comm - the communicator, should be PETSC_COMM_SELF
1014: -  n - the vector length

1016:    Output Parameter:
1017: .  V - the vector

1019:    Notes:
1020:    Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
1021:    same type as an existing vector.

1023:    Level: intermediate

1025: .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
1026: @*/
1027: PetscErrorCode  VecCreateSeqViennaCL(MPI_Comm comm,PetscInt n,Vec *v)
1028: {

1032:   VecCreate(comm,v);
1033:   VecSetSizes(*v,n,n);
1034:   VecSetType(*v,VECSEQVIENNACL);
1035:   return(0);
1036: }


1039: /*  VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1040:  *
1041:  *  Simply reuses VecDot() and VecNorm(). Performance improvement through custom kernel (kernel generator) possible.
1042:  */
1043: PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1044: {
1045:   PetscErrorCode                         ierr;

1048:   VecDot_SeqViennaCL(s,t,dp);
1049:   VecNorm_SeqViennaCL(t,NORM_2,nm);
1050:   *nm *= *nm; //squared norm required
1051:   return(0);
1052: }

1054: PetscErrorCode VecDuplicate_SeqViennaCL(Vec win,Vec *V)
1055: {

1059:   VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win),win->map->n,V);
1060:   PetscLayoutReference(win->map,&(*V)->map);
1061:   PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);
1062:   PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);
1063:   (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1064:   return(0);
1065: }

1067: PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1068: {

1072:   try {
1073:     if (v->spptr) {
1074:       delete ((Vec_ViennaCL*)v->spptr)->GPUarray;
1075:       delete (Vec_ViennaCL*) v->spptr;
1076:     }
1077:   } catch(char *ex) {
1078:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
1079:   }
1080:   VecDestroy_SeqViennaCL_Private(v);
1081:   return(0);
1082: }

1084: static PetscErrorCode VecBindToCPU_SeqAIJViennaCL(Vec V,PetscBool flg)
1085: {

1089:   V->boundtocpu = flg;
1090:   if (flg) {
1091:     VecViennaCLCopyFromGPU(V);
1092:     V->offloadmask = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
1093:     V->ops->dot             = VecDot_Seq;
1094:     V->ops->norm            = VecNorm_Seq;
1095:     V->ops->tdot            = VecTDot_Seq;
1096:     V->ops->scale           = VecScale_Seq;
1097:     V->ops->copy            = VecCopy_Seq;
1098:     V->ops->set             = VecSet_Seq;
1099:     V->ops->swap            = VecSwap_Seq;
1100:     V->ops->axpy            = VecAXPY_Seq;
1101:     V->ops->axpby           = VecAXPBY_Seq;
1102:     V->ops->axpbypcz        = VecAXPBYPCZ_Seq;
1103:     V->ops->pointwisemult   = VecPointwiseMult_Seq;
1104:     V->ops->pointwisedivide = VecPointwiseDivide_Seq;
1105:     V->ops->setrandom       = VecSetRandom_Seq;
1106:     V->ops->dot_local       = VecDot_Seq;
1107:     V->ops->tdot_local      = VecTDot_Seq;
1108:     V->ops->norm_local      = VecNorm_Seq;
1109:     V->ops->mdot_local      = VecMDot_Seq;
1110:     V->ops->mtdot_local     = VecMTDot_Seq;
1111:     V->ops->maxpy           = VecMAXPY_Seq;
1112:     V->ops->mdot            = VecMDot_Seq;
1113:     V->ops->mtdot           = VecMTDot_Seq;
1114:     V->ops->aypx            = VecAYPX_Seq;
1115:     V->ops->waxpy           = VecWAXPY_Seq;
1116:     V->ops->dotnorm2        = NULL;
1117:     V->ops->placearray      = VecPlaceArray_Seq;
1118:     V->ops->replacearray    = VecReplaceArray_Seq;
1119:     V->ops->resetarray      = VecResetArray_Seq;
1120:     V->ops->duplicate       = VecDuplicate_Seq;
1121:   } else {
1122:     V->ops->dot             = VecDot_SeqViennaCL;
1123:     V->ops->norm            = VecNorm_SeqViennaCL;
1124:     V->ops->tdot            = VecTDot_SeqViennaCL;
1125:     V->ops->scale           = VecScale_SeqViennaCL;
1126:     V->ops->copy            = VecCopy_SeqViennaCL;
1127:     V->ops->set             = VecSet_SeqViennaCL;
1128:     V->ops->swap            = VecSwap_SeqViennaCL;
1129:     V->ops->axpy            = VecAXPY_SeqViennaCL;
1130:     V->ops->axpby           = VecAXPBY_SeqViennaCL;
1131:     V->ops->axpbypcz        = VecAXPBYPCZ_SeqViennaCL;
1132:     V->ops->pointwisemult   = VecPointwiseMult_SeqViennaCL;
1133:     V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1134:     V->ops->setrandom       = VecSetRandom_SeqViennaCL;
1135:     V->ops->dot_local       = VecDot_SeqViennaCL;
1136:     V->ops->tdot_local      = VecTDot_SeqViennaCL;
1137:     V->ops->norm_local      = VecNorm_SeqViennaCL;
1138:     V->ops->mdot_local      = VecMDot_SeqViennaCL;
1139:     V->ops->mtdot_local     = VecMTDot_SeqViennaCL;
1140:     V->ops->maxpy           = VecMAXPY_SeqViennaCL;
1141:     V->ops->mdot            = VecMDot_SeqViennaCL;
1142:     V->ops->mtdot           = VecMTDot_SeqViennaCL;
1143:     V->ops->aypx            = VecAYPX_SeqViennaCL;
1144:     V->ops->waxpy           = VecWAXPY_SeqViennaCL;
1145:     V->ops->dotnorm2        = VecDotNorm2_SeqViennaCL;
1146:     V->ops->placearray      = VecPlaceArray_SeqViennaCL;
1147:     V->ops->replacearray    = VecReplaceArray_SeqViennaCL;
1148:     V->ops->resetarray      = VecResetArray_SeqViennaCL;
1149:     V->ops->destroy         = VecDestroy_SeqViennaCL;
1150:     V->ops->duplicate       = VecDuplicate_SeqViennaCL;
1151:   }
1152:   return(0);
1153: }

1155: PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1156: {
1158:   PetscMPIInt    size;

1161:   MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
1162:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQVIENNACL on more than one process");
1163:   VecCreate_Seq_Private(V,0);
1164:   PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);

1166:   VecBindToCPU_SeqAIJViennaCL(V,PETSC_FALSE);
1167:   V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;

1169:   VecViennaCLAllocateCheck(V);
1170:   VecViennaCLAllocateCheckHost(V);
1171:   VecSet(V,0.0);
1172:   VecSet_Seq(V,0.0);
1173:   V->offloadmask = PETSC_OFFLOAD_BOTH;
1174:   return(0);
1175: }