Actual source code: mpicusp.cu

petsc-3.3-p5 2012-12-01
  2: /*
  3:    This file contains routines for Parallel vector operations.
  4:  */
  5: #include <petscconf.h>
  6: PETSC_CUDA_EXTERN_C_BEGIN
  7: #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
  8: PETSC_CUDA_EXTERN_C_END
  9: #include <../src/vec/vec/impls/seq/seqcusp/cuspvecimpl.h>

 13: PetscErrorCode VecDestroy_MPICUSP(Vec v)
 14: {

 18:   try{
 19:     if (v->spptr) {
 20:       delete ((Vec_CUSP*)v->spptr)->GPUarray;
 21:       delete (Vec_CUSP *)v->spptr;
 22:     }
 23:   } catch(char* ex){
 24:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error: %s", ex);
 25:   }
 26:   VecDestroy_MPI(v);
 27:   return(0);
 28: }

 32: PetscErrorCode VecNorm_MPICUSP(Vec xin,NormType type,PetscReal *z)
 33: {
 34:   PetscReal      sum,work = 0.0;

 38:   if (type == NORM_2 || type == NORM_FROBENIUS) {
 39:     VecNorm_SeqCUSP(xin,NORM_2,&work);
 40:     work *= work;
 41:     MPI_Allreduce(&work,&sum,1,MPIU_REAL,MPIU_SUM,((PetscObject)xin)->comm);
 42:     *z = PetscSqrtReal(sum);
 43:   } else if (type == NORM_1) {
 44:     /* Find the local part */
 45:     VecNorm_SeqCUSP(xin,NORM_1,&work);
 46:     /* Find the global max */
 47:     MPI_Allreduce(&work,z,1,MPIU_REAL,MPIU_SUM,((PetscObject)xin)->comm);
 48:   } else if (type == NORM_INFINITY) {
 49:     /* Find the local max */
 50:     VecNorm_SeqCUSP(xin,NORM_INFINITY,&work);
 51:     /* Find the global max */
 52:     MPI_Allreduce(&work,z,1,MPIU_REAL,MPIU_MAX,((PetscObject)xin)->comm);
 53:   } else if (type == NORM_1_AND_2) {
 54:     PetscReal temp[2];
 55:     VecNorm_SeqCUSP(xin,NORM_1,temp);
 56:     VecNorm_SeqCUSP(xin,NORM_2,temp+1);
 57:     temp[1] = temp[1]*temp[1];
 58:     MPI_Allreduce(temp,z,2,MPIU_REAL,MPIU_SUM,((PetscObject)xin)->comm);
 59:     z[1] = PetscSqrtReal(z[1]);
 60:   }
 61:   return(0);
 62: }

 66: PetscErrorCode VecDot_MPICUSP(Vec xin,Vec yin,PetscScalar *z)
 67: {
 68:   PetscScalar    sum,work;

 72:   VecDot_SeqCUSP(xin,yin,&work);
 73:   MPI_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,((PetscObject)xin)->comm);
 74:   *z = sum;
 75:   return(0);
 76: }

 80: PetscErrorCode VecTDot_MPICUSP(Vec xin,Vec yin,PetscScalar *z)
 81: {
 82:   PetscScalar    sum,work;

 86:   VecTDot_SeqCUSP(xin,yin,&work);
 87:   MPI_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,((PetscObject)xin)->comm);
 88:   *z   = sum;
 89:   return(0);
 90: }

 94: PetscErrorCode VecMDot_MPICUSP(Vec xin,PetscInt nv,const Vec y[],PetscScalar *z)
 95: {
 96:   PetscScalar    awork[128],*work = awork;

100:   if (nv > 128) {
101:     PetscMalloc(nv*sizeof(PetscScalar),&work);
102:   }
103:   VecMDot_SeqCUSP(xin,nv,y,work);
104:   MPI_Allreduce(work,z,nv,MPIU_SCALAR,MPIU_SUM,((PetscObject)xin)->comm);
105:   if (nv > 128) {
106:     PetscFree(work);
107:   }
108:   return(0);
109: }

111: /*MC
112:    VECMPICUSP - VECMPICUSP = "mpicusp" - The basic parallel vector, modified to use CUSP

114:    Options Database Keys:
115: . -vec_type mpicusp - sets the vector type to VECMPICUSP during a call to VecSetFromOptions()

117:   Level: beginner

119: .seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateMpiWithArray(), VECMPI, VecType, VecCreateMPI(), VecCreateMpi()
120: M*/


125: PetscErrorCode VecDuplicate_MPICUSP(Vec win,Vec *v)
126: {
128:   Vec_MPI        *vw,*w = (Vec_MPI *)win->data;
129:   PetscScalar    *array;

132:   VecCreate(((PetscObject)win)->comm,v);
133:   PetscLayoutReference(win->map,&(*v)->map);

135:   VecCreate_MPI_Private(*v,PETSC_FALSE,w->nghost,0);
136:   vw   = (Vec_MPI *)(*v)->data;
137:   PetscMemcpy((*v)->ops,win->ops,sizeof(struct _VecOps));

139:   /* save local representation of the parallel vector (and scatter) if it exists */
140:   if (w->localrep) {
141:     VecGetArray(*v,&array);
142:     VecCreateSeqWithArray(PETSC_COMM_SELF,1,win->map->n+w->nghost,array,&vw->localrep);
143:     PetscMemcpy(vw->localrep->ops,w->localrep->ops,sizeof(struct _VecOps));
144:     VecRestoreArray(*v,&array);
145:     PetscLogObjectParent(*v,vw->localrep);
146:     vw->localupdate = w->localupdate;
147:     if (vw->localupdate) {
148:       PetscObjectReference((PetscObject)vw->localupdate);
149:     }
150:   }

152:   /* New vector should inherit stashing property of parent */
153:   (*v)->stash.donotstash = win->stash.donotstash;
154:   (*v)->stash.ignorenegidx = win->stash.ignorenegidx;

156:   /* change type_name appropriately */
157:   PetscObjectChangeTypeName((PetscObject)(*v),VECMPICUSP);

159:   PetscOListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*v))->olist);
160:   PetscFListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*v))->qlist);
161:   (*v)->map->bs    = win->map->bs;
162:   (*v)->bstash.bs = win->bstash.bs;

164:   return(0);
165: }

169: PetscErrorCode VecDotNorm2_MPICUSP(Vec s,Vec t,PetscScalar *dp,PetscScalar *nm)
170: {
171:   PetscErrorCode  ierr;
172:   PetscScalar     work[2],sum[2];

175:   VecDotNorm2_SeqCUSP(s,t,work,work+1);
176:   MPI_Allreduce(&work,&sum,2,MPIU_SCALAR,MPIU_SUM,((PetscObject)s)->comm);
177:   *dp     = sum[0];
178:   *nm     = sum[1];
179:   return(0);
180: }

182: EXTERN_C_BEGIN
185: PetscErrorCode  VecCreate_MPICUSP(Vec vv)
186: {
189:   VecCreate_MPI_Private(vv,PETSC_FALSE,0,0);
190:   PetscObjectChangeTypeName((PetscObject)vv,VECMPICUSP);
191:   vv->ops->dotnorm2        = VecDotNorm2_MPICUSP;
192:   vv->ops->waxpy           = VecWAXPY_SeqCUSP;
193:   vv->ops->duplicate       = VecDuplicate_MPICUSP;
194:   vv->ops->dot             = VecDot_MPICUSP;
195:   vv->ops->mdot            = VecMDot_MPICUSP;
196:   vv->ops->tdot            = VecTDot_MPICUSP;
197:   vv->ops->norm            = VecNorm_MPICUSP;
198:   vv->ops->scale           = VecScale_SeqCUSP;
199:   vv->ops->copy            = VecCopy_SeqCUSP;
200:   vv->ops->set             = VecSet_SeqCUSP;
201:   vv->ops->swap            = VecSwap_SeqCUSP;
202:   vv->ops->axpy            = VecAXPY_SeqCUSP;
203:   vv->ops->axpby           = VecAXPBY_SeqCUSP;
204:   vv->ops->maxpy           = VecMAXPY_SeqCUSP;
205:   vv->ops->aypx            = VecAYPX_SeqCUSP;
206:   vv->ops->axpbypcz        = VecAXPBYPCZ_SeqCUSP;
207:   vv->ops->pointwisemult   = VecPointwiseMult_SeqCUSP;
208:   vv->ops->setrandom       = VecSetRandom_SeqCUSP;
209:   vv->ops->replacearray    = VecReplaceArray_SeqCUSP;
210:   vv->ops->dot_local       = VecDot_SeqCUSP;
211:   vv->ops->tdot_local      = VecTDot_SeqCUSP;
212:   vv->ops->norm_local      = VecNorm_SeqCUSP;
213:   vv->ops->mdot_local      = VecMDot_SeqCUSP;
214:   vv->ops->destroy         = VecDestroy_MPICUSP;
215:   vv->ops->pointwisedivide = VecPointwiseDivide_SeqCUSP;
216:   /* place array?
217:      reset array?
218:      get values?
219:   */
220:   VecCUSPAllocateCheck(vv);CHKERRCUSP(ierr);
221:   vv->valid_GPU_array      = PETSC_CUSP_GPU;
222:   VecSet(vv,0.0);
223:   return(0);
224: }
225: EXTERN_C_END

227: EXTERN_C_BEGIN
230: PetscErrorCode  VecCreate_CUSP(Vec v)
231: {
233:   PetscMPIInt    size;

236:   MPI_Comm_size(((PetscObject)v)->comm,&size);
237:   if (size == 1) {
238:     VecSetType(v,VECSEQCUSP);
239:   } else {
240:     VecSetType(v,VECMPICUSP);
241:   }
242:   return(0);
243: }
244: EXTERN_C_END