Actual source code: mpivecpthread.c

petsc-3.3-p5 2012-12-01

#include <petsc-private/vecimpl.h>
#include <../src/sys/objects/pthread/pthreadimpl.h>
#include <../src/vec/vec/impls/seq/seqpthread/vecpthreadimpl.h>
#include <../src/vec/vec/impls/mpi/pvecimpl.h>  /*I   "petscvec.h"  I*/
#include <petscblaslapack.h>

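/* Bookkeeping shared with the sequential pthread vector kernels: the number of
   pthread vectors currently alive and the per-thread kernel work structures,
   allocated with the first such vector and freed with the last (see
   VecCreate_MPIPThread_Private() and VecDestroy_MPIPThread() below). */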
extern PetscInt        vecs_created;
extern Vec_KernelData  *vec_kerneldatap;
extern Vec_KernelData  **vec_pdata;

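/* VecDot_MPIPThread - each process computes its local dot product with the
   threaded sequential kernel, then the contributions are summed across the
   communicator with MPI_Allreduce(). */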
PetscErrorCode VecDot_MPIPThread(Vec xin,Vec yin,PetscScalar *z)
{
  PetscScalar    sum,work;

  VecDot_SeqPThread(xin,yin,&work);
  MPI_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,((PetscObject)xin)->comm);
  *z = sum;
  return(0);
}

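/* VecMDot_MPIPThread - multiple dot products: local values are computed with the
   threaded kernel and reduced in a single MPI_Allreduce(); a stack buffer is used
   for up to 128 right-hand vectors, a heap allocation beyond that. */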
PetscErrorCode VecMDot_MPIPThread(Vec xin,PetscInt nv,const Vec y[],PetscScalar *z)
{
  PetscScalar    awork[128],*work = awork;

  if (nv > 128) {
    PetscMalloc(nv*sizeof(PetscScalar),&work);
  }
  VecMDot_SeqPThread(xin,nv,y,work);
  MPI_Allreduce(work,z,nv,MPIU_SCALAR,MPIU_SUM,((PetscObject)xin)->comm);
  if (nv > 128) {
    PetscFree(work);
  }
  return(0);
}

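/* VecNorm_MPIPThread - the 2-norm (and Frobenius norm) sums the squared local
   contributions and takes a square root, the 1-norm sums the local 1-norms, the
   infinity norm takes a global maximum, and NORM_1_AND_2 combines the first two
   in a single reduction. */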
#include <../src/vec/vec/impls/seq/ftn-kernels/fnorm.h>
PetscErrorCode VecNorm_MPIPThread(Vec xin,NormType type,PetscReal *z)
{
  PetscReal      sum,work = 0.0;

  if (type == NORM_2 || type == NORM_FROBENIUS) {
    VecNorm_SeqPThread(xin,type,&work);
    work *= work;
    MPI_Allreduce(&work,&sum,1,MPIU_REAL,MPIU_SUM,((PetscObject)xin)->comm);
    *z = PetscSqrtReal(sum);
  } else if (type == NORM_1) {
    /* Find the local part */
    VecNorm_SeqPThread(xin,NORM_1,&work);
    /* Find the global sum */
    MPI_Allreduce(&work,z,1,MPIU_REAL,MPIU_SUM,((PetscObject)xin)->comm);
  } else if (type == NORM_INFINITY) {
    /* Find the local max */
    VecNorm_SeqPThread(xin,NORM_INFINITY,&work);
    /* Find the global max */
    MPI_Allreduce(&work,z,1,MPIU_REAL,MPIU_MAX,((PetscObject)xin)->comm);
  } else if (type == NORM_1_AND_2) {
    PetscReal temp[2];
    VecNorm_SeqPThread(xin,NORM_1,temp);
    VecNorm_SeqPThread(xin,NORM_2,temp+1);
    temp[1] = temp[1]*temp[1];
    MPI_Allreduce(temp,z,2,MPIU_REAL,MPIU_SUM,((PetscObject)xin)->comm);
    z[1] = PetscSqrtReal(z[1]);
  }
  return(0);
}

extern MPI_Op VecMax_Local_Op;
extern MPI_Op VecMin_Local_Op;

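/* VecMax_MPIPThread - global maximum and, if requested, the global index at which
   it occurs: the local (value, global index) pair is combined across processes
   with the user-defined reduction VecMax_Local_Op. */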
PetscErrorCode VecMax_MPIPThread(Vec xin,PetscInt *idx,PetscReal *z)
{
  PetscReal      work;

  /* Find the local max */
  VecMax_SeqPThread(xin,idx,&work);

  /* Find the global max */
  if (!idx) {
    MPI_Allreduce(&work,z,1,MPIU_REAL,MPIU_MAX,((PetscObject)xin)->comm);
  } else {
    PetscReal work2[2],z2[2];
    PetscInt  rstart;
    rstart   = xin->map->rstart;
    work2[0] = work;
    work2[1] = *idx + rstart;
    MPI_Allreduce(work2,z2,2,MPIU_REAL,VecMax_Local_Op,((PetscObject)xin)->comm);
    *z   = z2[0];
    *idx = (PetscInt)z2[1];
  }
  return(0);
}

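/* VecMin_MPIPThread - global minimum and, if requested, its global index;
   analogous to VecMax_MPIPThread() but using MPIU_MIN / VecMin_Local_Op. */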
PetscErrorCode VecMin_MPIPThread(Vec xin,PetscInt *idx,PetscReal *z)
{
  PetscReal      work;

  /* Find the local min */
  VecMin_SeqPThread(xin,idx,&work);

  /* Find the global min */
  if (!idx) {
    MPI_Allreduce(&work,z,1,MPIU_REAL,MPIU_MIN,((PetscObject)xin)->comm);
  } else {
    PetscReal work2[2],z2[2];
    PetscInt  rstart;

    VecGetOwnershipRange(xin,&rstart,PETSC_NULL);
    work2[0] = work;
    work2[1] = *idx + rstart;
    MPI_Allreduce(work2,z2,2,MPIU_REAL,VecMin_Local_Op,((PetscObject)xin)->comm);
    *z   = z2[0];
    *idx = (PetscInt)z2[1];
  }
  return(0);
}

PetscErrorCode VecCreate_MPIPThread_Private(Vec,PetscBool,PetscInt,const PetscScalar []);

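/* VecDuplicate_MPIPThread - creates a new vector with the same layout, operations
   table, ghost padding, local representation and scatter (if present), and
   stashing options as win; the values themselves are not copied. */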
PetscErrorCode VecDuplicate_MPIPThread(Vec win,Vec *v)
{
  Vec_MPI        *vw,*w = (Vec_MPI *)win->data;
  PetscScalar    *array;

  VecCreate(((PetscObject)win)->comm,v);
  PetscLayoutReference(win->map,&(*v)->map);

  VecCreate_MPIPThread_Private(*v,PETSC_TRUE,w->nghost,0);
  vw   = (Vec_MPI *)(*v)->data;
  PetscMemcpy((*v)->ops,win->ops,sizeof(struct _VecOps));

  /* save local representation of the parallel vector (and scatter) if it exists */
  if (w->localrep) {
    VecGetArray(*v,&array);
    VecCreateSeqWithArray(PETSC_COMM_SELF,1,win->map->n+w->nghost,array,&vw->localrep);
    PetscMemcpy(vw->localrep->ops,w->localrep->ops,sizeof(struct _VecOps));
    VecRestoreArray(*v,&array);
    PetscLogObjectParent(*v,vw->localrep);
    vw->localupdate = w->localupdate;
    if (vw->localupdate) {
      PetscObjectReference((PetscObject)vw->localupdate);
    }
  }

  /* New vector should inherit stashing property of parent */
  (*v)->stash.donotstash   = win->stash.donotstash;
  (*v)->stash.ignorenegidx = win->stash.ignorenegidx;

  PetscOListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*v))->olist);
  PetscFListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*v))->qlist);
  (*v)->map->bs   = win->map->bs;
  (*v)->bstash.bs = win->bstash.bs;

  return(0);
}

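/* VecDestroy_MPIPThread - frees the value array and any local representation and
   scatter, releases the threads layout when the layout is no longer shared,
   destroys the stashes, and frees the shared kernel data structures when the last
   pthread vector is destroyed. */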
PetscErrorCode VecDestroy_MPIPThread(Vec v)
{
  Vec_MPI *x = (Vec_MPI*)v->data;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)v,"Length=%D",v->map->N);
#endif
  if (!x) return(0);
  PetscFree(x->array_allocated);

  /* Destroy local representation of vector if it exists */
  if (x->localrep) {
    VecDestroy(&x->localrep);
    VecScatterDestroy(&x->localupdate);
  }

  if (!v->map->refcnt) {
    PetscThreadsLayoutDestroy(&v->map->tmap);
  }

  vecs_created--;
  /* Free the kernel data structure on the destruction of the last vector */
  if (vecs_created == 0) {
    PetscFree(vec_kerneldatap);
    PetscFree(vec_pdata);
  }

  /* Destroy the stashes: note the order - so that the tags are freed properly */
  VecStashDestroy_Private(&v->bstash);
  VecStashDestroy_Private(&v->stash);
  PetscFree(v->data);
  return(0);
}

extern PetscErrorCode VecSetOption_MPI(Vec,VecOption,PetscBool);
extern PetscErrorCode VecResetArray_MPI(Vec);

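/* Virtual function table for VECMPIPTHREAD: purely local operations use the
   SeqPThread kernels, while reductions, assembly, and view/load routines use the
   MPI-aware versions defined above or shared with the standard MPI vector. */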
static struct _VecOps DvOps = { VecDuplicate_MPIPThread, /* 1 */
            VecDuplicateVecs_Default,
            VecDestroyVecs_Default,
            VecDot_MPIPThread,
            VecMDot_MPIPThread,
            VecNorm_MPIPThread,
            VecTDot_MPI,
            VecMTDot_MPI,
            VecScale_SeqPThread,
            VecCopy_SeqPThread, /* 10 */
            VecSet_SeqPThread,
            VecSwap_SeqPThread,
            VecAXPY_SeqPThread,
            VecAXPBY_Seq,
            VecMAXPY_SeqPThread,
            VecAYPX_SeqPThread,
            VecWAXPY_SeqPThread,
            VecAXPBYPCZ_Seq,
            VecPointwiseMult_SeqPThread,
            VecPointwiseDivide_SeqPThread,
            VecSetValues_MPI, /* 20 */
            VecAssemblyBegin_MPI,
            VecAssemblyEnd_MPI,
            0,
            VecGetSize_MPI,
            VecGetSize_Seq,
            0,
            VecMax_MPIPThread,
            VecMin_MPIPThread,
            VecSetRandom_SeqPThread,
            VecSetOption_MPI,
            VecSetValuesBlocked_MPI,
            VecDestroy_MPIPThread,
            VecView_MPI,
            VecPlaceArray_MPI,
            VecReplaceArray_Seq,
            VecDot_SeqPThread,
            VecTDot_Seq,
            VecNorm_SeqPThread,
            VecMDot_SeqPThread,
            VecMTDot_Seq,
            VecLoad_Default,
            VecReciprocal_Default,
            VecConjugate_Seq,
            0,
            0,
            VecResetArray_MPI,
            0,
            VecMaxPointwiseDivide_Seq,
            VecPointwiseMax_Seq,
            VecPointwiseMaxAbs_Seq,
            VecPointwiseMin_Seq,
            VecGetValues_MPI,
            0,
            0,
            0,
            0,
            0,
            0,
            VecStrideGather_Default,
            VecStrideScatter_Default
};

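/* VecCreate_MPIPThread_Private - shared creation routine: installs the ops table,
   sets up the PETSc and threads layouts (number of threads and thread affinities),
   allocates or adopts the value array, creates the stashes, and allocates the
   shared kernel data structures when the first pthread vector is created. */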
PetscErrorCode VecCreate_MPIPThread_Private(Vec v,PetscBool alloc,PetscInt nghost,const PetscScalar array[])
{
  Vec_MPI            *s;
  PetscThreadsLayout tmap=v->map->tmap;

  PetscNewLog(v,Vec_MPI,&s);
  v->data        = (void*)s;
  PetscMemcpy(v->ops,&DvOps,sizeof(DvOps));
  s->nghost      = nghost;
  v->petscnative = PETSC_TRUE;

  PetscLayoutSetUp(v->map);

  if (!v->map->tmap) {
    PetscThreadsLayoutCreate(&v->map->tmap);
    tmap = v->map->tmap;
  }
  tmap->N = v->map->n;

  /* Set the number of threads */
  if (tmap->nthreads == PETSC_DECIDE) {
    VecSetNThreads(v,PETSC_DECIDE);
  }
  /* Set thread affinities */
  if (!tmap->affinity) {
    VecSetThreadAffinities(v,PETSC_NULL);
  }

  PetscThreadsLayoutSetUp(tmap);

  s->array           = (PetscScalar *)array;
  s->array_allocated = 0;

  if (alloc && !array) {
    PetscInt n = v->map->n+nghost;
    PetscMalloc(n*sizeof(PetscScalar),&s->array);
    PetscLogObjectMemory(v,n*sizeof(PetscScalar));
  }

  if (!vecs_created) {
    PetscMalloc((PetscMaxThreads+PetscMainThreadShareWork)*sizeof(Vec_KernelData),&vec_kerneldatap);
    PetscMalloc((PetscMaxThreads+PetscMainThreadShareWork)*sizeof(Vec_KernelData*),&vec_pdata);
  }
  vecs_created++;

  VecSet_SeqPThread(v,0.0);
  s->array_allocated = (PetscScalar*)s->array;

  /* By default parallel vectors do not have local representation */
  s->localrep    = 0;
  s->localupdate = 0;

  v->stash.insertmode = NOT_SET_VALUES;
  /* create the stashes. The block-size for bstash is set later when
     VecSetValuesBlocked is called.
  */
  VecStashCreate_Private(((PetscObject)v)->comm,1,&v->stash);
  VecStashCreate_Private(((PetscObject)v)->comm,v->map->bs,&v->bstash);

#if defined(PETSC_HAVE_MATLAB_ENGINE)
  PetscObjectComposeFunctionDynamic((PetscObject)v,"PetscMatlabEnginePut_C","VecMatlabEnginePut_Default",VecMatlabEnginePut_Default);
  PetscObjectComposeFunctionDynamic((PetscObject)v,"PetscMatlabEngineGet_C","VecMatlabEngineGet_Default",VecMatlabEngineGet_Default);
#endif
  PetscObjectChangeTypeName((PetscObject)v,VECMPIPTHREAD);
  return(0);
}

/*MC
   VECMPIPTHREAD - VECMPIPTHREAD = "mpipthread" - The basic parallel vector using POSIX threads

   Options Database Keys:
. -vec_type mpipthread - sets the vector type to VECMPIPTHREAD during a call to VecSetFromOptions()

  Level: beginner

.seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VECSEQPTHREAD, VECMPI
M*/

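/*
   Illustrative sketch (not part of the original source file): selecting
   VECMPIPTHREAD through the options database.  Assumes PetscInitialize() has
   been called; error checking is omitted for brevity.

      Vec x;
      VecCreate(PETSC_COMM_WORLD,&x);
      VecSetSizes(x,PETSC_DECIDE,100);
      VecSetFromOptions(x);          (run the program with -vec_type mpipthread)
      VecSet(x,2.0);
      VecDestroy(&x);
*/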
EXTERN_C_BEGIN
PetscErrorCode VecCreate_MPIPThread(Vec vv)
{
  PetscThreadsInitialize(PetscMaxThreads);
  VecCreate_MPIPThread_Private(vv,PETSC_TRUE,0,0);
  return(0);
}
EXTERN_C_END

/*MC
   VECPTHREAD - VECPTHREAD = "pthread" - A VECSEQPTHREAD on one process and a VECMPIPTHREAD on more than one process

   Options Database Keys:
. -vec_type pthread - sets the vector type to VECPTHREAD during a call to VecSetFromOptions()

  Level: intermediate

.seealso: VecCreateSeqPThread(), VecCreateMPI()
M*/

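/* VecCreate_PThread - creates a VECSEQPTHREAD when the communicator has a single
   process and a VECMPIPTHREAD otherwise. */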
EXTERN_C_BEGIN
PetscErrorCode VecCreate_PThread(Vec v)
{
  PetscMPIInt    size;

  MPI_Comm_size(((PetscObject)v)->comm,&size);
  if (size == 1) {
    VecCreate_SeqPThread(v);
  } else {
    VecCreate_MPIPThread(v);
  }
  return(0);
}
EXTERN_C_END

/*@
   VecCreateMPIPThread - Creates a parallel vector using POSIX threads.

   Collective on MPI_Comm

   Input Parameters:
+  comm - the MPI communicator to use
.  n - local vector length (or PETSC_DECIDE to have it calculated if N is given)
.  N - global vector length (or PETSC_DETERMINE to have it calculated if n is given)
.  nthreads - number of local threads (or PETSC_DECIDE to have nthreads calculated)
-  affinities - local thread affinities (or PETSC_NULL to have PETSc set the affinities)

   Output Parameter:
.  v - the vector

   Notes:
   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
   same type as an existing vector.

   Level: intermediate

   Concepts: vectors^creating parallel

.seealso: VecCreateSeqPThread(), VecCreate(), VecDuplicate(), VecDuplicateVecs()

@*/
PetscErrorCode VecCreateMPIPThread(MPI_Comm comm,PetscInt n,PetscInt N,PetscInt nthreads,PetscInt affinities[],Vec *v)
{
  VecCreate(comm,v);
  VecSetSizes(*v,n,N);
  VecSetNThreads(*v,nthreads);
  VecSetThreadAffinities(*v,affinities);
  VecSetType(*v,VECMPIPTHREAD);
  return(0);
}
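/*
   Illustrative sketch (not part of the original source file): creating a
   pthread-backed parallel vector directly.  The global size, thread count, and
   affinity values below are arbitrary examples; error checking is omitted.

      Vec      x;
      PetscInt affinities[2] = {0,1};
      VecCreateMPIPThread(PETSC_COMM_WORLD,PETSC_DECIDE,100,2,affinities,&x);
      VecSet(x,1.0);
      VecDestroy(&x);
*/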