Actual source code: mpivecpthread.c
petsc-3.3-p5 2012-12-01
#include <petsc-private/vecimpl.h>
#include <../src/sys/objects/pthread/pthreadimpl.h>
#include <../src/vec/vec/impls/seq/seqpthread/vecpthreadimpl.h>
#include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
#include <petscblaslapack.h>

extern PetscInt       vecs_created;
extern Vec_KernelData *vec_kerneldatap;
extern Vec_KernelData **vec_pdata;
PetscErrorCode VecDot_MPIPThread(Vec xin,Vec yin,PetscScalar *z)
{
  PetscScalar sum,work;

  /* threaded local dot product, then a global sum over the communicator */
  VecDot_SeqPThread(xin,yin,&work);
  MPI_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,((PetscObject)xin)->comm);
  *z = sum;
  return(0);
}
PetscErrorCode VecMDot_MPIPThread(Vec xin,PetscInt nv,const Vec y[],PetscScalar *z)
{
  PetscScalar awork[128],*work = awork;

  /* use the stack buffer for up to 128 dot products, otherwise allocate */
  if (nv > 128) {
    PetscMalloc(nv*sizeof(PetscScalar),&work);
  }
  VecMDot_SeqPThread(xin,nv,y,work);
  MPI_Allreduce(work,z,nv,MPIU_SCALAR,MPIU_SUM,((PetscObject)xin)->comm);
  if (nv > 128) {
    PetscFree(work);
  }
  return(0);
}
#include <../src/vec/vec/impls/seq/ftn-kernels/fnorm.h>
PetscErrorCode VecNorm_MPIPThread(Vec xin,NormType type,PetscReal *z)
{
  PetscReal sum,work = 0.0;

  if (type == NORM_2 || type == NORM_FROBENIUS) {
    VecNorm_SeqPThread(xin,type,&work);
    work *= work;
    MPI_Allreduce(&work,&sum,1,MPIU_REAL,MPIU_SUM,((PetscObject)xin)->comm);
    *z = PetscSqrtReal(sum);
  } else if (type == NORM_1) {
    /* Find the local part */
    VecNorm_SeqPThread(xin,NORM_1,&work);
    /* Find the global sum */
    MPI_Allreduce(&work,z,1,MPIU_REAL,MPIU_SUM,((PetscObject)xin)->comm);
  } else if (type == NORM_INFINITY) {
    /* Find the local max */
    VecNorm_SeqPThread(xin,NORM_INFINITY,&work);
    /* Find the global max */
    MPI_Allreduce(&work,z,1,MPIU_REAL,MPIU_MAX,((PetscObject)xin)->comm);
  } else if (type == NORM_1_AND_2) {
    PetscReal temp[2];
    VecNorm_SeqPThread(xin,NORM_1,temp);
    VecNorm_SeqPThread(xin,NORM_2,temp+1);
    temp[1] = temp[1]*temp[1];
    MPI_Allreduce(temp,z,2,MPIU_REAL,MPIU_SUM,((PetscObject)xin)->comm);
    z[1] = PetscSqrtReal(z[1]);
  }
  return(0);
}
extern MPI_Op VecMax_Local_Op;
extern MPI_Op VecMin_Local_Op;

PetscErrorCode VecMax_MPIPThread(Vec xin,PetscInt *idx,PetscReal *z)
{
  PetscReal work;

  /* Find the local max */
  VecMax_SeqPThread(xin,idx,&work);

  /* Find the global max */
  if (!idx) {
    MPI_Allreduce(&work,z,1,MPIU_REAL,MPIU_MAX,((PetscObject)xin)->comm);
  } else {
    PetscReal work2[2],z2[2];
    PetscInt  rstart;
    rstart   = xin->map->rstart;
    work2[0] = work;
    work2[1] = *idx + rstart;
    MPI_Allreduce(work2,z2,2,MPIU_REAL,VecMax_Local_Op,((PetscObject)xin)->comm);
    *z   = z2[0];
    *idx = (PetscInt)z2[1];
  }
  return(0);
}
PetscErrorCode VecMin_MPIPThread(Vec xin,PetscInt *idx,PetscReal *z)
{
  PetscReal work;

  /* Find the local min */
  VecMin_SeqPThread(xin,idx,&work);

  /* Find the global min */
  if (!idx) {
    MPI_Allreduce(&work,z,1,MPIU_REAL,MPIU_MIN,((PetscObject)xin)->comm);
  } else {
    PetscReal work2[2],z2[2];
    PetscInt  rstart;

    VecGetOwnershipRange(xin,&rstart,PETSC_NULL);
    work2[0] = work;
    work2[1] = *idx + rstart;
    MPI_Allreduce(work2,z2,2,MPIU_REAL,VecMin_Local_Op,((PetscObject)xin)->comm);
    *z   = z2[0];
    *idx = (PetscInt)z2[1];
  }
  return(0);
}
PetscErrorCode VecCreate_MPIPThread_Private(Vec,PetscBool,PetscInt,const PetscScalar[]);
PetscErrorCode VecDuplicate_MPIPThread(Vec win,Vec *v)
{
  Vec_MPI     *vw,*w = (Vec_MPI*)win->data;
  PetscScalar *array;

  VecCreate(((PetscObject)win)->comm,v);
  PetscLayoutReference(win->map,&(*v)->map);

  VecCreate_MPIPThread_Private(*v,PETSC_TRUE,w->nghost,0);
  vw = (Vec_MPI*)(*v)->data;
  PetscMemcpy((*v)->ops,win->ops,sizeof(struct _VecOps));

  /* save local representation of the parallel vector (and scatter) if it exists */
  if (w->localrep) {
    VecGetArray(*v,&array);
    VecCreateSeqWithArray(PETSC_COMM_SELF,1,win->map->n+w->nghost,array,&vw->localrep);
    PetscMemcpy(vw->localrep->ops,w->localrep->ops,sizeof(struct _VecOps));
    VecRestoreArray(*v,&array);
    PetscLogObjectParent(*v,vw->localrep);
    vw->localupdate = w->localupdate;
    if (vw->localupdate) {
      PetscObjectReference((PetscObject)vw->localupdate);
    }
  }

  /* New vector should inherit stashing property of parent */
  (*v)->stash.donotstash   = win->stash.donotstash;
  (*v)->stash.ignorenegidx = win->stash.ignorenegidx;

  PetscOListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*v))->olist);
  PetscFListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*v))->qlist);
  (*v)->map->bs   = win->map->bs;
  (*v)->bstash.bs = win->bstash.bs;

  return(0);
}
PetscErrorCode VecDestroy_MPIPThread(Vec v)
{
  Vec_MPI *x = (Vec_MPI*)v->data;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)v,"Length=%D",v->map->N);
#endif
  if (!x) return(0);
  PetscFree(x->array_allocated);

  /* Destroy local representation of vector if it exists */
  if (x->localrep) {
    VecDestroy(&x->localrep);
    VecScatterDestroy(&x->localupdate);
  }

  if (!v->map->refcnt) {
    PetscThreadsLayoutDestroy(&v->map->tmap);
  }

  vecs_created--;
  /* Free the kernel data structure on the destruction of the last vector */
  if (vecs_created == 0) {
    PetscFree(vec_kerneldatap);
    PetscFree(vec_pdata);
  }

  /* Destroy the stashes: note the order - so that the tags are freed properly */
  VecStashDestroy_Private(&v->bstash);
  VecStashDestroy_Private(&v->stash);
  PetscFree(v->data);
  return(0);
}
extern PetscErrorCode VecSetOption_MPI(Vec,VecOption,PetscBool);
extern PetscErrorCode VecResetArray_MPI(Vec);

static struct _VecOps DvOps = { VecDuplicate_MPIPThread, /* 1 */
                                VecDuplicateVecs_Default,
                                VecDestroyVecs_Default,
                                VecDot_MPIPThread,
                                VecMDot_MPIPThread,
                                VecNorm_MPIPThread,
                                VecTDot_MPI,
                                VecMTDot_MPI,
                                VecScale_SeqPThread,
                                VecCopy_SeqPThread, /* 10 */
                                VecSet_SeqPThread,
                                VecSwap_SeqPThread,
                                VecAXPY_SeqPThread,
                                VecAXPBY_Seq,
                                VecMAXPY_SeqPThread,
                                VecAYPX_SeqPThread,
                                VecWAXPY_SeqPThread,
                                VecAXPBYPCZ_Seq,
                                VecPointwiseMult_SeqPThread,
                                VecPointwiseDivide_SeqPThread,
                                VecSetValues_MPI, /* 20 */
                                VecAssemblyBegin_MPI,
                                VecAssemblyEnd_MPI,
                                0,
                                VecGetSize_MPI,
                                VecGetSize_Seq,
                                0,
                                VecMax_MPIPThread,
                                VecMin_MPIPThread,
                                VecSetRandom_SeqPThread,
                                VecSetOption_MPI,
                                VecSetValuesBlocked_MPI,
                                VecDestroy_MPIPThread,
                                VecView_MPI,
                                VecPlaceArray_MPI,
                                VecReplaceArray_Seq,
                                VecDot_SeqPThread,
                                VecTDot_Seq,
                                VecNorm_SeqPThread,
                                VecMDot_SeqPThread,
                                VecMTDot_Seq,
                                VecLoad_Default,
                                VecReciprocal_Default,
                                VecConjugate_Seq,
                                0,
                                0,
                                VecResetArray_MPI,
                                0,
                                VecMaxPointwiseDivide_Seq,
                                VecPointwiseMax_Seq,
                                VecPointwiseMaxAbs_Seq,
                                VecPointwiseMin_Seq,
                                VecGetValues_MPI,
                                0,
                                0,
                                0,
                                0,
                                0,
                                0,
                                VecStrideGather_Default,
                                VecStrideScatter_Default
};
PetscErrorCode VecCreate_MPIPThread_Private(Vec v,PetscBool alloc,PetscInt nghost,const PetscScalar array[])
{
  Vec_MPI            *s;
  PetscThreadsLayout tmap = v->map->tmap;

  PetscNewLog(v,Vec_MPI,&s);
  v->data        = (void*)s;
  PetscMemcpy(v->ops,&DvOps,sizeof(DvOps));
  s->nghost      = nghost;
  v->petscnative = PETSC_TRUE;

  PetscLayoutSetUp(v->map);

  if (!v->map->tmap) {
    PetscThreadsLayoutCreate(&v->map->tmap);
    tmap = v->map->tmap;
  }
  tmap->N = v->map->n;

  /* Set the number of threads */
  if (tmap->nthreads == PETSC_DECIDE) {
    VecSetNThreads(v,PETSC_DECIDE);
  }
  /* Set thread affinities */
  if (!tmap->affinity) {
    VecSetThreadAffinities(v,PETSC_NULL);
  }

  PetscThreadsLayoutSetUp(tmap);

  s->array           = (PetscScalar*)array;
  s->array_allocated = 0;

  if (alloc && !array) {
    PetscInt n = v->map->n+nghost;
    PetscMalloc(n*sizeof(PetscScalar),&s->array);
    PetscLogObjectMemory(v,n*sizeof(PetscScalar));
  }

  if (!vecs_created) {
    PetscMalloc((PetscMaxThreads+PetscMainThreadShareWork)*sizeof(Vec_KernelData),&vec_kerneldatap);
    PetscMalloc((PetscMaxThreads+PetscMainThreadShareWork)*sizeof(Vec_KernelData*),&vec_pdata);
  }
  vecs_created++;

  VecSet_SeqPThread(v,0.0);
  s->array_allocated = (PetscScalar*)s->array;

  /* By default parallel vectors do not have local representation */
  s->localrep    = 0;
  s->localupdate = 0;

  v->stash.insertmode = NOT_SET_VALUES;
  /* create the stashes. The block-size for bstash is set later when
     VecSetValuesBlocked is called.
  */
  VecStashCreate_Private(((PetscObject)v)->comm,1,&v->stash);
  VecStashCreate_Private(((PetscObject)v)->comm,v->map->bs,&v->bstash);

#if defined(PETSC_HAVE_MATLAB_ENGINE)
  PetscObjectComposeFunctionDynamic((PetscObject)v,"PetscMatlabEnginePut_C","VecMatlabEnginePut_Default",VecMatlabEnginePut_Default);
  PetscObjectComposeFunctionDynamic((PetscObject)v,"PetscMatlabEngineGet_C","VecMatlabEngineGet_Default",VecMatlabEngineGet_Default);
#endif
  PetscObjectChangeTypeName((PetscObject)v,VECMPIPTHREAD);
  return(0);
}
/*MC
   VECMPIPTHREAD - VECMPIPTHREAD = "mpipthread" - The basic parallel vector using POSIX threads

   Options Database Keys:
. -vec_type mpipthread - sets the vector type to VECMPIPTHREAD during a call to VecSetFromOptions()

  Level: beginner

.seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VECSEQPTHREAD, VECMPI
M*/
EXTERN_C_BEGIN
PetscErrorCode VecCreate_MPIPThread(Vec vv)
{
  PetscThreadsInitialize(PetscMaxThreads);
  VecCreate_MPIPThread_Private(vv,PETSC_TRUE,0,0);
  return(0);
}
EXTERN_C_END
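/*
   Illustrative usage sketch (not part of the original source): selecting
   VECMPIPTHREAD explicitly with VecSetType() after setting the sizes. The
   communicator, the global size 100, and the VecSet() value are arbitrary
   example choices; error checking follows the usual ierr/CHKERRQ convention.

     Vec            x;
     PetscErrorCode ierr;

     ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr);
     ierr = VecSetSizes(x,PETSC_DECIDE,100);CHKERRQ(ierr);
     ierr = VecSetType(x,VECMPIPTHREAD);CHKERRQ(ierr);
     ierr = VecSet(x,1.0);CHKERRQ(ierr);
     ierr = VecDestroy(&x);CHKERRQ(ierr);
*/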
/*MC
   VECPTHREAD - VECPTHREAD = "pthread" - A VECSEQPTHREAD on one process and VECMPIPTHREAD on more than one process

   Options Database Keys:
. -vec_type pthread - sets the vector type to VECPTHREAD during a call to VecSetFromOptions()

  Level: intermediate

.seealso: VecCreateSeqPThread(), VecCreateMPI()
M*/
EXTERN_C_BEGIN
PetscErrorCode VecCreate_PThread(Vec v)
{
  PetscMPIInt size;

  MPI_Comm_size(((PetscObject)v)->comm,&size);
  if (size == 1) {
    VecCreate_SeqPThread(v);
  } else {
    VecCreate_MPIPThread(v);
  }
  return(0);
}
EXTERN_C_END
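/*
   Illustrative usage sketch (not part of the original source): deferring the
   type choice to the options database with VecSetFromOptions(), so that
   running with -vec_type pthread selects VECSEQPTHREAD on one process and
   VECMPIPTHREAD otherwise. The global size 100 is an arbitrary example choice.

     Vec            x;
     PetscErrorCode ierr;

     ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr);
     ierr = VecSetSizes(x,PETSC_DECIDE,100);CHKERRQ(ierr);
     ierr = VecSetFromOptions(x);CHKERRQ(ierr);
     ierr = VecDestroy(&x);CHKERRQ(ierr);
*/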
/*@
   VecCreateMPIPThread - Creates a parallel vector using POSIX threads.

   Collective on MPI_Comm

   Input Parameters:
+  comm - the MPI communicator to use
.  n - local vector length (or PETSC_DECIDE to have it calculated if N is given)
.  N - global vector length (or PETSC_DETERMINE to have it calculated if n is given)
.  nthreads - number of local threads (or PETSC_DECIDE to have nthreads calculated)
-  affinities - local thread affinities (or PETSC_NULL to have PETSc set the affinities)

   Output Parameter:
.  v - the vector

   Notes:
   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
   same type as an existing vector.

   Level: intermediate

   Concepts: vectors^creating parallel

.seealso: VecCreateSeqPThread(), VecCreate(), VecDuplicate(), VecDuplicateVecs()
@*/
PetscErrorCode VecCreateMPIPThread(MPI_Comm comm,PetscInt n,PetscInt N,PetscInt nthreads,PetscInt affinities[],Vec *v)
{
  VecCreate(comm,v);
  VecSetSizes(*v,n,N);
  VecSetNThreads(*v,nthreads);
  VecSetThreadAffinities(*v,affinities);
  VecSetType(*v,VECMPIPTHREAD);
  return(0);
}
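/*
   Illustrative usage sketch (not part of the original source): a minimal
   driver calling VecCreateMPIPThread() directly. The global size 100 is an
   arbitrary example choice; PETSC_DECIDE and PETSC_NULL let PETSc pick the
   number of threads and their affinities.

     #include <petscvec.h>

     int main(int argc,char **argv)
     {
       Vec            x;
       PetscReal      nrm;
       PetscErrorCode ierr;

       ierr = PetscInitialize(&argc,&argv,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
       ierr = VecCreateMPIPThread(PETSC_COMM_WORLD,PETSC_DECIDE,100,PETSC_DECIDE,PETSC_NULL,&x);CHKERRQ(ierr);
       ierr = VecSet(x,1.0);CHKERRQ(ierr);
       ierr = VecNorm(x,NORM_2,&nrm);CHKERRQ(ierr);
       ierr = PetscPrintf(PETSC_COMM_WORLD,"||x|| = %G\n",nrm);CHKERRQ(ierr);
       ierr = VecDestroy(&x);CHKERRQ(ierr);
       ierr = PetscFinalize();
       return 0;
     }
*/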