Actual source code: comb.c
petsc-3.3-p7 2013-05-11
2: /*
3: Split phase global vector reductions with support for combining the
4: communication portion of several operations. Using MPI-1.1 support only
6: The idea for this and much of the initial code is contributed by
7: Victor Eijkhout.
9: Usage:
10: VecDotBegin(Vec,Vec,PetscScalar *);
11: VecNormBegin(Vec,NormType,PetscReal *);
12: ....
13: VecDotEnd(Vec,Vec,PetscScalar *);
14: VecNormEnd(Vec,NormType,PetscReal *);
16: Limitations:
17: - The order of the xxxEnd() functions MUST be in the same order
18: as the xxxBegin(). There is extensive error checking to try to
19: insure that the user calls the routines in the correct order
20: */
22: #include <petsc-private/vecimpl.h> /*I "petscvec.h" I*/
24: #if defined(PETSC_HAVE_PAMI)
25: PetscErrorCode MPIPetsc_Iallreduce_PAMI(void*,void*,PetscMPIInt,MPI_Datatype,MPI_Op,MPI_Comm,MPI_Request*);
26: #endif
27: #if defined(PETSC_HAVE_DCMF)
28: PetscErrorCode MPIPetsc_Iallreduce_DCMF(void*,void*,PetscMPIInt,MPI_Datatype,MPI_Op,MPI_Comm,MPI_Request*);
29: #endif
31: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf,void *recvbuf,PetscMPIInt count,MPI_Datatype datatype,MPI_Op op,MPI_Comm comm,MPI_Request *request)
32: {
35: #if defined(PETSC_HAVE_MPIX_IALLREDUCE)
36: MPIX_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
37: #elif defined(PETSC_HAVE_PAMI)
38: MPIPetsc_Iallreduce_PAMI(sendbuf,recvbuf,count,datatype,op,comm,request);
39: #elif defined(PETSC_HAVE_DCMF)
40: MPIPetsc_Iallreduce_DCMF(sendbuf,recvbuf,count,datatype,op,comm,request);
41: #else
42: MPI_Allreduce(sendbuf,recvbuf,count,datatype,op,comm);
43: *request = MPI_REQUEST_NULL;
44: #endif
45: return(0);
46: }
48: typedef enum {STATE_BEGIN, STATE_PENDING, STATE_END} SRState;
50: #define REDUCE_SUM 0
51: #define REDUCE_MAX 1
52: #define REDUCE_MIN 2
54: typedef struct {
55: MPI_Comm comm;
56: MPI_Request request;
57: PetscBool async;
58: PetscScalar *lvalues; /* this are the reduced values before call to MPI_Allreduce() */
59: PetscScalar *gvalues; /* values after call to MPI_Allreduce() */
60: void **invecs; /* for debugging only, vector/memory used with each op */
61: PetscInt *reducetype; /* is particular value to be summed or maxed? */
62: SRState state; /* are we calling xxxBegin() or xxxEnd()? */
63: PetscInt maxops; /* total amount of space we have for requests */
64: PetscInt numopsbegin; /* number of requests that have been queued in */
65: PetscInt numopsend; /* number of requests that have been gotten by user */
66: } PetscSplitReduction;
67: /*
68: Note: the lvalues and gvalues are twice as long as maxops, this is to allow the second half of
69: the entries to have a flag indicating if they are REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN these are used by
70: the custom reduction operation that replaces MPI_SUM, MPI_MAX, or MPI_MIN in the case when a reduction involves
71: some of each.
72: */
74: static PetscErrorCode PetscSplitReductionGet(MPI_Comm,PetscSplitReduction**);
75: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction*);
79: /*
80: PetscSplitReductionCreate - Creates a data structure to contain the queued information.
81: */
82: static PetscErrorCode PetscSplitReductionCreate(MPI_Comm comm,PetscSplitReduction **sr)
83: {
87: PetscNew(PetscSplitReduction,sr);
88: (*sr)->numopsbegin = 0;
89: (*sr)->numopsend = 0;
90: (*sr)->state = STATE_BEGIN;
91: (*sr)->maxops = 32;
92: PetscMalloc(2*32*sizeof(PetscScalar),&(*sr)->lvalues);
93: PetscMalloc(2*32*sizeof(PetscScalar),&(*sr)->gvalues);
94: PetscMalloc(32*sizeof(void*),&(*sr)->invecs);
95: (*sr)->comm = comm;
96: (*sr)->request = MPI_REQUEST_NULL;
97: PetscMalloc(32*sizeof(PetscInt),&(*sr)->reducetype);
98: (*sr)->async = PETSC_FALSE;
99: #if defined(PETSC_HAVE_MPIX_IALLREDUCE) || defined(PETSC_HAVE_PAMI) || defined(PETSC_HAVE_DCMF)
100: (*sr)->async = PETSC_TRUE; /* Enable by default */
101: PetscOptionsGetBool(PETSC_NULL,"-splitreduction_async",&(*sr)->async,PETSC_NULL);
102: #endif
103: return(0);
104: }
106: /*
107: This function is the MPI reduction operation used when there is
108: a combination of sums and max in the reduction. The call below to
109: MPI_Op_create() converts the function PetscSplitReduction_Local() to the
110: MPI operator PetscSplitReduction_Op.
111: */
112: MPI_Op PetscSplitReduction_Op = 0;
117: {
118: PetscScalar *xin = (PetscScalar *)in,*xout = (PetscScalar*)out;
119: PetscInt i,count = (PetscInt)*cnt;
122: if (*datatype != MPIU_REAL) {
123: (*PetscErrorPrintf)("Can only handle MPIU_REAL data types");
124: MPI_Abort(MPI_COMM_SELF,1);
125: }
126: #if defined(PETSC_USE_COMPLEX)
127: count = count/2;
128: #endif
129: count = count/2;
130: for (i=0; i<count; i++) {
131: if (((int)PetscRealPart(xin[count+i])) == REDUCE_SUM) { /* second half of xin[] is flags for reduction type */
132: xout[i] += xin[i];
133: } else if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_MAX) {
134: xout[i] = PetscMax(*(PetscReal *)(xout+i),*(PetscReal *)(xin+i));
135: } else if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_MIN) {
136: xout[i] = PetscMin(*(PetscReal *)(xout+i),*(PetscReal *)(xin+i));
137: } else {
138: (*PetscErrorPrintf)("Reduction type input is not REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN");
139: MPI_Abort(MPI_COMM_SELF,1);
140: }
141: }
142: PetscFunctionReturnVoid();
143: }
147: /*@
148: PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction
150: Collective but not synchronizing
152: Input Arguments:
153: comm - communicator on which split reduction has been queued
155: Level: advanced
157: Note:
158: Calling this function is optional when using split-mode reduction. On supporting hardware, calling this after all
159: VecXxxBegin() allows the reduction to make asynchronous progress before the result is needed (in VecXxxEnd()).
161: .seealso: VecNormBegin(), VecNormEnd(), VecDotBegin(), VecDotEnd(), VecTDotBegin(), VecTDotEnd(), VecMDotBegin(), VecMDotEnd(), VecMTDotBegin(), VecMTDotEnd()
162: @*/
163: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
164: {
166: PetscSplitReduction *sr;
169: PetscSplitReductionGet(comm,&sr);
170: if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
171: if (sr->async) { /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
172: PetscInt i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
173: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
174: PetscInt sum_flg = 0,max_flg = 0, min_flg = 0;
175: MPI_Comm comm = sr->comm;
176: PetscMPIInt size;
177: PetscLogEventBegin(VEC_ReduceBegin,0,0,0,0);
178: MPI_Comm_size(sr->comm,&size);
179: if (size == 1) {
180: PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));
181: } else {
182: /* determine if all reductions are sum, max, or min */
183: for (i=0; i<numops; i++) {
184: if (reducetype[i] == REDUCE_MAX) {
185: max_flg = 1;
186: } else if (reducetype[i] == REDUCE_SUM) {
187: sum_flg = 1;
188: } else if (reducetype[i] == REDUCE_MIN) {
189: min_flg = 1;
190: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
191: }
192: if (sum_flg + max_flg + min_flg > 1) {
193: /*
194: after all the entires in lvalues we store the reducetype flags to indicate
195: to the reduction operations what are sums and what are max
196: */
197: for (i=0; i<numops; i++) {
198: lvalues[numops+i] = reducetype[i];
199: }
200: #if defined(PETSC_USE_COMPLEX)
201: MPIPetsc_Iallreduce(lvalues,gvalues,2*2*numops,MPIU_REAL,PetscSplitReduction_Op,comm,&sr->request);
202: #else
203: MPIPetsc_Iallreduce(lvalues,gvalues,2*numops,MPIU_REAL,PetscSplitReduction_Op,comm,&sr->request);
204: #endif
205: } else if (max_flg) {
206: #if defined(PETSC_USE_COMPLEX)
207: /*
208: complex case we max both the real and imaginary parts, the imaginary part
209: is just ignored later
210: */
211: MPIPetsc_Iallreduce(lvalues,gvalues,2*numops,MPIU_REAL,MPIU_MAX,comm,&sr->request);
212: #else
213: MPIPetsc_Iallreduce(lvalues,gvalues,numops,MPIU_REAL,MPIU_MAX,comm,&sr->request);
214: #endif
215: } else if (min_flg) {
216: #if defined(PETSC_USE_COMPLEX)
217: /*
218: complex case we min both the real and imaginary parts, the imaginary part
219: is just ignored later
220: */
221: MPIPetsc_Iallreduce(lvalues,gvalues,2*numops,MPIU_REAL,MPIU_MIN,comm,&sr->request);
222: #else
223: MPIPetsc_Iallreduce(lvalues,gvalues,numops,MPIU_REAL,MPIU_MIN,comm,&sr->request);
224: #endif
225: } else {
226: MPIPetsc_Iallreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm,&sr->request);
227: }
228: }
229: sr->state = STATE_PENDING;
230: sr->numopsend = 0;
231: PetscLogEventEnd(VEC_ReduceBegin,0,0,0,0);
232: } else {
233: PetscSplitReductionApply(sr);
234: }
235: return(0);
236: }
240: static PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
241: {
245: switch (sr->state) {
246: case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
247: PetscSplitReductionApply(sr);
248: break;
249: case STATE_PENDING:
250: /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
251: PetscLogEventBegin(VEC_ReduceEnd,0,0,0,0);
252: if (sr->request != MPI_REQUEST_NULL) {
253: MPI_Wait(&sr->request,MPI_STATUS_IGNORE);
254: }
255: sr->state = STATE_END;
256: PetscLogEventEnd(VEC_ReduceEnd,0,0,0,0);
257: break;
258: default: break; /* everything is already done */
259: }
260: return(0);
261: }
265: /*
266: PetscSplitReductionApply - Actually do the communication required for a split phase reduction
267: */
268: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
269: {
271: PetscInt i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
272: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
273: PetscInt sum_flg = 0,max_flg = 0, min_flg = 0;
274: MPI_Comm comm = sr->comm;
275: PetscMPIInt size;
278: if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
279: PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,comm);
280: MPI_Comm_size(sr->comm,&size);
281: if (size == 1) {
282: PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));
283: } else {
284: /* determine if all reductions are sum, max, or min */
285: for (i=0; i<numops; i++) {
286: if (reducetype[i] == REDUCE_MAX) {
287: max_flg = 1;
288: } else if (reducetype[i] == REDUCE_SUM) {
289: sum_flg = 1;
290: } else if (reducetype[i] == REDUCE_MIN) {
291: min_flg = 1;
292: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
293: }
294: if (sum_flg + max_flg + min_flg > 1) {
295: /*
296: after all the entires in lvalues we store the reducetype flags to indicate
297: to the reduction operations what are sums and what are max
298: */
299: for (i=0; i<numops; i++) {
300: lvalues[numops+i] = reducetype[i];
301: }
302: #if defined(PETSC_USE_COMPLEX)
303: MPI_Allreduce(lvalues,gvalues,2*2*numops,MPIU_REAL,PetscSplitReduction_Op,comm);
304: #else
305: MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,PetscSplitReduction_Op,comm);
306: #endif
307: } else if (max_flg) {
308: #if defined(PETSC_USE_COMPLEX)
309: /*
310: complex case we max both the real and imaginary parts, the imaginary part
311: is just ignored later
312: */
313: MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,MPIU_MAX,comm);
314: #else
315: MPI_Allreduce(lvalues,gvalues,numops,MPIU_REAL,MPIU_MAX,comm);
316: #endif
317: } else if (min_flg) {
318: #if defined(PETSC_USE_COMPLEX)
319: /*
320: complex case we min both the real and imaginary parts, the imaginary part
321: is just ignored later
322: */
323: MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,MPIU_MIN,comm);
324: #else
325: MPI_Allreduce(lvalues,gvalues,numops,MPIU_REAL,MPIU_MIN,comm);
326: #endif
327: } else {
328: MPI_Allreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm);
329: }
330: }
331: sr->state = STATE_END;
332: sr->numopsend = 0;
333: PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,comm);
334: return(0);
335: }
339: /*
340: PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
341: */
342: PetscErrorCode PetscSplitReductionExtend(PetscSplitReduction *sr)
343: {
345: PetscInt maxops = sr->maxops,*reducetype = sr->reducetype;
346: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
347: void *invecs = sr->invecs;
350: sr->maxops = 2*maxops;
351: PetscMalloc(2*2*maxops*sizeof(PetscScalar),&sr->lvalues);
352: PetscMalloc(2*2*maxops*sizeof(PetscScalar),&sr->gvalues);
353: PetscMalloc(2*maxops*sizeof(PetscInt),&sr->reducetype);
354: PetscMalloc(2*maxops*sizeof(void*),&sr->invecs);
355: PetscMemcpy(sr->lvalues,lvalues,maxops*sizeof(PetscScalar));
356: PetscMemcpy(sr->gvalues,gvalues,maxops*sizeof(PetscScalar));
357: PetscMemcpy(sr->reducetype,reducetype,maxops*sizeof(PetscInt));
358: PetscMemcpy(sr->invecs,invecs,maxops*sizeof(void*));
359: PetscFree(lvalues);
360: PetscFree(gvalues);
361: PetscFree(reducetype);
362: PetscFree(invecs);
363: return(0);
364: }
368: PetscErrorCode PetscSplitReductionDestroy(PetscSplitReduction *sr)
369: {
373: PetscFree(sr->lvalues);
374: PetscFree(sr->gvalues);
375: PetscFree(sr->reducetype);
376: PetscFree(sr->invecs);
377: PetscFree(sr);
378: return(0);
379: }
381: static PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;
385: /*
386: Private routine to delete internal storage when a communicator is freed.
387: This is called by MPI, not by users.
389: The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
390: it was MPI_Comm *comm.
391: */
393: {
397: PetscInfo1(0,"Deleting reduction data in an MPI_Comm %ld\n",(long)comm);
398: PetscSplitReductionDestroy((PetscSplitReduction *)attr_val);
399: return(0);
400: }
402: /*
403: PetscSplitReductionGet - Gets the split reduction object from a
404: PETSc vector, creates if it does not exit.
406: */
409: static PetscErrorCode PetscSplitReductionGet(MPI_Comm comm,PetscSplitReduction **sr)
410: {
412: PetscMPIInt flag;
415: if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
416: /*
417: The calling sequence of the 2nd argument to this function changed
418: between MPI Standard 1.0 and the revisions 1.1 Here we match the
419: new standard, if you are using an MPI implementation that uses
420: the older version you will get a warning message about the next line;
421: it is only a warning message and should do no harm.
422: */
423: MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelReduction,&Petsc_Reduction_keyval,0);
424: }
425: MPI_Attr_get(comm,Petsc_Reduction_keyval,(void **)sr,&flag);
426: if (!flag) { /* doesn't exist yet so create it and put it in */
427: PetscSplitReductionCreate(comm,sr);
428: MPI_Attr_put(comm,Petsc_Reduction_keyval,*sr);
429: PetscInfo1(0,"Putting reduction data in an MPI_Comm %ld\n",(long)comm);
430: }
431: return(0);
432: }
434: /* ----------------------------------------------------------------------------------------------------*/
438: /*@
439: VecDotBegin - Starts a split phase dot product computation.
441: Input Parameters:
442: + x - the first vector
443: . y - the second vector
444: - result - where the result will go (can be PETSC_NULL)
446: Level: advanced
448: Notes:
449: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
451: seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
452: VecTDotBegin(), VecTDotEnd(), PetscCommSplitReductionBegin()
453: @*/
454: PetscErrorCode VecDotBegin(Vec x,Vec y,PetscScalar *result)
455: {
456: PetscErrorCode ierr;
457: PetscSplitReduction *sr;
458: MPI_Comm comm;
461: PetscObjectGetComm((PetscObject)x,&comm);
462: PetscSplitReductionGet(comm,&sr);
463: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
464: if (sr->numopsbegin >= sr->maxops) {
465: PetscSplitReductionExtend(sr);
466: }
467: sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
468: sr->invecs[sr->numopsbegin] = (void*)x;
469: if (!x->ops->dot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local dots");
470: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
471: (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
472: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
473: return(0);
474: }
478: /*@
479: VecDotEnd - Ends a split phase dot product computation.
481: Input Parameters:
482: + x - the first vector (can be PETSC_NULL)
483: . y - the second vector (can be PETSC_NULL)
484: - result - where the result will go
486: Level: advanced
488: Notes:
489: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
491: .seealso: VecDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
492: VecTDotBegin(),VecTDotEnd(), PetscCommSplitReductionBegin()
494: @*/
495: PetscErrorCode VecDotEnd(Vec x,Vec y,PetscScalar *result)
496: {
497: PetscErrorCode ierr;
498: PetscSplitReduction *sr;
499: MPI_Comm comm;
502: PetscObjectGetComm((PetscObject)x,&comm);
503: PetscSplitReductionGet(comm,&sr);
504: PetscSplitReductionEnd(sr);
506: if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
507: if (x && (void*) x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
508: if (sr->reducetype[sr->numopsend] != REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
509: *result = sr->gvalues[sr->numopsend++];
511: /*
512: We are finished getting all the results so reset to no outstanding requests
513: */
514: if (sr->numopsend == sr->numopsbegin) {
515: sr->state = STATE_BEGIN;
516: sr->numopsend = 0;
517: sr->numopsbegin = 0;
518: }
519: return(0);
520: }
524: /*@
525: VecTDotBegin - Starts a split phase transpose dot product computation.
527: Input Parameters:
528: + x - the first vector
529: . y - the second vector
530: - result - where the result will go (can be PETSC_NULL)
532: Level: advanced
534: Notes:
535: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
537: .seealso: VecTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
538: VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()
540: @*/
541: PetscErrorCode VecTDotBegin(Vec x,Vec y,PetscScalar *result)
542: {
543: PetscErrorCode ierr;
544: PetscSplitReduction *sr;
545: MPI_Comm comm;
548: PetscObjectGetComm((PetscObject)x,&comm);
549: PetscSplitReductionGet(comm,&sr);
550: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
551: if (sr->numopsbegin >= sr->maxops) {
552: PetscSplitReductionExtend(sr);
553: }
554: sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
555: sr->invecs[sr->numopsbegin] = (void*)x;
556: if (!x->ops->tdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local dots");
557: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
558: (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
559: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
560: return(0);
561: }
565: /*@
566: VecTDotEnd - Ends a split phase transpose dot product computation.
568: Input Parameters:
569: + x - the first vector (can be PETSC_NULL)
570: . y - the second vector (can be PETSC_NULL)
571: - result - where the result will go
573: Level: advanced
575: Notes:
576: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
578: seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
579: VecDotBegin(), VecDotEnd()
580: @*/
581: PetscErrorCode VecTDotEnd(Vec x,Vec y,PetscScalar *result)
582: {
586: /*
587: TDotEnd() is the same as DotEnd() so reuse the code
588: */
589: VecDotEnd(x,y,result);
590: return(0);
591: }
593: /* -------------------------------------------------------------------------*/
597: /*@
598: VecNormBegin - Starts a split phase norm computation.
600: Input Parameters:
601: + x - the first vector
602: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
603: - result - where the result will go (can be PETSC_NULL)
605: Level: advanced
607: Notes:
608: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
610: .seealso: VecNormEnd(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()
612: @*/
613: PetscErrorCode VecNormBegin(Vec x,NormType ntype,PetscReal *result)
614: {
615: PetscErrorCode ierr;
616: PetscSplitReduction *sr;
617: PetscReal lresult[2];
618: MPI_Comm comm;
621: PetscObjectGetComm((PetscObject)x,&comm);
622: PetscSplitReductionGet(comm,&sr);
623: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
624: if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops-1 && ntype == NORM_1_AND_2)) {
625: PetscSplitReductionExtend(sr);
626: }
627:
628: sr->invecs[sr->numopsbegin] = (void*)x;
629: if (!x->ops->norm_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local norms");
630: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
631: (*x->ops->norm_local)(x,ntype,lresult);
632: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
633: if (ntype == NORM_2) lresult[0] = lresult[0]*lresult[0];
634: if (ntype == NORM_1_AND_2) lresult[1] = lresult[1]*lresult[1];
635: if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = REDUCE_MAX;
636: else sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
637: sr->lvalues[sr->numopsbegin++] = lresult[0];
638: if (ntype == NORM_1_AND_2) {
639: sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
640: sr->lvalues[sr->numopsbegin++] = lresult[1];
641: }
642: return(0);
643: }
647: /*@
648: VecNormEnd - Ends a split phase norm computation.
650: Input Parameters:
651: + x - the first vector (can be PETSC_NULL)
652: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
653: - result - where the result will go
655: Level: advanced
657: Notes:
658: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
660: .seealso: VecNormBegin(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()
662: @*/
663: PetscErrorCode VecNormEnd(Vec x,NormType ntype,PetscReal *result)
664: {
665: PetscErrorCode ierr;
666: PetscSplitReduction *sr;
667: MPI_Comm comm;
670: PetscObjectGetComm((PetscObject)x,&comm);
671: PetscSplitReductionGet(comm,&sr);
672: PetscSplitReductionEnd(sr);
674: if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
675: if (x && (void*)x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
676: if (sr->reducetype[sr->numopsend] != REDUCE_MAX && ntype == NORM_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
677: result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);
679: if (ntype == NORM_2) {
680: result[0] = PetscSqrtReal(result[0]);
681: } else if (ntype == NORM_1_AND_2) {
682: result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
683: result[1] = PetscSqrtReal(result[1]);
684: }
685: if (ntype!=NORM_1_AND_2) {
686: PetscObjectComposedDataSetReal((PetscObject)x,NormIds[ntype],result[0]);
687: }
689: if (sr->numopsend == sr->numopsbegin) {
690: sr->state = STATE_BEGIN;
691: sr->numopsend = 0;
692: sr->numopsbegin = 0;
693: }
694: return(0);
695: }
697: /*
698: Possibly add
700: PetscReductionSumBegin/End()
701: PetscReductionMaxBegin/End()
702: PetscReductionMinBegin/End()
703: or have more like MPI with a single function with flag for Op? Like first better
704: */
708: /*@
709: VecMDotBegin - Starts a split phase multiple dot product computation.
711: Input Parameters:
712: + x - the first vector
713: . nv - number of vectors
714: . y - array of vectors
715: - result - where the result will go (can be PETSC_NULL)
717: Level: advanced
719: Notes:
720: Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().
722: .seealso: VecMDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
723: VecTDotBegin(), VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()
724: @*/
725: PetscErrorCode VecMDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
726: {
727: PetscErrorCode ierr;
728: PetscSplitReduction *sr;
729: MPI_Comm comm;
730: int i;
733: PetscObjectGetComm((PetscObject)x,&comm);
734: PetscSplitReductionGet(comm,&sr);
735: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
736: for (i=0;i<nv;i++) {
737: if (sr->numopsbegin+i >= sr->maxops) {
738: PetscSplitReductionExtend(sr);
739: }
740: sr->reducetype[sr->numopsbegin+i] = REDUCE_SUM;
741: sr->invecs[sr->numopsbegin+i] = (void*)x;
742: }
743: if (!x->ops->mdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local mdots");
744: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
745: (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
746: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
747: sr->numopsbegin += nv;
748: return(0);
749: }
753: /*@
754: VecMDotEnd - Ends a split phase multiple dot product computation.
756: Input Parameters:
757: + x - the first vector (can be PETSC_NULL)
758: . nv - number of vectors
759: - y - array of vectors (can be PETSC_NULL)
761: Output Parameters:
762: . result - where the result will go
764: Level: advanced
766: Notes:
767: Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().
769: .seealso: VecMDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
770: VecTDotBegin(),VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()
772: @*/
773: PetscErrorCode VecMDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
774: {
775: PetscErrorCode ierr;
776: PetscSplitReduction *sr;
777: MPI_Comm comm;
778: int i;
781: PetscObjectGetComm((PetscObject)x,&comm);
782: PetscSplitReductionGet(comm,&sr);
783: PetscSplitReductionEnd(sr);
785: if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
786: if (x && (void*) x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
787: if (sr->reducetype[sr->numopsend] != REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
788: for (i=0;i<nv;i++) {
789: result[i] = sr->gvalues[sr->numopsend++];
790: }
791:
792: /*
793: We are finished getting all the results so reset to no outstanding requests
794: */
795: if (sr->numopsend == sr->numopsbegin) {
796: sr->state = STATE_BEGIN;
797: sr->numopsend = 0;
798: sr->numopsbegin = 0;
799: }
800: return(0);
801: }
805: /*@
806: VecMTDotBegin - Starts a split phase transpose multiple dot product computation.
808: Input Parameters:
809: + x - the first vector
810: . nv - number of vectors
811: . y - array of vectors
812: - result - where the result will go (can be PETSC_NULL)
814: Level: advanced
816: Notes:
817: Each call to VecMTDotBegin() should be paired with a call to VecMTDotEnd().
819: .seealso: VecMTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
820: VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()
822: @*/
823: PetscErrorCode VecMTDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
824: {
825: PetscErrorCode ierr;
826: PetscSplitReduction *sr;
827: MPI_Comm comm;
828: int i;
831: PetscObjectGetComm((PetscObject)x,&comm);
832: PetscSplitReductionGet(comm,&sr);
833: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
834: for (i=0;i<nv;i++) {
835: if (sr->numopsbegin+i >= sr->maxops) {
836: PetscSplitReductionExtend(sr);
837: }
838: sr->reducetype[sr->numopsbegin+i] = REDUCE_SUM;
839: sr->invecs[sr->numopsbegin+i] = (void*)x;
840: }
841: if (!x->ops->mtdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local mdots");
842: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
843: (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
844: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
845: sr->numopsbegin += nv;
846: return(0);
847: }
851: /*@
852: VecMTDotEnd - Ends a split phase transpose multiple dot product computation.
854: Input Parameters:
855: + x - the first vector (can be PETSC_NULL)
856: . nv - number of vectors
857: - y - array of vectors (can be PETSC_NULL)
859: Output Parameters
860: . result - where the result will go
862: Level: advanced
864: Notes:
865: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
867: .seealso: VecMTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
868: VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()
869: @*/
870: PetscErrorCode VecMTDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
871: {
875: /*
876: MTDotEnd() is the same as MDotEnd() so reuse the code
877: */
878: VecMDotEnd(x,nv,y,result);
879: return(0);
880: }