Actual source code: comb.c
1: /*
2: Split phase global vector reductions with support for combining the
3: communication portion of several operations. Using MPI-1.1 support only
5: The idea for this and much of the initial code is contributed by
6: Victor Eijkhout.
8: Usage:
9: VecDotBegin(Vec,Vec,PetscScalar *);
10: VecNormBegin(Vec,NormType,PetscReal *);
11: ....
12: VecDotEnd(Vec,Vec,PetscScalar *);
13: VecNormEnd(Vec,NormType,PetscReal *);
15: Limitations:
16: - The order of the xxxEnd() functions MUST be in the same order
17: as the xxxBegin(). There is extensive error checking to try to
18: insure that the user calls the routines in the correct order
19: */
21: #include vecimpl.h
23: #define STATE_BEGIN 0
24: #define STATE_END 1
26: #define REDUCE_SUM 0
27: #define REDUCE_MAX 1
28: #define REDUCE_MIN 2
30: typedef struct {
31: MPI_Comm comm;
32: PetscScalar *lvalues; /* this are the reduced values before call to MPI_Allreduce() */
33: PetscScalar *gvalues; /* values after call to MPI_Allreduce() */
34: void **invecs; /* for debugging only, vector/memory used with each op */
35: PetscInt *reducetype; /* is particular value to be summed or maxed? */
36: PetscInt state; /* are we calling xxxBegin() or xxxEnd()? */
37: PetscInt maxops; /* total amount of space we have for requests */
38: PetscInt numopsbegin; /* number of requests that have been queued in */
39: PetscInt numopsend; /* number of requests that have been gotten by user */
40: } PetscSplitReduction;
41: /*
42: Note: the lvalues and gvalues are twice as long as maxops, this is to allow the second half of
43: the entries to have a flag indicating if they are REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN these are used by
44: the custom reduction operation that replaces MPI_SUM, MPI_MAX, or MPI_MIN in the case when a reduction involves
45: some of each.
46: */
50: /*
51: PetscSplitReductionCreate - Creates a data structure to contain the queued information.
52: */
53: PetscErrorCode PetscSplitReductionCreate(MPI_Comm comm,PetscSplitReduction **sr)
54: {
58: PetscNew(PetscSplitReduction,sr);
59: (*sr)->numopsbegin = 0;
60: (*sr)->numopsend = 0;
61: (*sr)->state = STATE_BEGIN;
62: (*sr)->maxops = 32;
63: PetscMalloc(2*32*sizeof(PetscScalar),&(*sr)->lvalues);
64: PetscMalloc(2*32*sizeof(PetscScalar),&(*sr)->gvalues);
65: PetscMalloc(32*sizeof(void*),&(*sr)->invecs);
66: (*sr)->comm = comm;
67: PetscMalloc(32*sizeof(PetscInt),&(*sr)->reducetype);
68: return(0);
69: }
71: /*
72: This function is the MPI reduction operation used when there is
73: a combination of sums and max in the reduction. The call below to
74: MPI_Op_create() converts the function PetscSplitReduction_Local() to the
75: MPI operator PetscSplitReduction_Op.
76: */
77: MPI_Op PetscSplitReduction_Op = 0;
82: void PetscSplitReduction_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
83: {
84: PetscScalar *xin = (PetscScalar *)in,*xout = (PetscScalar*)out;
85: PetscInt i,count = (PetscInt)*cnt;
88: if (*datatype != MPIU_REAL) {
89: (*PetscErrorPrintf)("Can only handle MPIU_REAL data types");
90: MPI_Abort(MPI_COMM_WORLD,1);
91: }
92: #if defined(PETSC_USE_COMPLEX)
93: count = count/2;
94: #endif
95: count = count/2;
96: for (i=0; i<count; i++) {
97: if (((int)PetscRealPart(xin[count+i])) == REDUCE_SUM) { /* second half of xin[] is flags for reduction type */
98: xout[i] += xin[i];
99: } else if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_MAX) {
100: xout[i] = PetscMax(*(PetscReal *)(xout+i),*(PetscReal *)(xin+i));
101: } else if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_MIN) {
102: xout[i] = PetscMin(*(PetscReal *)(xout+i),*(PetscReal *)(xin+i));
103: } else {
104: (*PetscErrorPrintf)("Reduction type input is not REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN");
105: MPI_Abort(MPI_COMM_WORLD,1);
106: }
107: }
108: PetscStackPop; /* since function returns void cannot use PetscFunctionReturn(); */
109: return;
110: }
115: /*
116: PetscSplitReductionApply - Actually do the communication required for a split phase reduction
117: */
118: PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
119: {
121: PetscInt i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
122: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
123: PetscInt sum_flg = 0,max_flg = 0, min_flg = 0;
124: MPI_Comm comm = sr->comm;
125: PetscMPIInt size;
128: if (sr->numopsend > 0) {
129: SETERRQ(PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
130: }
132: PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,comm);
133: MPI_Comm_size(sr->comm,&size);
134: if (size == 1) {
135: PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));
136: } else {
137: /* determine if all reductions are sum, max, or min */
138: for (i=0; i<numops; i++) {
139: if (reducetype[i] == REDUCE_MAX) {
140: max_flg = 1;
141: } else if (reducetype[i] == REDUCE_SUM) {
142: sum_flg = 1;
143: } else if (reducetype[i] == REDUCE_MIN) {
144: min_flg = 1;
145: } else {
146: SETERRQ(PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
147: }
148: }
149: if (sum_flg + max_flg + min_flg > 1) {
150: /*
151: after all the entires in lvalues we store the reducetype flags to indicate
152: to the reduction operations what are sums and what are max
153: */
154: for (i=0; i<numops; i++) {
155: lvalues[numops+i] = reducetype[i];
156: }
157: #if defined(PETSC_USE_COMPLEX)
158: MPI_Allreduce(lvalues,gvalues,2*2*numops,MPIU_REAL,PetscSplitReduction_Op,comm);
159: #else
160: MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,PetscSplitReduction_Op,comm);
161: #endif
162: } else if (max_flg) {
163: #if defined(PETSC_USE_COMPLEX)
164: /*
165: complex case we max both the real and imaginary parts, the imaginary part
166: is just ignored later
167: */
168: MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,MPI_MAX,comm);
169: #else
170: MPI_Allreduce(lvalues,gvalues,numops,MPIU_REAL,MPI_MAX,comm);
171: #endif
172: } else if (min_flg) {
173: #if defined(PETSC_USE_COMPLEX)
174: /*
175: complex case we min both the real and imaginary parts, the imaginary part
176: is just ignored later
177: */
178: MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,MPI_MIN,comm);
179: #else
180: MPI_Allreduce(lvalues,gvalues,numops,MPIU_REAL,MPI_MIN,comm);
181: #endif
182: } else {
183: MPI_Allreduce(lvalues,gvalues,numops,MPIU_SCALAR,PetscSum_Op,comm);
184: }
185: }
186: sr->state = STATE_END;
187: sr->numopsend = 0;
188: PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,comm);
189: return(0);
190: }
195: /*
196: PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
197: */
198: PetscErrorCode PetscSplitReductionExtend(PetscSplitReduction *sr)
199: {
201: PetscInt maxops = sr->maxops,*reducetype = sr->reducetype;
202: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
203: void *invecs = sr->invecs;
206: sr->maxops = 2*maxops;
207: PetscMalloc(2*2*maxops*sizeof(PetscScalar),&sr->lvalues);
208: PetscMalloc(2*2*maxops*sizeof(PetscScalar),&sr->gvalues);
209: PetscMalloc(2*maxops*sizeof(PetscInt),&sr->reducetype);
210: PetscMalloc(2*maxops*sizeof(void*),&sr->invecs);
211: PetscMemcpy(sr->lvalues,lvalues,maxops*sizeof(PetscScalar));
212: PetscMemcpy(sr->gvalues,gvalues,maxops*sizeof(PetscScalar));
213: PetscMemcpy(sr->reducetype,reducetype,maxops*sizeof(PetscInt));
214: PetscMemcpy(sr->invecs,invecs,maxops*sizeof(void*));
215: PetscFree(lvalues);
216: PetscFree(gvalues);
217: PetscFree(reducetype);
218: PetscFree(invecs);
219: return(0);
220: }
224: PetscErrorCode PetscSplitReductionDestroy(PetscSplitReduction *sr)
225: {
229: PetscFree(sr->lvalues);
230: PetscFree(sr->gvalues);
231: PetscFree(sr->reducetype);
232: PetscFree(sr->invecs);
233: PetscFree(sr);
234: return(0);
235: }
237: static PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;
242: /*
243: Private routine to delete internal storage when a communicator is freed.
244: This is called by MPI, not by users.
246: The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
247: it was MPI_Comm *comm.
248: */
249: int Petsc_DelReduction(MPI_Comm comm,int keyval,void* attr_val,void* extra_state)
250: {
254: PetscLogInfo(0,"Petsc_DelReduction:Deleting reduction data in an MPI_Comm %ld\n",(long)comm);
255: PetscSplitReductionDestroy((PetscSplitReduction *)attr_val);
256: return(0);
257: }
260: /*
261: PetscSplitReductionGet - Gets the split reduction object from a
262: PETSc vector, creates if it does not exit.
264: */
267: PetscErrorCode PetscSplitReductionGet(MPI_Comm comm,PetscSplitReduction **sr)
268: {
270: PetscMPIInt flag;
273: if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
274: /*
275: The calling sequence of the 2nd argument to this function changed
276: between MPI Standard 1.0 and the revisions 1.1 Here we match the
277: new standard, if you are using an MPI implementation that uses
278: the older version you will get a warning message about the next line;
279: it is only a warning message and should do no harm.
280: */
281: MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelReduction,&Petsc_Reduction_keyval,0);
282: }
283: MPI_Attr_get(comm,Petsc_Reduction_keyval,(void **)sr,&flag);
284: if (!flag) { /* doesn't exist yet so create it and put it in */
285: PetscSplitReductionCreate(comm,sr);
286: MPI_Attr_put(comm,Petsc_Reduction_keyval,*sr);
287: PetscLogInfo(0,"PetscSplitReductionGet:Putting reduction data in an MPI_Comm %ld\n",(long)comm);
288: }
290: return(0);
291: }
293: /* ----------------------------------------------------------------------------------------------------*/
297: /*@
298: VecDotBegin - Starts a split phase dot product computation.
300: Input Parameters:
301: + x - the first vector
302: . y - the second vector
303: - result - where the result will go (can be PETSC_NULL)
305: Level: advanced
307: Notes:
308: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
310: seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
311: VecTDotBegin(), VecTDotEnd()
312: @*/
313: PetscErrorCode VecDotBegin(Vec x,Vec y,PetscScalar *result)
314: {
316: PetscSplitReduction *sr;
317: MPI_Comm comm;
320: PetscObjectGetComm((PetscObject)x,&comm);
321: PetscSplitReductionGet(comm,&sr);
322: if (sr->state == STATE_END) {
323: SETERRQ(PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
324: }
325: if (sr->numopsbegin >= sr->maxops) {
326: PetscSplitReductionExtend(sr);
327: }
328: sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
329: sr->invecs[sr->numopsbegin] = (void*)x;
330: if (!x->ops->dot_local) SETERRQ(PETSC_ERR_SUP,"Vector does not suppport local dots");
331: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
332: (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
333: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
334: return(0);
335: }
339: /*@
340: VecDotEnd - Ends a split phase dot product computation.
342: Input Parameters:
343: + x - the first vector (can be PETSC_NULL)
344: . y - the second vector (can be PETSC_NULL)
345: - result - where the result will go
347: Level: advanced
349: Notes:
350: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
352: seealso: VecDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
353: VecTDotBegin(),VecTDotEnd()
355: @*/
356: PetscErrorCode VecDotEnd(Vec x,Vec y,PetscScalar *result)
357: {
359: PetscSplitReduction *sr;
360: MPI_Comm comm;
363: PetscObjectGetComm((PetscObject)x,&comm);
364: PetscSplitReductionGet(comm,&sr);
365:
366: if (sr->state != STATE_END) {
367: /* this is the first call to VecxxxEnd() so do the communication */
368: PetscSplitReductionApply(sr);
369: }
371: if (sr->numopsend >= sr->numopsbegin) {
372: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
373: }
374: if (x && (void*) x != sr->invecs[sr->numopsend]) {
375: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
376: }
377: if (sr->reducetype[sr->numopsend] != REDUCE_SUM) {
378: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
379: }
380: *result = sr->gvalues[sr->numopsend++];
382: /*
383: We are finished getting all the results so reset to no outstanding requests
384: */
385: if (sr->numopsend == sr->numopsbegin) {
386: sr->state = STATE_BEGIN;
387: sr->numopsend = 0;
388: sr->numopsbegin = 0;
389: }
390: return(0);
391: }
395: /*@
396: VecTDotBegin - Starts a split phase transpose dot product computation.
398: Input Parameters:
399: + x - the first vector
400: . y - the second vector
401: - result - where the result will go (can be PETSC_NULL)
403: Level: advanced
405: Notes:
406: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
408: seealso: VecTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
409: VecDotBegin(), VecDotEnd()
411: @*/
412: PetscErrorCode VecTDotBegin(Vec x,Vec y,PetscScalar *result)
413: {
415: PetscSplitReduction *sr;
416: MPI_Comm comm;
419: PetscObjectGetComm((PetscObject)x,&comm);
420: PetscSplitReductionGet(comm,&sr);
421: if (sr->state == STATE_END) {
422: SETERRQ(PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
423: }
424: if (sr->numopsbegin >= sr->maxops) {
425: PetscSplitReductionExtend(sr);
426: }
427: sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
428: sr->invecs[sr->numopsbegin] = (void*)x;
429: if (!x->ops->tdot_local) SETERRQ(PETSC_ERR_SUP,"Vector does not suppport local dots");
430: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
431: (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
432: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
433: return(0);
434: }
438: /*@
439: VecTDotEnd - Ends a split phase transpose dot product computation.
441: Input Parameters:
442: + x - the first vector (can be PETSC_NULL)
443: . y - the second vector (can be PETSC_NULL)
444: - result - where the result will go
446: Level: advanced
448: Notes:
449: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
451: seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
452: VecDotBegin(), VecDotEnd()
453: @*/
454: PetscErrorCode VecTDotEnd(Vec x,Vec y,PetscScalar *result)
455: {
459: /*
460: TDotEnd() is the same as DotEnd() so reuse the code
461: */
462: VecDotEnd(x,y,result);
463: return(0);
464: }
466: /* -------------------------------------------------------------------------*/
470: /*@
471: VecNormBegin - Starts a split phase norm computation.
473: Input Parameters:
474: + x - the first vector
475: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
476: - result - where the result will go (can be PETSC_NULL)
478: Level: advanced
480: Notes:
481: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
483: .seealso: VecNormEnd(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd()
485: @*/
486: PetscErrorCode VecNormBegin(Vec x,NormType ntype,PetscReal *result)
487: {
489: PetscSplitReduction *sr;
490: PetscReal lresult[2];
491: MPI_Comm comm;
494: PetscObjectGetComm((PetscObject)x,&comm);
495: PetscSplitReductionGet(comm,&sr);
496: if (sr->state == STATE_END) {
497: SETERRQ(PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
498: }
499: if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops-1 && ntype == NORM_1_AND_2)) {
500: PetscSplitReductionExtend(sr);
501: }
502:
503: sr->invecs[sr->numopsbegin] = (void*)x;
504: if (!x->ops->norm_local) SETERRQ(PETSC_ERR_SUP,"Vector does not support local norms");
505: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
506: (*x->ops->norm_local)(x,ntype,lresult);
507: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
508: if (ntype == NORM_2) lresult[0] = lresult[0]*lresult[0];
509: if (ntype == NORM_1_AND_2) lresult[1] = lresult[1]*lresult[1];
510: if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = REDUCE_MAX;
511: else sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
512: sr->lvalues[sr->numopsbegin++] = lresult[0];
513: if (ntype == NORM_1_AND_2) {
514: sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
515: sr->lvalues[sr->numopsbegin++] = lresult[1];
516: }
517: return(0);
518: }
522: /*@
523: VecNormEnd - Ends a split phase norm computation.
525: Input Parameters:
526: + x - the first vector (can be PETSC_NULL)
527: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
528: - result - where the result will go
530: Level: advanced
532: Notes:
533: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
535: .seealso: VecNormBegin(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd()
537: @*/
538: PetscErrorCode VecNormEnd(Vec x,NormType ntype,PetscReal *result)
539: {
540: PetscErrorCode ierr;
541: PetscInt type_id;
542: PetscSplitReduction *sr;
543: MPI_Comm comm;
546: VecNormComposedDataID(ntype,&type_id);
548: PetscObjectGetComm((PetscObject)x,&comm);
549: PetscSplitReductionGet(comm,&sr);
550:
551: if (sr->state != STATE_END) {
552: /* this is the first call to VecxxxEnd() so do the communication */
553: PetscSplitReductionApply(sr);
554: }
556: if (sr->numopsend >= sr->numopsbegin) {
557: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
558: }
559: if (x && (void*)x != sr->invecs[sr->numopsend]) {
560: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
561: }
562: if (sr->reducetype[sr->numopsend] != REDUCE_MAX && ntype == NORM_MAX) {
563: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
564: }
565: result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);
567: if (ntype == NORM_2) {
568: result[0] = sqrt(result[0]);
569: } else if (ntype == NORM_1_AND_2) {
570: result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
571: result[1] = sqrt(result[1]);
572: }
573: if (ntype!=NORM_1_AND_2) {
574: PetscObjectSetRealComposedData((PetscObject)x,type_id,result[0]);
575: }
577: if (sr->numopsend == sr->numopsbegin) {
578: sr->state = STATE_BEGIN;
579: sr->numopsend = 0;
580: sr->numopsbegin = 0;
581: }
582: return(0);
583: }
585: /*
586: Possibly add
588: PetscReductionSumBegin/End()
589: PetscReductionMaxBegin/End()
590: PetscReductionMinBegin/End()
591: or have more like MPI with a single function with flag for Op? Like first better
592: */