Actual source code: comb.c
petsc-main 2021-04-16
2: /*
3: Split phase global vector reductions with support for combining the
4: communication portion of several operations. Using MPI-1.1 support only
6: The idea for this and much of the initial code is contributed by
7: Victor Eijkhout.
9: Usage:
10: VecDotBegin(Vec,Vec,PetscScalar *);
11: VecNormBegin(Vec,NormType,PetscReal *);
12: ....
13: VecDotEnd(Vec,Vec,PetscScalar *);
14: VecNormEnd(Vec,NormType,PetscReal *);
16: Limitations:
17: - The order of the xxxEnd() functions MUST be in the same order
18: as the xxxBegin(). There is extensive error checking to try to
19: insure that the user calls the routines in the correct order
20: */
22: #include <petsc/private/vecimpl.h>
24: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf,void *recvbuf,PetscMPIInt count,MPI_Datatype datatype,MPI_Op op,MPI_Comm comm,MPI_Request *request)
25: {
29: #if defined(PETSC_HAVE_MPI_IALLREDUCE)
30: MPI_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
31: #else
32: MPIU_Allreduce(sendbuf,recvbuf,count,datatype,op,comm);
33: *request = MPI_REQUEST_NULL;
34: #endif
35: return(0);
36: }
38: /*
39: Note: the lvalues and gvalues are twice as long as maxops, this is to allow the second half of
40: the entries to have a flag indicating if they are PETSC_SR_REDUCE_SUM, PETSC_SR_REDUCE_MAX, or PETSC_SR_REDUCE_MIN these are used by
41: the custom reduction operation that replaces MPI_SUM, MPI_MAX, or MPI_MIN in the case when a reduction involves
42: some of each.
43: */
45: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction*);
47: /*
48: PetscSplitReductionCreate - Creates a data structure to contain the queued information.
49: */
50: static PetscErrorCode PetscSplitReductionCreate(MPI_Comm comm,PetscSplitReduction **sr)
51: {
55: PetscNew(sr);
56: (*sr)->numopsbegin = 0;
57: (*sr)->numopsend = 0;
58: (*sr)->state = STATE_BEGIN;
59: #define MAXOPS 32
60: (*sr)->maxops = MAXOPS;
61: PetscMalloc4(2*MAXOPS,&(*sr)->lvalues,2*MAXOPS,&(*sr)->gvalues,MAXOPS,&(*sr)->invecs,MAXOPS,&(*sr)->reducetype);
62: #undef MAXOPS
63: (*sr)->comm = comm;
64: (*sr)->request = MPI_REQUEST_NULL;
65: (*sr)->async = PETSC_FALSE;
66: #if defined(PETSC_HAVE_MPI_IALLREDUCE)
67: (*sr)->async = PETSC_TRUE; /* Enable by default */
68: #endif
69: /* always check for option; so that tests that run on systems without support don't warn about unhandled options */
70: PetscOptionsGetBool(NULL,NULL,"-splitreduction_async",&(*sr)->async,NULL);
71: return(0);
72: }
74: /*
75: This function is the MPI reduction operation used when there is
76: a combination of sums and max in the reduction. The call below to
77: MPI_Op_create() converts the function PetscSplitReduction_Local() to the
78: MPI operator PetscSplitReduction_Op.
79: */
80: MPI_Op PetscSplitReduction_Op = 0;
82: PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
83: {
84: PetscScalar *xin = (PetscScalar*)in,*xout = (PetscScalar*)out;
85: PetscInt i,count = (PetscInt)*cnt;
88: if (*datatype != MPIU_SCALAR) {
89: (*PetscErrorPrintf)("Can only handle MPIU_SCALAR data types");
90: PETSCABORT(MPI_COMM_SELF,PETSC_ERR_ARG_WRONG);
91: }
92: count = count/2;
93: for (i=0; i<count; i++) {
94: /* second half of xin[] is flags for reduction type */
95: if ((PetscInt)PetscRealPart(xin[count+i]) == PETSC_SR_REDUCE_SUM) xout[i] += xin[i];
96: else if ((PetscInt)PetscRealPart(xin[count+i]) == PETSC_SR_REDUCE_MAX) xout[i] = PetscMax(*(PetscReal*)(xout+i),*(PetscReal*)(xin+i));
97: else if ((PetscInt)PetscRealPart(xin[count+i]) == PETSC_SR_REDUCE_MIN) xout[i] = PetscMin(*(PetscReal*)(xout+i),*(PetscReal*)(xin+i));
98: else {
99: (*PetscErrorPrintf)("Reduction type input is not PETSC_SR_REDUCE_SUM, PETSC_SR_REDUCE_MAX, or PETSC_SR_REDUCE_MIN");
100: PETSCABORT(MPI_COMM_SELF,PETSC_ERR_ARG_WRONG);
101: }
102: }
103: PetscFunctionReturnVoid();
104: }
106: /*@
107: PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction
109: Collective but not synchronizing
111: Input Arguments:
112: comm - communicator on which split reduction has been queued
114: Level: advanced
116: Note:
117: Calling this function is optional when using split-mode reduction. On supporting hardware, calling this after all
118: VecXxxBegin() allows the reduction to make asynchronous progress before the result is needed (in VecXxxEnd()).
120: .seealso: VecNormBegin(), VecNormEnd(), VecDotBegin(), VecDotEnd(), VecTDotBegin(), VecTDotEnd(), VecMDotBegin(), VecMDotEnd(), VecMTDotBegin(), VecMTDotEnd()
121: @*/
122: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
123: {
124: PetscErrorCode ierr;
125: PetscSplitReduction *sr;
128: PetscSplitReductionGet(comm,&sr);
129: if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
130: if (sr->async) { /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
131: PetscInt i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
132: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
133: PetscInt sum_flg = 0,max_flg = 0, min_flg = 0;
134: MPI_Comm comm = sr->comm;
135: PetscMPIInt size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);
136: PetscLogEventBegin(VEC_ReduceBegin,0,0,0,0);
137: MPI_Comm_size(sr->comm,&size);
138: if (size == 1) {
139: PetscArraycpy(gvalues,lvalues,numops);
140: } else {
141: /* determine if all reductions are sum, max, or min */
142: for (i=0; i<numops; i++) {
143: if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
144: else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
145: else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
146: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
147: }
148: if (sum_flg + max_flg + min_flg > 1) {
149: /*
150: after all the entires in lvalues we store the reducetype flags to indicate
151: to the reduction operations what are sums and what are max
152: */
153: for (i=0; i<numops; i++) lvalues[numops+i] = reducetype[i];
155: MPIPetsc_Iallreduce(lvalues,gvalues,2*numops,MPIU_SCALAR,PetscSplitReduction_Op,comm,&sr->request);
156: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
157: MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm,&sr->request);
158: } else if (min_flg) {
159: MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm,&sr->request);
160: } else {
161: MPIPetsc_Iallreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm,&sr->request);
162: }
163: }
164: sr->state = STATE_PENDING;
165: sr->numopsend = 0;
166: PetscLogEventEnd(VEC_ReduceBegin,0,0,0,0);
167: } else {
168: PetscSplitReductionApply(sr);
169: }
170: return(0);
171: }
173: PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
174: {
178: switch (sr->state) {
179: case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
180: PetscSplitReductionApply(sr);
181: break;
182: case STATE_PENDING:
183: /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
184: PetscLogEventBegin(VEC_ReduceEnd,0,0,0,0);
185: if (sr->request != MPI_REQUEST_NULL) {
186: MPI_Wait(&sr->request,MPI_STATUS_IGNORE);
187: }
188: sr->state = STATE_END;
189: PetscLogEventEnd(VEC_ReduceEnd,0,0,0,0);
190: break;
191: default: break; /* everything is already done */
192: }
193: return(0);
194: }
196: /*
197: PetscSplitReductionApply - Actually do the communication required for a split phase reduction
198: */
199: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
200: {
202: PetscInt i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
203: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
204: PetscInt sum_flg = 0,max_flg = 0, min_flg = 0;
205: MPI_Comm comm = sr->comm;
206: PetscMPIInt size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);
209: if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
210: PetscLogEventBegin(VEC_ReduceCommunication,0,0,0,0);
211: MPI_Comm_size(sr->comm,&size);
212: if (size == 1) {
213: PetscArraycpy(gvalues,lvalues,numops);
214: } else {
215: /* determine if all reductions are sum, max, or min */
216: for (i=0; i<numops; i++) {
217: if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
218: else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
219: else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
220: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
221: }
222: if (sum_flg + max_flg + min_flg > 1) {
223: /*
224: after all the entires in lvalues we store the reducetype flags to indicate
225: to the reduction operations what are sums and what are max
226: */
227: for (i=0; i<numops; i++) lvalues[numops+i] = reducetype[i];
228: MPIU_Allreduce(lvalues,gvalues,2*numops,MPIU_SCALAR,PetscSplitReduction_Op,comm);
229: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
230: MPIU_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm);
231: } else if (min_flg) {
232: MPIU_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm);
233: } else {
234: MPIU_Allreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm);
235: }
236: }
237: sr->state = STATE_END;
238: sr->numopsend = 0;
239: PetscLogEventEnd(VEC_ReduceCommunication,0,0,0,0);
240: return(0);
241: }
243: /*
244: PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
245: */
246: PetscErrorCode PetscSplitReductionExtend(PetscSplitReduction *sr)
247: {
249: PetscInt maxops = sr->maxops,*reducetype = sr->reducetype;
250: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
251: void **invecs = sr->invecs;
254: sr->maxops = 2*maxops;
255: PetscMalloc4(2*2*maxops,&sr->lvalues,2*2*maxops,&sr->gvalues,2*maxops,&sr->reducetype,2*maxops,&sr->invecs);
256: PetscArraycpy(sr->lvalues,lvalues,maxops);
257: PetscArraycpy(sr->gvalues,gvalues,maxops);
258: PetscArraycpy(sr->reducetype,reducetype,maxops);
259: PetscArraycpy(sr->invecs,invecs,maxops);
260: PetscFree4(lvalues,gvalues,reducetype,invecs);
261: return(0);
262: }
264: PetscErrorCode PetscSplitReductionDestroy(PetscSplitReduction *sr)
265: {
269: PetscFree4(sr->lvalues,sr->gvalues,sr->reducetype,sr->invecs);
270: PetscFree(sr);
271: return(0);
272: }
274: PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;
276: /*
277: Private routine to delete internal storage when a communicator is freed.
278: This is called by MPI, not by users.
280: The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
281: it was MPI_Comm *comm.
282: */
283: PETSC_EXTERN int MPIAPI Petsc_DelReduction(MPI_Comm comm,int keyval,void* attr_val,void* extra_state)
284: {
288: PetscInfo1(0,"Deleting reduction data in an MPI_Comm %ld\n",(long)comm);
289: PetscSplitReductionDestroy((PetscSplitReduction*)attr_val);
290: return(0);
291: }
293: /*
294: PetscSplitReductionGet - Gets the split reduction object from a
295: PETSc vector, creates if it does not exit.
297: */
298: PetscErrorCode PetscSplitReductionGet(MPI_Comm comm,PetscSplitReduction **sr)
299: {
301: PetscMPIInt flag;
304: if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
305: /*
306: The calling sequence of the 2nd argument to this function changed
307: between MPI Standard 1.0 and the revisions 1.1 Here we match the
308: new standard, if you are using an MPI implementation that uses
309: the older version you will get a warning message about the next line;
310: it is only a warning message and should do no harm.
311: */
312: MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,Petsc_DelReduction,&Petsc_Reduction_keyval,NULL);
313: }
314: MPI_Comm_get_attr(comm,Petsc_Reduction_keyval,(void**)sr,&flag);
315: if (!flag) { /* doesn't exist yet so create it and put it in */
316: PetscSplitReductionCreate(comm,sr);
317: MPI_Comm_set_attr(comm,Petsc_Reduction_keyval,*sr);
318: PetscInfo1(0,"Putting reduction data in an MPI_Comm %ld\n",(long)comm);
319: }
320: return(0);
321: }
323: /* ----------------------------------------------------------------------------------------------------*/
325: /*@
326: VecDotBegin - Starts a split phase dot product computation.
328: Input Parameters:
329: + x - the first vector
330: . y - the second vector
331: - result - where the result will go (can be NULL)
333: Level: advanced
335: Notes:
336: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
338: seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
339: VecTDotBegin(), VecTDotEnd(), PetscCommSplitReductionBegin()
340: @*/
341: PetscErrorCode VecDotBegin(Vec x,Vec y,PetscScalar *result)
342: {
343: PetscErrorCode ierr;
344: PetscSplitReduction *sr;
345: MPI_Comm comm;
350: PetscObjectGetComm((PetscObject)x,&comm);
351: PetscSplitReductionGet(comm,&sr);
352: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
353: if (sr->numopsbegin >= sr->maxops) {
354: PetscSplitReductionExtend(sr);
355: }
356: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
357: sr->invecs[sr->numopsbegin] = (void*)x;
358: if (!x->ops->dot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local dots");
359: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
360: (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
361: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
362: return(0);
363: }
365: /*@
366: VecDotEnd - Ends a split phase dot product computation.
368: Input Parameters:
369: + x - the first vector (can be NULL)
370: . y - the second vector (can be NULL)
371: - result - where the result will go
373: Level: advanced
375: Notes:
376: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
378: .seealso: VecDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
379: VecTDotBegin(),VecTDotEnd(), PetscCommSplitReductionBegin()
381: @*/
382: PetscErrorCode VecDotEnd(Vec x,Vec y,PetscScalar *result)
383: {
384: PetscErrorCode ierr;
385: PetscSplitReduction *sr;
386: MPI_Comm comm;
389: PetscObjectGetComm((PetscObject)x,&comm);
390: PetscSplitReductionGet(comm,&sr);
391: PetscSplitReductionEnd(sr);
393: if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
394: if (x && (void*)x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
395: if (sr->reducetype[sr->numopsend] != PETSC_SR_REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
396: *result = sr->gvalues[sr->numopsend++];
398: /*
399: We are finished getting all the results so reset to no outstanding requests
400: */
401: if (sr->numopsend == sr->numopsbegin) {
402: sr->state = STATE_BEGIN;
403: sr->numopsend = 0;
404: sr->numopsbegin = 0;
405: }
406: return(0);
407: }
409: /*@
410: VecTDotBegin - Starts a split phase transpose dot product computation.
412: Input Parameters:
413: + x - the first vector
414: . y - the second vector
415: - result - where the result will go (can be NULL)
417: Level: advanced
419: Notes:
420: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
422: .seealso: VecTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
423: VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()
425: @*/
426: PetscErrorCode VecTDotBegin(Vec x,Vec y,PetscScalar *result)
427: {
428: PetscErrorCode ierr;
429: PetscSplitReduction *sr;
430: MPI_Comm comm;
433: PetscObjectGetComm((PetscObject)x,&comm);
434: PetscSplitReductionGet(comm,&sr);
435: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
436: if (sr->numopsbegin >= sr->maxops) {
437: PetscSplitReductionExtend(sr);
438: }
439: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
440: sr->invecs[sr->numopsbegin] = (void*)x;
441: if (!x->ops->tdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local dots");
442: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
443: (*x->ops->tdot_local)(x,y,sr->lvalues+sr->numopsbegin++);
444: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
445: return(0);
446: }
448: /*@
449: VecTDotEnd - Ends a split phase transpose dot product computation.
451: Input Parameters:
452: + x - the first vector (can be NULL)
453: . y - the second vector (can be NULL)
454: - result - where the result will go
456: Level: advanced
458: Notes:
459: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
461: seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
462: VecDotBegin(), VecDotEnd()
463: @*/
464: PetscErrorCode VecTDotEnd(Vec x,Vec y,PetscScalar *result)
465: {
469: /*
470: TDotEnd() is the same as DotEnd() so reuse the code
471: */
472: VecDotEnd(x,y,result);
473: return(0);
474: }
476: /* -------------------------------------------------------------------------*/
478: /*@
479: VecNormBegin - Starts a split phase norm computation.
481: Input Parameters:
482: + x - the first vector
483: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
484: - result - where the result will go (can be NULL)
486: Level: advanced
488: Notes:
489: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
491: .seealso: VecNormEnd(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()
493: @*/
494: PetscErrorCode VecNormBegin(Vec x,NormType ntype,PetscReal *result)
495: {
496: PetscErrorCode ierr;
497: PetscSplitReduction *sr;
498: PetscReal lresult[2];
499: MPI_Comm comm;
503: PetscObjectGetComm((PetscObject)x,&comm);
504: PetscSplitReductionGet(comm,&sr);
505: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
506: if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops-1 && ntype == NORM_1_AND_2)) {
507: PetscSplitReductionExtend(sr);
508: }
510: sr->invecs[sr->numopsbegin] = (void*)x;
511: if (!x->ops->norm_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local norms");
512: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
513: (*x->ops->norm_local)(x,ntype,lresult);
514: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
515: if (ntype == NORM_2) lresult[0] = lresult[0]*lresult[0];
516: if (ntype == NORM_1_AND_2) lresult[1] = lresult[1]*lresult[1];
517: if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_MAX;
518: else sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
519: sr->lvalues[sr->numopsbegin++] = lresult[0];
520: if (ntype == NORM_1_AND_2) {
521: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
522: sr->lvalues[sr->numopsbegin++] = lresult[1];
523: }
524: return(0);
525: }
527: /*@
528: VecNormEnd - Ends a split phase norm computation.
530: Input Parameters:
531: + x - the first vector
532: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
533: - result - where the result will go
535: Level: advanced
537: Notes:
538: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
540: The x vector is not allowed to be NULL, otherwise the vector would not have its correctly cached norm value
542: .seealso: VecNormBegin(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()
544: @*/
545: PetscErrorCode VecNormEnd(Vec x,NormType ntype,PetscReal *result)
546: {
547: PetscErrorCode ierr;
548: PetscSplitReduction *sr;
549: MPI_Comm comm;
553: PetscObjectGetComm((PetscObject)x,&comm);
554: PetscSplitReductionGet(comm,&sr);
555: PetscSplitReductionEnd(sr);
557: if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
558: if ((void*)x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
559: if (sr->reducetype[sr->numopsend] != PETSC_SR_REDUCE_MAX && ntype == NORM_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
560: result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);
562: if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
563: else if (ntype == NORM_1_AND_2) {
564: result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
565: result[1] = PetscSqrtReal(result[1]);
566: }
567: if (ntype!=NORM_1_AND_2) {
568: PetscObjectComposedDataSetReal((PetscObject)x,NormIds[ntype],result[0]);
569: }
571: if (sr->numopsend == sr->numopsbegin) {
572: sr->state = STATE_BEGIN;
573: sr->numopsend = 0;
574: sr->numopsbegin = 0;
575: }
576: return(0);
577: }
579: /*
580: Possibly add
582: PetscReductionSumBegin/End()
583: PetscReductionMaxBegin/End()
584: PetscReductionMinBegin/End()
585: or have more like MPI with a single function with flag for Op? Like first better
586: */
588: /*@
589: VecMDotBegin - Starts a split phase multiple dot product computation.
591: Input Parameters:
592: + x - the first vector
593: . nv - number of vectors
594: . y - array of vectors
595: - result - where the result will go (can be NULL)
597: Level: advanced
599: Notes:
600: Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().
602: .seealso: VecMDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
603: VecTDotBegin(), VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()
604: @*/
605: PetscErrorCode VecMDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
606: {
607: PetscErrorCode ierr;
608: PetscSplitReduction *sr;
609: MPI_Comm comm;
610: int i;
613: PetscObjectGetComm((PetscObject)x,&comm);
614: PetscSplitReductionGet(comm,&sr);
615: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
616: for (i=0; i<nv; i++) {
617: if (sr->numopsbegin+i >= sr->maxops) {
618: PetscSplitReductionExtend(sr);
619: }
620: sr->reducetype[sr->numopsbegin+i] = PETSC_SR_REDUCE_SUM;
621: sr->invecs[sr->numopsbegin+i] = (void*)x;
622: }
623: if (!x->ops->mdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local mdots");
624: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
625: (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
626: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
627: sr->numopsbegin += nv;
628: return(0);
629: }
631: /*@
632: VecMDotEnd - Ends a split phase multiple dot product computation.
634: Input Parameters:
635: + x - the first vector (can be NULL)
636: . nv - number of vectors
637: - y - array of vectors (can be NULL)
639: Output Parameters:
640: . result - where the result will go
642: Level: advanced
644: Notes:
645: Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().
647: .seealso: VecMDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
648: VecTDotBegin(),VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()
650: @*/
651: PetscErrorCode VecMDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
652: {
653: PetscErrorCode ierr;
654: PetscSplitReduction *sr;
655: MPI_Comm comm;
656: int i;
659: PetscObjectGetComm((PetscObject)x,&comm);
660: PetscSplitReductionGet(comm,&sr);
661: PetscSplitReductionEnd(sr);
663: if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
664: if (x && (void*)x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
665: if (sr->reducetype[sr->numopsend] != PETSC_SR_REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
666: for (i=0;i<nv;i++) result[i] = sr->gvalues[sr->numopsend++];
668: /*
669: We are finished getting all the results so reset to no outstanding requests
670: */
671: if (sr->numopsend == sr->numopsbegin) {
672: sr->state = STATE_BEGIN;
673: sr->numopsend = 0;
674: sr->numopsbegin = 0;
675: }
676: return(0);
677: }
679: /*@
680: VecMTDotBegin - Starts a split phase transpose multiple dot product computation.
682: Input Parameters:
683: + x - the first vector
684: . nv - number of vectors
685: . y - array of vectors
686: - result - where the result will go (can be NULL)
688: Level: advanced
690: Notes:
691: Each call to VecMTDotBegin() should be paired with a call to VecMTDotEnd().
693: .seealso: VecMTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
694: VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()
696: @*/
697: PetscErrorCode VecMTDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
698: {
699: PetscErrorCode ierr;
700: PetscSplitReduction *sr;
701: MPI_Comm comm;
702: int i;
705: PetscObjectGetComm((PetscObject)x,&comm);
706: PetscSplitReductionGet(comm,&sr);
707: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
708: for (i=0; i<nv; i++) {
709: if (sr->numopsbegin+i >= sr->maxops) {
710: PetscSplitReductionExtend(sr);
711: }
712: sr->reducetype[sr->numopsbegin+i] = PETSC_SR_REDUCE_SUM;
713: sr->invecs[sr->numopsbegin+i] = (void*)x;
714: }
715: if (!x->ops->mtdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local mdots");
716: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
717: (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
718: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
719: sr->numopsbegin += nv;
720: return(0);
721: }
723: /*@
724: VecMTDotEnd - Ends a split phase transpose multiple dot product computation.
726: Input Parameters:
727: + x - the first vector (can be NULL)
728: . nv - number of vectors
729: - y - array of vectors (can be NULL)
731: Output Parameters:
732: . result - where the result will go
734: Level: advanced
736: Notes:
737: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
739: .seealso: VecMTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
740: VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()
741: @*/
742: PetscErrorCode VecMTDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
743: {
747: /*
748: MTDotEnd() is the same as MDotEnd() so reuse the code
749: */
750: VecMDotEnd(x,nv,y,result);
751: return(0);
752: }