Actual source code: comb.c
petsc-3.5.4 2015-05-23
2: /*
3: Split phase global vector reductions with support for combining the
4: communication portion of several operations. Using MPI-1.1 support only
6: The idea for this and much of the initial code is contributed by
7: Victor Eijkhout.
9: Usage:
10: VecDotBegin(Vec,Vec,PetscScalar *);
11: VecNormBegin(Vec,NormType,PetscReal *);
12: ....
13: VecDotEnd(Vec,Vec,PetscScalar *);
14: VecNormEnd(Vec,NormType,PetscReal *);
16: Limitations:
17: - The order of the xxxEnd() functions MUST be in the same order
18: as the xxxBegin(). There is extensive error checking to try to
19: insure that the user calls the routines in the correct order
20: */
22: #include <petsc-private/vecimpl.h> /*I "petscvec.h" I*/
26: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf,void *recvbuf,PetscMPIInt count,MPI_Datatype datatype,MPI_Op op,MPI_Comm comm,MPI_Request *request)
27: {
31: #if defined(PETSC_HAVE_MPI_IALLREDUCE)
32: MPI_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
33: #elif defined(PETSC_HAVE_MPIX_IALLREDUCE)
34: MPIX_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
35: #else
36: MPI_Allreduce(sendbuf,recvbuf,count,datatype,op,comm);
37: *request = MPI_REQUEST_NULL;
38: #endif
39: return(0);
40: }
42: typedef enum {STATE_BEGIN, STATE_PENDING, STATE_END} SRState;
44: #define REDUCE_SUM 0
45: #define REDUCE_MAX 1
46: #define REDUCE_MIN 2
48: typedef struct {
49: MPI_Comm comm;
50: MPI_Request request;
51: PetscBool async;
52: PetscScalar *lvalues; /* this are the reduced values before call to MPI_Allreduce() */
53: PetscScalar *gvalues; /* values after call to MPI_Allreduce() */
54: void **invecs; /* for debugging only, vector/memory used with each op */
55: PetscInt *reducetype; /* is particular value to be summed or maxed? */
56: SRState state; /* are we calling xxxBegin() or xxxEnd()? */
57: PetscInt maxops; /* total amount of space we have for requests */
58: PetscInt numopsbegin; /* number of requests that have been queued in */
59: PetscInt numopsend; /* number of requests that have been gotten by user */
60: } PetscSplitReduction;
61: /*
62: Note: the lvalues and gvalues are twice as long as maxops, this is to allow the second half of
63: the entries to have a flag indicating if they are REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN these are used by
64: the custom reduction operation that replaces MPI_SUM, MPI_MAX, or MPI_MIN in the case when a reduction involves
65: some of each.
66: */
68: static PetscErrorCode PetscSplitReductionGet(MPI_Comm,PetscSplitReduction**);
69: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction*);
73: /*
74: PetscSplitReductionCreate - Creates a data structure to contain the queued information.
75: */
76: static PetscErrorCode PetscSplitReductionCreate(MPI_Comm comm,PetscSplitReduction **sr)
77: {
81: PetscNew(sr);
82: (*sr)->numopsbegin = 0;
83: (*sr)->numopsend = 0;
84: (*sr)->state = STATE_BEGIN;
85: (*sr)->maxops = 32;
86: PetscMalloc1(2*32,&(*sr)->lvalues);
87: PetscMalloc1(2*32,&(*sr)->gvalues);
88: PetscMalloc1(32,&(*sr)->invecs);
89: (*sr)->comm = comm;
90: (*sr)->request = MPI_REQUEST_NULL;
91: PetscMalloc1(32,&(*sr)->reducetype);
92: (*sr)->async = PETSC_FALSE;
93: #if defined(PETSC_HAVE_MPI_IALLREDUCE) || defined(PETSC_HAVE_MPIX_IALLREDUCE)
94: (*sr)->async = PETSC_TRUE; /* Enable by default */
95: PetscOptionsGetBool(NULL,"-splitreduction_async",&(*sr)->async,NULL);
96: #endif
97: return(0);
98: }
100: /*
101: This function is the MPI reduction operation used when there is
102: a combination of sums and max in the reduction. The call below to
103: MPI_Op_create() converts the function PetscSplitReduction_Local() to the
104: MPI operator PetscSplitReduction_Op.
105: */
106: MPI_Op PetscSplitReduction_Op = 0;
110: PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
111: {
112: PetscScalar *xin = (PetscScalar*)in,*xout = (PetscScalar*)out;
113: PetscInt i,count = (PetscInt)*cnt;
116: if (*datatype != MPIU_SCALAR) {
117: (*PetscErrorPrintf)("Can only handle MPIU_SCALAR data types");
118: MPI_Abort(MPI_COMM_SELF,1);
119: }
120: count = count/2;
121: for (i=0; i<count; i++) {
122: /* second half of xin[] is flags for reduction type */
123: if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_SUM) xout[i] += xin[i];
124: else if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_MAX) xout[i] = PetscMax(*(PetscReal*)(xout+i),*(PetscReal*)(xin+i));
125: else if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_MIN) xout[i] = PetscMin(*(PetscReal*)(xout+i),*(PetscReal*)(xin+i));
126: else {
127: (*PetscErrorPrintf)("Reduction type input is not REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN");
128: MPI_Abort(MPI_COMM_SELF,1);
129: }
130: }
131: PetscFunctionReturnVoid();
132: }
136: /*@
137: PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction
139: Collective but not synchronizing
141: Input Arguments:
142: comm - communicator on which split reduction has been queued
144: Level: advanced
146: Note:
147: Calling this function is optional when using split-mode reduction. On supporting hardware, calling this after all
148: VecXxxBegin() allows the reduction to make asynchronous progress before the result is needed (in VecXxxEnd()).
150: .seealso: VecNormBegin(), VecNormEnd(), VecDotBegin(), VecDotEnd(), VecTDotBegin(), VecTDotEnd(), VecMDotBegin(), VecMDotEnd(), VecMTDotBegin(), VecMTDotEnd()
151: @*/
152: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
153: {
154: PetscErrorCode ierr;
155: PetscSplitReduction *sr;
158: PetscSplitReductionGet(comm,&sr);
159: if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
160: if (sr->async) { /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
161: PetscInt i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
162: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
163: PetscInt sum_flg = 0,max_flg = 0, min_flg = 0;
164: MPI_Comm comm = sr->comm;
165: PetscMPIInt size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);;
166: PetscLogEventBegin(VEC_ReduceBegin,0,0,0,0);
167: MPI_Comm_size(sr->comm,&size);
168: if (size == 1) {
169: PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));
170: } else {
171: /* determine if all reductions are sum, max, or min */
172: for (i=0; i<numops; i++) {
173: if (reducetype[i] == REDUCE_MAX) max_flg = 1;
174: else if (reducetype[i] == REDUCE_SUM) sum_flg = 1;
175: else if (reducetype[i] == REDUCE_MIN) min_flg = 1;
176: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
177: }
178: if (sum_flg + max_flg + min_flg > 1) {
179: /*
180: after all the entires in lvalues we store the reducetype flags to indicate
181: to the reduction operations what are sums and what are max
182: */
183: for (i=0; i<numops; i++) lvalues[numops+i] = reducetype[i];
185: MPIPetsc_Iallreduce(lvalues,gvalues,2*numops,MPIU_SCALAR,PetscSplitReduction_Op,comm,&sr->request);
186: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
187: MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm,&sr->request);
188: } else if (min_flg) {
189: MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm,&sr->request);
190: } else {
191: MPIPetsc_Iallreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm,&sr->request);
192: }
193: }
194: sr->state = STATE_PENDING;
195: sr->numopsend = 0;
196: PetscLogEventEnd(VEC_ReduceBegin,0,0,0,0);
197: } else {
198: PetscSplitReductionApply(sr);
199: }
200: return(0);
201: }
205: static PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
206: {
210: switch (sr->state) {
211: case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
212: PetscSplitReductionApply(sr);
213: break;
214: case STATE_PENDING:
215: /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
216: PetscLogEventBegin(VEC_ReduceEnd,0,0,0,0);
217: if (sr->request != MPI_REQUEST_NULL) {
218: MPI_Wait(&sr->request,MPI_STATUS_IGNORE);
219: }
220: sr->state = STATE_END;
221: PetscLogEventEnd(VEC_ReduceEnd,0,0,0,0);
222: break;
223: default: break; /* everything is already done */
224: }
225: return(0);
226: }
230: /*
231: PetscSplitReductionApply - Actually do the communication required for a split phase reduction
232: */
233: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
234: {
236: PetscInt i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
237: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
238: PetscInt sum_flg = 0,max_flg = 0, min_flg = 0;
239: MPI_Comm comm = sr->comm;
240: PetscMPIInt size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);
243: if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
244: PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,comm);
245: MPI_Comm_size(sr->comm,&size);
246: if (size == 1) {
247: PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));
248: } else {
249: /* determine if all reductions are sum, max, or min */
250: for (i=0; i<numops; i++) {
251: if (reducetype[i] == REDUCE_MAX) max_flg = 1;
252: else if (reducetype[i] == REDUCE_SUM) sum_flg = 1;
253: else if (reducetype[i] == REDUCE_MIN) min_flg = 1;
254: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
255: }
256: if (sum_flg + max_flg + min_flg > 1) {
257: /*
258: after all the entires in lvalues we store the reducetype flags to indicate
259: to the reduction operations what are sums and what are max
260: */
261: for (i=0; i<numops; i++) lvalues[numops+i] = reducetype[i];
262: MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_SCALAR,PetscSplitReduction_Op,comm);
263: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
264: MPI_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm);
265: } else if (min_flg) {
266: MPI_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm);
267: } else {
268: MPI_Allreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm);
269: }
270: }
271: sr->state = STATE_END;
272: sr->numopsend = 0;
273: PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,comm);
274: return(0);
275: }
279: /*
280: PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
281: */
282: PetscErrorCode PetscSplitReductionExtend(PetscSplitReduction *sr)
283: {
285: PetscInt maxops = sr->maxops,*reducetype = sr->reducetype;
286: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
287: void *invecs = sr->invecs;
290: sr->maxops = 2*maxops;
291: PetscMalloc1(2*2*maxops,&sr->lvalues);
292: PetscMalloc1(2*2*maxops,&sr->gvalues);
293: PetscMalloc1(2*maxops,&sr->reducetype);
294: PetscMalloc1(2*maxops,&sr->invecs);
295: PetscMemcpy(sr->lvalues,lvalues,maxops*sizeof(PetscScalar));
296: PetscMemcpy(sr->gvalues,gvalues,maxops*sizeof(PetscScalar));
297: PetscMemcpy(sr->reducetype,reducetype,maxops*sizeof(PetscInt));
298: PetscMemcpy(sr->invecs,invecs,maxops*sizeof(void*));
299: PetscFree(lvalues);
300: PetscFree(gvalues);
301: PetscFree(reducetype);
302: PetscFree(invecs);
303: return(0);
304: }
308: PetscErrorCode PetscSplitReductionDestroy(PetscSplitReduction *sr)
309: {
313: PetscFree(sr->lvalues);
314: PetscFree(sr->gvalues);
315: PetscFree(sr->reducetype);
316: PetscFree(sr->invecs);
317: PetscFree(sr);
318: return(0);
319: }
321: static PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;
325: /*
326: Private routine to delete internal storage when a communicator is freed.
327: This is called by MPI, not by users.
329: The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
330: it was MPI_Comm *comm.
331: */
332: PETSC_EXTERN int MPIAPI Petsc_DelReduction(MPI_Comm comm,int keyval,void* attr_val,void* extra_state)
333: {
337: PetscInfo1(0,"Deleting reduction data in an MPI_Comm %ld\n",(long)comm);
338: PetscSplitReductionDestroy((PetscSplitReduction*)attr_val);
339: return(0);
340: }
342: /*
343: PetscSplitReductionGet - Gets the split reduction object from a
344: PETSc vector, creates if it does not exit.
346: */
349: static PetscErrorCode PetscSplitReductionGet(MPI_Comm comm,PetscSplitReduction **sr)
350: {
352: PetscMPIInt flag;
355: if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
356: /*
357: The calling sequence of the 2nd argument to this function changed
358: between MPI Standard 1.0 and the revisions 1.1 Here we match the
359: new standard, if you are using an MPI implementation that uses
360: the older version you will get a warning message about the next line;
361: it is only a warning message and should do no harm.
362: */
363: MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelReduction,&Petsc_Reduction_keyval,0);
364: }
365: MPI_Attr_get(comm,Petsc_Reduction_keyval,(void**)sr,&flag);
366: if (!flag) { /* doesn't exist yet so create it and put it in */
367: PetscSplitReductionCreate(comm,sr);
368: MPI_Attr_put(comm,Petsc_Reduction_keyval,*sr);
369: PetscInfo1(0,"Putting reduction data in an MPI_Comm %ld\n",(long)comm);
370: }
371: return(0);
372: }
374: /* ----------------------------------------------------------------------------------------------------*/
378: /*@
379: VecDotBegin - Starts a split phase dot product computation.
381: Input Parameters:
382: + x - the first vector
383: . y - the second vector
384: - result - where the result will go (can be NULL)
386: Level: advanced
388: Notes:
389: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
391: seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
392: VecTDotBegin(), VecTDotEnd(), PetscCommSplitReductionBegin()
393: @*/
394: PetscErrorCode VecDotBegin(Vec x,Vec y,PetscScalar *result)
395: {
396: PetscErrorCode ierr;
397: PetscSplitReduction *sr;
398: MPI_Comm comm;
401: PetscObjectGetComm((PetscObject)x,&comm);
402: PetscSplitReductionGet(comm,&sr);
403: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
404: if (sr->numopsbegin >= sr->maxops) {
405: PetscSplitReductionExtend(sr);
406: }
407: sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
408: sr->invecs[sr->numopsbegin] = (void*)x;
409: if (!x->ops->dot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local dots");
410: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
411: (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
412: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
413: return(0);
414: }
418: /*@
419: VecDotEnd - Ends a split phase dot product computation.
421: Input Parameters:
422: + x - the first vector (can be NULL)
423: . y - the second vector (can be NULL)
424: - result - where the result will go
426: Level: advanced
428: Notes:
429: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
431: .seealso: VecDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
432: VecTDotBegin(),VecTDotEnd(), PetscCommSplitReductionBegin()
434: @*/
435: PetscErrorCode VecDotEnd(Vec x,Vec y,PetscScalar *result)
436: {
437: PetscErrorCode ierr;
438: PetscSplitReduction *sr;
439: MPI_Comm comm;
442: PetscObjectGetComm((PetscObject)x,&comm);
443: PetscSplitReductionGet(comm,&sr);
444: PetscSplitReductionEnd(sr);
446: if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
447: if (x && (void*) x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
448: if (sr->reducetype[sr->numopsend] != REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
449: *result = sr->gvalues[sr->numopsend++];
451: /*
452: We are finished getting all the results so reset to no outstanding requests
453: */
454: if (sr->numopsend == sr->numopsbegin) {
455: sr->state = STATE_BEGIN;
456: sr->numopsend = 0;
457: sr->numopsbegin = 0;
458: }
459: return(0);
460: }
464: /*@
465: VecTDotBegin - Starts a split phase transpose dot product computation.
467: Input Parameters:
468: + x - the first vector
469: . y - the second vector
470: - result - where the result will go (can be NULL)
472: Level: advanced
474: Notes:
475: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
477: .seealso: VecTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
478: VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()
480: @*/
481: PetscErrorCode VecTDotBegin(Vec x,Vec y,PetscScalar *result)
482: {
483: PetscErrorCode ierr;
484: PetscSplitReduction *sr;
485: MPI_Comm comm;
488: PetscObjectGetComm((PetscObject)x,&comm);
489: PetscSplitReductionGet(comm,&sr);
490: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
491: if (sr->numopsbegin >= sr->maxops) {
492: PetscSplitReductionExtend(sr);
493: }
494: sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
495: sr->invecs[sr->numopsbegin] = (void*)x;
496: if (!x->ops->tdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local dots");
497: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
498: (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
499: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
500: return(0);
501: }
505: /*@
506: VecTDotEnd - Ends a split phase transpose dot product computation.
508: Input Parameters:
509: + x - the first vector (can be NULL)
510: . y - the second vector (can be NULL)
511: - result - where the result will go
513: Level: advanced
515: Notes:
516: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
518: seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
519: VecDotBegin(), VecDotEnd()
520: @*/
521: PetscErrorCode VecTDotEnd(Vec x,Vec y,PetscScalar *result)
522: {
526: /*
527: TDotEnd() is the same as DotEnd() so reuse the code
528: */
529: VecDotEnd(x,y,result);
530: return(0);
531: }
533: /* -------------------------------------------------------------------------*/
537: /*@
538: VecNormBegin - Starts a split phase norm computation.
540: Input Parameters:
541: + x - the first vector
542: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
543: - result - where the result will go (can be NULL)
545: Level: advanced
547: Notes:
548: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
550: .seealso: VecNormEnd(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()
552: @*/
553: PetscErrorCode VecNormBegin(Vec x,NormType ntype,PetscReal *result)
554: {
555: PetscErrorCode ierr;
556: PetscSplitReduction *sr;
557: PetscReal lresult[2];
558: MPI_Comm comm;
561: PetscObjectGetComm((PetscObject)x,&comm);
562: PetscSplitReductionGet(comm,&sr);
563: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
564: if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops-1 && ntype == NORM_1_AND_2)) {
565: PetscSplitReductionExtend(sr);
566: }
568: sr->invecs[sr->numopsbegin] = (void*)x;
569: if (!x->ops->norm_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local norms");
570: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
571: (*x->ops->norm_local)(x,ntype,lresult);
572: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
573: if (ntype == NORM_2) lresult[0] = lresult[0]*lresult[0];
574: if (ntype == NORM_1_AND_2) lresult[1] = lresult[1]*lresult[1];
575: if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = REDUCE_MAX;
576: else sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
577: sr->lvalues[sr->numopsbegin++] = lresult[0];
578: if (ntype == NORM_1_AND_2) {
579: sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
580: sr->lvalues[sr->numopsbegin++] = lresult[1];
581: }
582: return(0);
583: }
587: /*@
588: VecNormEnd - Ends a split phase norm computation.
590: Input Parameters:
591: + x - the first vector (can be NULL)
592: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
593: - result - where the result will go
595: Level: advanced
597: Notes:
598: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
600: .seealso: VecNormBegin(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()
602: @*/
603: PetscErrorCode VecNormEnd(Vec x,NormType ntype,PetscReal *result)
604: {
605: PetscErrorCode ierr;
606: PetscSplitReduction *sr;
607: MPI_Comm comm;
610: PetscObjectGetComm((PetscObject)x,&comm);
611: PetscSplitReductionGet(comm,&sr);
612: PetscSplitReductionEnd(sr);
614: if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
615: if (x && (void*)x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
616: if (sr->reducetype[sr->numopsend] != REDUCE_MAX && ntype == NORM_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
617: result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);
619: if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
620: else if (ntype == NORM_1_AND_2) {
621: result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
622: result[1] = PetscSqrtReal(result[1]);
623: }
624: if (ntype!=NORM_1_AND_2) {
625: PetscObjectComposedDataSetReal((PetscObject)x,NormIds[ntype],result[0]);
626: }
628: if (sr->numopsend == sr->numopsbegin) {
629: sr->state = STATE_BEGIN;
630: sr->numopsend = 0;
631: sr->numopsbegin = 0;
632: }
633: return(0);
634: }
636: /*
637: Possibly add
639: PetscReductionSumBegin/End()
640: PetscReductionMaxBegin/End()
641: PetscReductionMinBegin/End()
642: or have more like MPI with a single function with flag for Op? Like first better
643: */
647: /*@
648: VecMDotBegin - Starts a split phase multiple dot product computation.
650: Input Parameters:
651: + x - the first vector
652: . nv - number of vectors
653: . y - array of vectors
654: - result - where the result will go (can be NULL)
656: Level: advanced
658: Notes:
659: Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().
661: .seealso: VecMDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
662: VecTDotBegin(), VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()
663: @*/
664: PetscErrorCode VecMDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
665: {
666: PetscErrorCode ierr;
667: PetscSplitReduction *sr;
668: MPI_Comm comm;
669: int i;
672: PetscObjectGetComm((PetscObject)x,&comm);
673: PetscSplitReductionGet(comm,&sr);
674: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
675: for (i=0; i<nv; i++) {
676: if (sr->numopsbegin+i >= sr->maxops) {
677: PetscSplitReductionExtend(sr);
678: }
679: sr->reducetype[sr->numopsbegin+i] = REDUCE_SUM;
680: sr->invecs[sr->numopsbegin+i] = (void*)x;
681: }
682: if (!x->ops->mdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local mdots");
683: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
684: (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
685: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
686: sr->numopsbegin += nv;
687: return(0);
688: }
692: /*@
693: VecMDotEnd - Ends a split phase multiple dot product computation.
695: Input Parameters:
696: + x - the first vector (can be NULL)
697: . nv - number of vectors
698: - y - array of vectors (can be NULL)
700: Output Parameters:
701: . result - where the result will go
703: Level: advanced
705: Notes:
706: Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().
708: .seealso: VecMDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
709: VecTDotBegin(),VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()
711: @*/
712: PetscErrorCode VecMDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
713: {
714: PetscErrorCode ierr;
715: PetscSplitReduction *sr;
716: MPI_Comm comm;
717: int i;
720: PetscObjectGetComm((PetscObject)x,&comm);
721: PetscSplitReductionGet(comm,&sr);
722: PetscSplitReductionEnd(sr);
724: if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
725: if (x && (void*) x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
726: if (sr->reducetype[sr->numopsend] != REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
727: for (i=0;i<nv;i++) result[i] = sr->gvalues[sr->numopsend++];
729: /*
730: We are finished getting all the results so reset to no outstanding requests
731: */
732: if (sr->numopsend == sr->numopsbegin) {
733: sr->state = STATE_BEGIN;
734: sr->numopsend = 0;
735: sr->numopsbegin = 0;
736: }
737: return(0);
738: }
742: /*@
743: VecMTDotBegin - Starts a split phase transpose multiple dot product computation.
745: Input Parameters:
746: + x - the first vector
747: . nv - number of vectors
748: . y - array of vectors
749: - result - where the result will go (can be NULL)
751: Level: advanced
753: Notes:
754: Each call to VecMTDotBegin() should be paired with a call to VecMTDotEnd().
756: .seealso: VecMTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
757: VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()
759: @*/
760: PetscErrorCode VecMTDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
761: {
762: PetscErrorCode ierr;
763: PetscSplitReduction *sr;
764: MPI_Comm comm;
765: int i;
768: PetscObjectGetComm((PetscObject)x,&comm);
769: PetscSplitReductionGet(comm,&sr);
770: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
771: for (i=0; i<nv; i++) {
772: if (sr->numopsbegin+i >= sr->maxops) {
773: PetscSplitReductionExtend(sr);
774: }
775: sr->reducetype[sr->numopsbegin+i] = REDUCE_SUM;
776: sr->invecs[sr->numopsbegin+i] = (void*)x;
777: }
778: if (!x->ops->mtdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local mdots");
779: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
780: (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
781: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
782: sr->numopsbegin += nv;
783: return(0);
784: }
788: /*@
789: VecMTDotEnd - Ends a split phase transpose multiple dot product computation.
791: Input Parameters:
792: + x - the first vector (can be NULL)
793: . nv - number of vectors
794: - y - array of vectors (can be NULL)
796: Output Parameters
797: . result - where the result will go
799: Level: advanced
801: Notes:
802: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
804: .seealso: VecMTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
805: VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()
806: @*/
807: PetscErrorCode VecMTDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
808: {
812: /*
813: MTDotEnd() is the same as MDotEnd() so reuse the code
814: */
815: VecMDotEnd(x,nv,y,result);
816: return(0);
817: }