Actual source code: comb.c

petsc-master 2019-07-18
Report Typos and Errors

  2: /*
  3:       Split phase global vector reductions with support for combining the
  4:    communication portion of several operations. Using MPI-1.1 support only

  6:       The idea for this and much of the initial code is contributed by
  7:    Victor Eijkhout.

  9:        Usage:
 10:              VecDotBegin(Vec,Vec,PetscScalar *);
 11:              VecNormBegin(Vec,NormType,PetscReal *);
 12:              ....
 13:              VecDotEnd(Vec,Vec,PetscScalar *);
 14:              VecNormEnd(Vec,NormType,PetscReal *);

 16:        Limitations:
 17:          - The order of the xxxEnd() functions MUST be in the same order
 18:            as the xxxBegin(). There is extensive error checking to try to
 19:            insure that the user calls the routines in the correct order
 20: */

 22:  #include <petsc/private/vecimpl.h>

 24: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf,void *recvbuf,PetscMPIInt count,MPI_Datatype datatype,MPI_Op op,MPI_Comm comm,MPI_Request *request)
 25: {

 29: #if defined(PETSC_HAVE_MPI_IALLREDUCE)
 30:   MPI_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
 31: #elif defined(PETSC_HAVE_MPIX_IALLREDUCE)
 32:   MPIX_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
 33: #else
 34:   MPIU_Allreduce(sendbuf,recvbuf,count,datatype,op,comm);
 35:   *request = MPI_REQUEST_NULL;
 36: #endif
 37:   return(0);
 38: }

 40: /*
 41:    Note: the lvalues and gvalues are twice as long as maxops, this is to allow the second half of
 42: the entries to have a flag indicating if they are PETSC_SR_REDUCE_SUM, PETSC_SR_REDUCE_MAX, or PETSC_SR_REDUCE_MIN these are used by
 43: the custom reduction operation that replaces MPI_SUM, MPI_MAX, or MPI_MIN in the case when a reduction involves
 44: some of each.
 45: */

 47: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction*);

 49: /*
 50:    PetscSplitReductionCreate - Creates a data structure to contain the queued information.
 51: */
 52: static PetscErrorCode  PetscSplitReductionCreate(MPI_Comm comm,PetscSplitReduction **sr)
 53: {

 57:   PetscNew(sr);
 58:   (*sr)->numopsbegin = 0;
 59:   (*sr)->numopsend   = 0;
 60:   (*sr)->state       = STATE_BEGIN;
 61: #define MAXOPS 32
 62:   (*sr)->maxops      = MAXOPS;
 63:   PetscMalloc4(2*MAXOPS,&(*sr)->lvalues,2*MAXOPS,&(*sr)->gvalues,MAXOPS,&(*sr)->invecs,MAXOPS,&(*sr)->reducetype);
 64: #undef MAXOPS
 65:   (*sr)->comm        = comm;
 66:   (*sr)->request     = MPI_REQUEST_NULL;
 67:   (*sr)->async       = PETSC_FALSE;
 68: #if defined(PETSC_HAVE_MPI_IALLREDUCE) || defined(PETSC_HAVE_MPIX_IALLREDUCE)
 69:   (*sr)->async = PETSC_TRUE;    /* Enable by default */
 70: #endif
 71:   /* always check for option; so that tests that run on systems without support don't warn about unhandled options */
 72:   PetscOptionsGetBool(NULL,NULL,"-splitreduction_async",&(*sr)->async,NULL);
 73:   return(0);
 74: }

 76: /*
 77:        This function is the MPI reduction operation used when there is
 78:    a combination of sums and max in the reduction. The call below to
 79:    MPI_Op_create() converts the function PetscSplitReduction_Local() to the
 80:    MPI operator PetscSplitReduction_Op.
 81: */
 82: MPI_Op PetscSplitReduction_Op = 0;

 84: PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
 85: {
 86:   PetscScalar *xin = (PetscScalar*)in,*xout = (PetscScalar*)out;
 87:   PetscInt    i,count = (PetscInt)*cnt;

 90:   if (*datatype != MPIU_SCALAR) {
 91:     (*PetscErrorPrintf)("Can only handle MPIU_SCALAR data types");
 92:     MPI_Abort(MPI_COMM_SELF,1);
 93:   }
 94:   count = count/2;
 95:   for (i=0; i<count; i++) {
 96:      /* second half of xin[] is flags for reduction type */
 97:     if      ((PetscInt)PetscRealPart(xin[count+i]) == PETSC_SR_REDUCE_SUM) xout[i] += xin[i];
 98:     else if ((PetscInt)PetscRealPart(xin[count+i]) == PETSC_SR_REDUCE_MAX) xout[i] = PetscMax(*(PetscReal*)(xout+i),*(PetscReal*)(xin+i));
 99:     else if ((PetscInt)PetscRealPart(xin[count+i]) == PETSC_SR_REDUCE_MIN) xout[i] = PetscMin(*(PetscReal*)(xout+i),*(PetscReal*)(xin+i));
100:     else {
101:       (*PetscErrorPrintf)("Reduction type input is not PETSC_SR_REDUCE_SUM, PETSC_SR_REDUCE_MAX, or PETSC_SR_REDUCE_MIN");
102:       MPI_Abort(MPI_COMM_SELF,1);
103:     }
104:   }
105:   PetscFunctionReturnVoid();
106: }

108: /*@
109:    PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction

111:    Collective but not synchronizing

113:    Input Arguments:
114:    comm - communicator on which split reduction has been queued

116:    Level: advanced

118:    Note:
119:    Calling this function is optional when using split-mode reduction. On supporting hardware, calling this after all
120:    VecXxxBegin() allows the reduction to make asynchronous progress before the result is needed (in VecXxxEnd()).

122: .seealso: VecNormBegin(), VecNormEnd(), VecDotBegin(), VecDotEnd(), VecTDotBegin(), VecTDotEnd(), VecMDotBegin(), VecMDotEnd(), VecMTDotBegin(), VecMTDotEnd()
123: @*/
124: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
125: {
126:   PetscErrorCode      ierr;
127:   PetscSplitReduction *sr;

130:   PetscSplitReductionGet(comm,&sr);
131:   if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
132:   if (sr->async) {              /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
133:     PetscInt       i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
134:     PetscScalar    *lvalues = sr->lvalues,*gvalues = sr->gvalues;
135:     PetscInt       sum_flg = 0,max_flg = 0, min_flg = 0;
136:     MPI_Comm       comm = sr->comm;
137:     PetscMPIInt    size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);;
138:     PetscLogEventBegin(VEC_ReduceBegin,0,0,0,0);
139:     MPI_Comm_size(sr->comm,&size);
140:     if (size == 1) {
141:       PetscArraycpy(gvalues,lvalues,numops);
142:     } else {
143:       /* determine if all reductions are sum, max, or min */
144:       for (i=0; i<numops; i++) {
145:         if      (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
146:         else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
147:         else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
148:         else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
149:       }
150:       if (sum_flg + max_flg + min_flg > 1) {
151:         /*
152:          after all the entires in lvalues we store the reducetype flags to indicate
153:          to the reduction operations what are sums and what are max
154:          */
155:         for (i=0; i<numops; i++) lvalues[numops+i] = reducetype[i];

157:         MPIPetsc_Iallreduce(lvalues,gvalues,2*numops,MPIU_SCALAR,PetscSplitReduction_Op,comm,&sr->request);
158:       } else if (max_flg) {   /* Compute max of real and imag parts separately, presumably only the real part is used */
159:         MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm,&sr->request);
160:       } else if (min_flg) {
161:         MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm,&sr->request);
162:       } else {
163:         MPIPetsc_Iallreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm,&sr->request);
164:       }
165:     }
166:     sr->state     = STATE_PENDING;
167:     sr->numopsend = 0;
168:     PetscLogEventEnd(VEC_ReduceBegin,0,0,0,0);
169:   } else {
170:     PetscSplitReductionApply(sr);
171:   }
172:   return(0);
173: }

175: PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
176: {

180:   switch (sr->state) {
181:   case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
182:     PetscSplitReductionApply(sr);
183:     break;
184:   case STATE_PENDING:
185:     /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
186:     PetscLogEventBegin(VEC_ReduceEnd,0,0,0,0);
187:     if (sr->request != MPI_REQUEST_NULL) {
188:       MPI_Wait(&sr->request,MPI_STATUS_IGNORE);
189:     }
190:     sr->state = STATE_END;
191:     PetscLogEventEnd(VEC_ReduceEnd,0,0,0,0);
192:     break;
193:   default: break;            /* everything is already done */
194:   }
195:   return(0);
196: }

198: /*
199:    PetscSplitReductionApply - Actually do the communication required for a split phase reduction
200: */
201: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
202: {
204:   PetscInt       i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
205:   PetscScalar    *lvalues = sr->lvalues,*gvalues = sr->gvalues;
206:   PetscInt       sum_flg  = 0,max_flg = 0, min_flg = 0;
207:   MPI_Comm       comm     = sr->comm;
208:   PetscMPIInt    size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);

211:   if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
212:   PetscLogEventBegin(VEC_ReduceCommunication,0,0,0,0);
213:   MPI_Comm_size(sr->comm,&size);
214:   if (size == 1) {
215:     PetscArraycpy(gvalues,lvalues,numops);
216:   } else {
217:     /* determine if all reductions are sum, max, or min */
218:     for (i=0; i<numops; i++) {
219:       if      (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
220:       else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
221:       else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
222:       else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
223:     }
224:     if (sum_flg + max_flg + min_flg > 1) {
225:       /*
226:          after all the entires in lvalues we store the reducetype flags to indicate
227:          to the reduction operations what are sums and what are max
228:       */
229:       for (i=0; i<numops; i++) lvalues[numops+i] = reducetype[i];
230:       MPIU_Allreduce(lvalues,gvalues,2*numops,MPIU_SCALAR,PetscSplitReduction_Op,comm);
231:     } else if (max_flg) {     /* Compute max of real and imag parts separately, presumably only the real part is used */
232:       MPIU_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm);
233:     } else if (min_flg) {
234:       MPIU_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm);
235:     } else {
236:       MPIU_Allreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm);
237:     }
238:   }
239:   sr->state     = STATE_END;
240:   sr->numopsend = 0;
241:   PetscLogEventEnd(VEC_ReduceCommunication,0,0,0,0);
242:   return(0);
243: }

245: /*
246:    PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
247: */
248: PetscErrorCode  PetscSplitReductionExtend(PetscSplitReduction *sr)
249: {
251:   PetscInt       maxops   = sr->maxops,*reducetype = sr->reducetype;
252:   PetscScalar    *lvalues = sr->lvalues,*gvalues = sr->gvalues;
253:   void           **invecs = sr->invecs;

256:   sr->maxops     = 2*maxops;
257:   PetscMalloc4(2*2*maxops,&sr->lvalues,2*2*maxops,&sr->gvalues,2*maxops,&sr->reducetype,2*maxops,&sr->invecs);
258:   PetscArraycpy(sr->lvalues,lvalues,maxops);
259:   PetscArraycpy(sr->gvalues,gvalues,maxops);
260:   PetscArraycpy(sr->reducetype,reducetype,maxops);
261:   PetscArraycpy(sr->invecs,invecs,maxops);
262:   PetscFree4(lvalues,gvalues,reducetype,invecs);
263:   return(0);
264: }

266: PetscErrorCode  PetscSplitReductionDestroy(PetscSplitReduction *sr)
267: {

271:   PetscFree4(sr->lvalues,sr->gvalues,sr->reducetype,sr->invecs);
272:   PetscFree(sr);
273:   return(0);
274: }

276: PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;

278: /*
279:    Private routine to delete internal storage when a communicator is freed.
280:   This is called by MPI, not by users.

282:   The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
283:   it was MPI_Comm *comm.
284: */
285: PETSC_EXTERN int MPIAPI Petsc_DelReduction(MPI_Comm comm,int keyval,void* attr_val,void* extra_state)
286: {

290:   PetscInfo1(0,"Deleting reduction data in an MPI_Comm %ld\n",(long)comm);CHKERRMPI(ierr);
291:   PetscSplitReductionDestroy((PetscSplitReduction*)attr_val);CHKERRMPI(ierr);
292:   return(0);
293: }

295: /*
296:      PetscSplitReductionGet - Gets the split reduction object from a
297:         PETSc vector, creates if it does not exit.

299: */
300: PetscErrorCode PetscSplitReductionGet(MPI_Comm comm,PetscSplitReduction **sr)
301: {
303:   PetscMPIInt    flag;

306:   if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
307:     /*
308:        The calling sequence of the 2nd argument to this function changed
309:        between MPI Standard 1.0 and the revisions 1.1 Here we match the
310:        new standard, if you are using an MPI implementation that uses
311:        the older version you will get a warning message about the next line;
312:        it is only a warning message and should do no harm.
313:     */
314:     MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,Petsc_DelReduction,&Petsc_Reduction_keyval,0);
315:   }
316:   MPI_Comm_get_attr(comm,Petsc_Reduction_keyval,(void**)sr,&flag);
317:   if (!flag) {  /* doesn't exist yet so create it and put it in */
318:     PetscSplitReductionCreate(comm,sr);
319:     MPI_Comm_set_attr(comm,Petsc_Reduction_keyval,*sr);
320:     PetscInfo1(0,"Putting reduction data in an MPI_Comm %ld\n",(long)comm);
321:   }
322:   return(0);
323: }

325: /* ----------------------------------------------------------------------------------------------------*/

327: /*@
328:    VecDotBegin - Starts a split phase dot product computation.

330:    Input Parameters:
331: +   x - the first vector
332: .   y - the second vector
333: -   result - where the result will go (can be NULL)

335:    Level: advanced

337:    Notes:
338:    Each call to VecDotBegin() should be paired with a call to VecDotEnd().

340: seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
341:          VecTDotBegin(), VecTDotEnd(), PetscCommSplitReductionBegin()
342: @*/
343: PetscErrorCode  VecDotBegin(Vec x,Vec y,PetscScalar *result)
344: {
345:   PetscErrorCode      ierr;
346:   PetscSplitReduction *sr;
347:   MPI_Comm            comm;

352:   PetscObjectGetComm((PetscObject)x,&comm);
353:   PetscSplitReductionGet(comm,&sr);
354:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
355:   if (sr->numopsbegin >= sr->maxops) {
356:     PetscSplitReductionExtend(sr);
357:   }
358:   sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
359:   sr->invecs[sr->numopsbegin]     = (void*)x;
360:   if (!x->ops->dot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local dots");
361:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
362:   (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
363:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
364:   return(0);
365: }

367: /*@
368:    VecDotEnd - Ends a split phase dot product computation.

370:    Input Parameters:
371: +  x - the first vector (can be NULL)
372: .  y - the second vector (can be NULL)
373: -  result - where the result will go

375:    Level: advanced

377:    Notes:
378:    Each call to VecDotBegin() should be paired with a call to VecDotEnd().

380: .seealso: VecDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
381:          VecTDotBegin(),VecTDotEnd(), PetscCommSplitReductionBegin()

383: @*/
384: PetscErrorCode  VecDotEnd(Vec x,Vec y,PetscScalar *result)
385: {
386:   PetscErrorCode      ierr;
387:   PetscSplitReduction *sr;
388:   MPI_Comm            comm;

391:   PetscObjectGetComm((PetscObject)x,&comm);
392:   PetscSplitReductionGet(comm,&sr);
393:   PetscSplitReductionEnd(sr);

395:   if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
396:   if (x && (void*)x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
397:   if (sr->reducetype[sr->numopsend] != PETSC_SR_REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
398:   *result = sr->gvalues[sr->numopsend++];

400:   /*
401:      We are finished getting all the results so reset to no outstanding requests
402:   */
403:   if (sr->numopsend == sr->numopsbegin) {
404:     sr->state       = STATE_BEGIN;
405:     sr->numopsend   = 0;
406:     sr->numopsbegin = 0;
407:   }
408:   return(0);
409: }

411: /*@
412:    VecTDotBegin - Starts a split phase transpose dot product computation.

414:    Input Parameters:
415: +  x - the first vector
416: .  y - the second vector
417: -  result - where the result will go (can be NULL)

419:    Level: advanced

421:    Notes:
422:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

424: .seealso: VecTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
425:          VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()

427: @*/
428: PetscErrorCode  VecTDotBegin(Vec x,Vec y,PetscScalar *result)
429: {
430:   PetscErrorCode      ierr;
431:   PetscSplitReduction *sr;
432:   MPI_Comm            comm;

435:   PetscObjectGetComm((PetscObject)x,&comm);
436:   PetscSplitReductionGet(comm,&sr);
437:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
438:   if (sr->numopsbegin >= sr->maxops) {
439:     PetscSplitReductionExtend(sr);
440:   }
441:   sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
442:   sr->invecs[sr->numopsbegin]     = (void*)x;
443:   if (!x->ops->tdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local dots");
444:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
445:   (*x->ops->tdot_local)(x,y,sr->lvalues+sr->numopsbegin++);
446:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
447:   return(0);
448: }

450: /*@
451:    VecTDotEnd - Ends a split phase transpose dot product computation.

453:    Input Parameters:
454: +  x - the first vector (can be NULL)
455: .  y - the second vector (can be NULL)
456: -  result - where the result will go

458:    Level: advanced

460:    Notes:
461:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

463: seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
464:          VecDotBegin(), VecDotEnd()
465: @*/
466: PetscErrorCode  VecTDotEnd(Vec x,Vec y,PetscScalar *result)
467: {

471:   /*
472:       TDotEnd() is the same as DotEnd() so reuse the code
473:   */
474:   VecDotEnd(x,y,result);
475:   return(0);
476: }

478: /* -------------------------------------------------------------------------*/

480: /*@
481:    VecNormBegin - Starts a split phase norm computation.

483:    Input Parameters:
484: +  x - the first vector
485: .  ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
486: -  result - where the result will go (can be NULL)

488:    Level: advanced

490:    Notes:
491:    Each call to VecNormBegin() should be paired with a call to VecNormEnd().

493: .seealso: VecNormEnd(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()

495: @*/
496: PetscErrorCode  VecNormBegin(Vec x,NormType ntype,PetscReal *result)
497: {
498:   PetscErrorCode      ierr;
499:   PetscSplitReduction *sr;
500:   PetscReal           lresult[2];
501:   MPI_Comm            comm;

505:   PetscObjectGetComm((PetscObject)x,&comm);
506:   PetscSplitReductionGet(comm,&sr);
507:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
508:   if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops-1 && ntype == NORM_1_AND_2)) {
509:     PetscSplitReductionExtend(sr);
510:   }

512:   sr->invecs[sr->numopsbegin] = (void*)x;
513:   if (!x->ops->norm_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local norms");
514:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
515:   (*x->ops->norm_local)(x,ntype,lresult);
516:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
517:   if (ntype == NORM_2)         lresult[0]                = lresult[0]*lresult[0];
518:   if (ntype == NORM_1_AND_2)   lresult[1]                = lresult[1]*lresult[1];
519:   if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_MAX;
520:   else                   sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
521:   sr->lvalues[sr->numopsbegin++] = lresult[0];
522:   if (ntype == NORM_1_AND_2) {
523:     sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
524:     sr->lvalues[sr->numopsbegin++]  = lresult[1];
525:   }
526:   return(0);
527: }

529: /*@
530:    VecNormEnd - Ends a split phase norm computation.

532:    Input Parameters:
533: +  x - the first vector 
534: .  ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
535: -  result - where the result will go

537:    Level: advanced

539:    Notes:
540:    Each call to VecNormBegin() should be paired with a call to VecNormEnd().

542:    The x vector is not allowed to be NULL, otherwise the vector would not have its correctly cached norm value

544: .seealso: VecNormBegin(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()

546: @*/
547: PetscErrorCode  VecNormEnd(Vec x,NormType ntype,PetscReal *result)
548: {
549:   PetscErrorCode      ierr;
550:   PetscSplitReduction *sr;
551:   MPI_Comm            comm;

555:   PetscObjectGetComm((PetscObject)x,&comm);
556:   PetscSplitReductionGet(comm,&sr);
557:   PetscSplitReductionEnd(sr);

559:   if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
560:   if ((void*)x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
561:   if (sr->reducetype[sr->numopsend] != PETSC_SR_REDUCE_MAX && ntype == NORM_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
562:   result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);

564:   if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
565:   else if (ntype == NORM_1_AND_2) {
566:     result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
567:     result[1] = PetscSqrtReal(result[1]);
568:   }
569:   if (ntype!=NORM_1_AND_2) {
570:     PetscObjectComposedDataSetReal((PetscObject)x,NormIds[ntype],result[0]);
571:   }

573:   if (sr->numopsend == sr->numopsbegin) {
574:     sr->state       = STATE_BEGIN;
575:     sr->numopsend   = 0;
576:     sr->numopsbegin = 0;
577:   }
578:   return(0);
579: }

581: /*
582:    Possibly add

584:      PetscReductionSumBegin/End()
585:      PetscReductionMaxBegin/End()
586:      PetscReductionMinBegin/End()
587:    or have more like MPI with a single function with flag for Op? Like first better
588: */

590: /*@
591:    VecMDotBegin - Starts a split phase multiple dot product computation.

593:    Input Parameters:
594: +   x - the first vector
595: .   nv - number of vectors
596: .   y - array of vectors
597: -   result - where the result will go (can be NULL)

599:    Level: advanced

601:    Notes:
602:    Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().

604: .seealso: VecMDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
605:          VecTDotBegin(), VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()
606: @*/
607: PetscErrorCode  VecMDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
608: {
609:   PetscErrorCode      ierr;
610:   PetscSplitReduction *sr;
611:   MPI_Comm            comm;
612:   int                 i;

615:   PetscObjectGetComm((PetscObject)x,&comm);
616:   PetscSplitReductionGet(comm,&sr);
617:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
618:   for (i=0; i<nv; i++) {
619:     if (sr->numopsbegin+i >= sr->maxops) {
620:       PetscSplitReductionExtend(sr);
621:     }
622:     sr->reducetype[sr->numopsbegin+i] = PETSC_SR_REDUCE_SUM;
623:     sr->invecs[sr->numopsbegin+i]     = (void*)x;
624:   }
625:   if (!x->ops->mdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local mdots");
626:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
627:   (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
628:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
629:   sr->numopsbegin += nv;
630:   return(0);
631: }

633: /*@
634:    VecMDotEnd - Ends a split phase multiple dot product computation.

636:    Input Parameters:
637: +   x - the first vector (can be NULL)
638: .   nv - number of vectors
639: -   y - array of vectors (can be NULL)

641:    Output Parameters:
642: .   result - where the result will go

644:    Level: advanced

646:    Notes:
647:    Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().

649: .seealso: VecMDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
650:          VecTDotBegin(),VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()

652: @*/
653: PetscErrorCode  VecMDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
654: {
655:   PetscErrorCode      ierr;
656:   PetscSplitReduction *sr;
657:   MPI_Comm            comm;
658:   int                 i;

661:   PetscObjectGetComm((PetscObject)x,&comm);
662:   PetscSplitReductionGet(comm,&sr);
663:   PetscSplitReductionEnd(sr);

665:   if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
666:   if (x && (void*)x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
667:   if (sr->reducetype[sr->numopsend] != PETSC_SR_REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
668:   for (i=0;i<nv;i++) result[i] = sr->gvalues[sr->numopsend++];

670:   /*
671:      We are finished getting all the results so reset to no outstanding requests
672:   */
673:   if (sr->numopsend == sr->numopsbegin) {
674:     sr->state       = STATE_BEGIN;
675:     sr->numopsend   = 0;
676:     sr->numopsbegin = 0;
677:   }
678:   return(0);
679: }

681: /*@
682:    VecMTDotBegin - Starts a split phase transpose multiple dot product computation.

684:    Input Parameters:
685: +  x - the first vector
686: .  nv - number of vectors
687: .  y - array of  vectors
688: -  result - where the result will go (can be NULL)

690:    Level: advanced

692:    Notes:
693:    Each call to VecMTDotBegin() should be paired with a call to VecMTDotEnd().

695: .seealso: VecMTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
696:          VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()

698: @*/
699: PetscErrorCode  VecMTDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
700: {
701:   PetscErrorCode      ierr;
702:   PetscSplitReduction *sr;
703:   MPI_Comm            comm;
704:   int                 i;

707:   PetscObjectGetComm((PetscObject)x,&comm);
708:   PetscSplitReductionGet(comm,&sr);
709:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
710:   for (i=0; i<nv; i++) {
711:     if (sr->numopsbegin+i >= sr->maxops) {
712:       PetscSplitReductionExtend(sr);
713:     }
714:     sr->reducetype[sr->numopsbegin+i] = PETSC_SR_REDUCE_SUM;
715:     sr->invecs[sr->numopsbegin+i]     = (void*)x;
716:   }
717:   if (!x->ops->mtdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local mdots");
718:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
719:   (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
720:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
721:   sr->numopsbegin += nv;
722:   return(0);
723: }

725: /*@
726:    VecMTDotEnd - Ends a split phase transpose multiple dot product computation.

728:    Input Parameters:
729: +  x - the first vector (can be NULL)
730: .  nv - number of vectors
731: -  y - array of  vectors (can be NULL)

733:    Output Parameters
734: .  result - where the result will go

736:    Level: advanced

738:    Notes:
739:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

741: .seealso: VecMTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
742:          VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()
743: @*/
744: PetscErrorCode  VecMTDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
745: {

749:   /*
750:       MTDotEnd() is the same as MDotEnd() so reuse the code
751:   */
752:   VecMDotEnd(x,nv,y,result);
753:   return(0);
754: }