Actual source code: comb.c

petsc-main 2021-04-16
Report Typos and Errors

  2: /*
  3:       Split phase global vector reductions with support for combining the
  4:    communication portion of several operations. Using MPI-1.1 support only

  6:       The idea for this and much of the initial code is contributed by
  7:    Victor Eijkhout.

  9:        Usage:
 10:              VecDotBegin(Vec,Vec,PetscScalar *);
 11:              VecNormBegin(Vec,NormType,PetscReal *);
 12:              ....
 13:              VecDotEnd(Vec,Vec,PetscScalar *);
 14:              VecNormEnd(Vec,NormType,PetscReal *);

 16:        Limitations:
 17:          - The order of the xxxEnd() functions MUST be in the same order
 18:            as the xxxBegin(). There is extensive error checking to try to
 19:            insure that the user calls the routines in the correct order
 20: */

 22: #include <petsc/private/vecimpl.h>

 24: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf,void *recvbuf,PetscMPIInt count,MPI_Datatype datatype,MPI_Op op,MPI_Comm comm,MPI_Request *request)
 25: {

 29: #if defined(PETSC_HAVE_MPI_IALLREDUCE)
 30:   MPI_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
 31: #else
 32:   MPIU_Allreduce(sendbuf,recvbuf,count,datatype,op,comm);
 33:   *request = MPI_REQUEST_NULL;
 34: #endif
 35:   return(0);
 36: }

 38: /*
 39:    Note: the lvalues and gvalues are twice as long as maxops, this is to allow the second half of
 40: the entries to have a flag indicating if they are PETSC_SR_REDUCE_SUM, PETSC_SR_REDUCE_MAX, or PETSC_SR_REDUCE_MIN these are used by
 41: the custom reduction operation that replaces MPI_SUM, MPI_MAX, or MPI_MIN in the case when a reduction involves
 42: some of each.
 43: */

 45: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction*);

 47: /*
 48:    PetscSplitReductionCreate - Creates a data structure to contain the queued information.
 49: */
 50: static PetscErrorCode  PetscSplitReductionCreate(MPI_Comm comm,PetscSplitReduction **sr)
 51: {

 55:   PetscNew(sr);
 56:   (*sr)->numopsbegin = 0;
 57:   (*sr)->numopsend   = 0;
 58:   (*sr)->state       = STATE_BEGIN;
 59: #define MAXOPS 32
 60:   (*sr)->maxops      = MAXOPS;
 61:   PetscMalloc4(2*MAXOPS,&(*sr)->lvalues,2*MAXOPS,&(*sr)->gvalues,MAXOPS,&(*sr)->invecs,MAXOPS,&(*sr)->reducetype);
 62: #undef MAXOPS
 63:   (*sr)->comm        = comm;
 64:   (*sr)->request     = MPI_REQUEST_NULL;
 65:   (*sr)->async       = PETSC_FALSE;
 66: #if defined(PETSC_HAVE_MPI_IALLREDUCE)
 67:   (*sr)->async = PETSC_TRUE;    /* Enable by default */
 68: #endif
 69:   /* always check for option; so that tests that run on systems without support don't warn about unhandled options */
 70:   PetscOptionsGetBool(NULL,NULL,"-splitreduction_async",&(*sr)->async,NULL);
 71:   return(0);
 72: }

 74: /*
 75:        This function is the MPI reduction operation used when there is
 76:    a combination of sums and max in the reduction. The call below to
 77:    MPI_Op_create() converts the function PetscSplitReduction_Local() to the
 78:    MPI operator PetscSplitReduction_Op.
 79: */
 80: MPI_Op PetscSplitReduction_Op = 0;

 82: PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
 83: {
 84:   PetscScalar *xin = (PetscScalar*)in,*xout = (PetscScalar*)out;
 85:   PetscInt    i,count = (PetscInt)*cnt;

 88:   if (*datatype != MPIU_SCALAR) {
 89:     (*PetscErrorPrintf)("Can only handle MPIU_SCALAR data types");
 90:     PETSCABORT(MPI_COMM_SELF,PETSC_ERR_ARG_WRONG);
 91:   }
 92:   count = count/2;
 93:   for (i=0; i<count; i++) {
 94:      /* second half of xin[] is flags for reduction type */
 95:     if      ((PetscInt)PetscRealPart(xin[count+i]) == PETSC_SR_REDUCE_SUM) xout[i] += xin[i];
 96:     else if ((PetscInt)PetscRealPart(xin[count+i]) == PETSC_SR_REDUCE_MAX) xout[i] = PetscMax(*(PetscReal*)(xout+i),*(PetscReal*)(xin+i));
 97:     else if ((PetscInt)PetscRealPart(xin[count+i]) == PETSC_SR_REDUCE_MIN) xout[i] = PetscMin(*(PetscReal*)(xout+i),*(PetscReal*)(xin+i));
 98:     else {
 99:       (*PetscErrorPrintf)("Reduction type input is not PETSC_SR_REDUCE_SUM, PETSC_SR_REDUCE_MAX, or PETSC_SR_REDUCE_MIN");
100:       PETSCABORT(MPI_COMM_SELF,PETSC_ERR_ARG_WRONG);
101:     }
102:   }
103:   PetscFunctionReturnVoid();
104: }

106: /*@
107:    PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction

109:    Collective but not synchronizing

111:    Input Arguments:
112:    comm - communicator on which split reduction has been queued

114:    Level: advanced

116:    Note:
117:    Calling this function is optional when using split-mode reduction. On supporting hardware, calling this after all
118:    VecXxxBegin() allows the reduction to make asynchronous progress before the result is needed (in VecXxxEnd()).

120: .seealso: VecNormBegin(), VecNormEnd(), VecDotBegin(), VecDotEnd(), VecTDotBegin(), VecTDotEnd(), VecMDotBegin(), VecMDotEnd(), VecMTDotBegin(), VecMTDotEnd()
121: @*/
122: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
123: {
124:   PetscErrorCode      ierr;
125:   PetscSplitReduction *sr;

128:   PetscSplitReductionGet(comm,&sr);
129:   if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
130:   if (sr->async) {              /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
131:     PetscInt       i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
132:     PetscScalar    *lvalues = sr->lvalues,*gvalues = sr->gvalues;
133:     PetscInt       sum_flg = 0,max_flg = 0, min_flg = 0;
134:     MPI_Comm       comm = sr->comm;
135:     PetscMPIInt    size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);
136:     PetscLogEventBegin(VEC_ReduceBegin,0,0,0,0);
137:     MPI_Comm_size(sr->comm,&size);
138:     if (size == 1) {
139:       PetscArraycpy(gvalues,lvalues,numops);
140:     } else {
141:       /* determine if all reductions are sum, max, or min */
142:       for (i=0; i<numops; i++) {
143:         if      (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
144:         else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
145:         else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
146:         else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
147:       }
148:       if (sum_flg + max_flg + min_flg > 1) {
149:         /*
150:          after all the entires in lvalues we store the reducetype flags to indicate
151:          to the reduction operations what are sums and what are max
152:          */
153:         for (i=0; i<numops; i++) lvalues[numops+i] = reducetype[i];

155:         MPIPetsc_Iallreduce(lvalues,gvalues,2*numops,MPIU_SCALAR,PetscSplitReduction_Op,comm,&sr->request);
156:       } else if (max_flg) {   /* Compute max of real and imag parts separately, presumably only the real part is used */
157:         MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm,&sr->request);
158:       } else if (min_flg) {
159:         MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm,&sr->request);
160:       } else {
161:         MPIPetsc_Iallreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm,&sr->request);
162:       }
163:     }
164:     sr->state     = STATE_PENDING;
165:     sr->numopsend = 0;
166:     PetscLogEventEnd(VEC_ReduceBegin,0,0,0,0);
167:   } else {
168:     PetscSplitReductionApply(sr);
169:   }
170:   return(0);
171: }

173: PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
174: {

178:   switch (sr->state) {
179:   case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
180:     PetscSplitReductionApply(sr);
181:     break;
182:   case STATE_PENDING:
183:     /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
184:     PetscLogEventBegin(VEC_ReduceEnd,0,0,0,0);
185:     if (sr->request != MPI_REQUEST_NULL) {
186:       MPI_Wait(&sr->request,MPI_STATUS_IGNORE);
187:     }
188:     sr->state = STATE_END;
189:     PetscLogEventEnd(VEC_ReduceEnd,0,0,0,0);
190:     break;
191:   default: break;            /* everything is already done */
192:   }
193:   return(0);
194: }

196: /*
197:    PetscSplitReductionApply - Actually do the communication required for a split phase reduction
198: */
199: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
200: {
202:   PetscInt       i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
203:   PetscScalar    *lvalues = sr->lvalues,*gvalues = sr->gvalues;
204:   PetscInt       sum_flg  = 0,max_flg = 0, min_flg = 0;
205:   MPI_Comm       comm     = sr->comm;
206:   PetscMPIInt    size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);

209:   if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
210:   PetscLogEventBegin(VEC_ReduceCommunication,0,0,0,0);
211:   MPI_Comm_size(sr->comm,&size);
212:   if (size == 1) {
213:     PetscArraycpy(gvalues,lvalues,numops);
214:   } else {
215:     /* determine if all reductions are sum, max, or min */
216:     for (i=0; i<numops; i++) {
217:       if      (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
218:       else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
219:       else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
220:       else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
221:     }
222:     if (sum_flg + max_flg + min_flg > 1) {
223:       /*
224:          after all the entires in lvalues we store the reducetype flags to indicate
225:          to the reduction operations what are sums and what are max
226:       */
227:       for (i=0; i<numops; i++) lvalues[numops+i] = reducetype[i];
228:       MPIU_Allreduce(lvalues,gvalues,2*numops,MPIU_SCALAR,PetscSplitReduction_Op,comm);
229:     } else if (max_flg) {     /* Compute max of real and imag parts separately, presumably only the real part is used */
230:       MPIU_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm);
231:     } else if (min_flg) {
232:       MPIU_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm);
233:     } else {
234:       MPIU_Allreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm);
235:     }
236:   }
237:   sr->state     = STATE_END;
238:   sr->numopsend = 0;
239:   PetscLogEventEnd(VEC_ReduceCommunication,0,0,0,0);
240:   return(0);
241: }

243: /*
244:    PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
245: */
246: PetscErrorCode  PetscSplitReductionExtend(PetscSplitReduction *sr)
247: {
249:   PetscInt       maxops   = sr->maxops,*reducetype = sr->reducetype;
250:   PetscScalar    *lvalues = sr->lvalues,*gvalues = sr->gvalues;
251:   void           **invecs = sr->invecs;

254:   sr->maxops     = 2*maxops;
255:   PetscMalloc4(2*2*maxops,&sr->lvalues,2*2*maxops,&sr->gvalues,2*maxops,&sr->reducetype,2*maxops,&sr->invecs);
256:   PetscArraycpy(sr->lvalues,lvalues,maxops);
257:   PetscArraycpy(sr->gvalues,gvalues,maxops);
258:   PetscArraycpy(sr->reducetype,reducetype,maxops);
259:   PetscArraycpy(sr->invecs,invecs,maxops);
260:   PetscFree4(lvalues,gvalues,reducetype,invecs);
261:   return(0);
262: }

264: PetscErrorCode  PetscSplitReductionDestroy(PetscSplitReduction *sr)
265: {

269:   PetscFree4(sr->lvalues,sr->gvalues,sr->reducetype,sr->invecs);
270:   PetscFree(sr);
271:   return(0);
272: }

274: PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;

276: /*
277:    Private routine to delete internal storage when a communicator is freed.
278:   This is called by MPI, not by users.

280:   The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
281:   it was MPI_Comm *comm.
282: */
283: PETSC_EXTERN int MPIAPI Petsc_DelReduction(MPI_Comm comm,int keyval,void* attr_val,void* extra_state)
284: {

288:   PetscInfo1(0,"Deleting reduction data in an MPI_Comm %ld\n",(long)comm);
289:   PetscSplitReductionDestroy((PetscSplitReduction*)attr_val);
290:   return(0);
291: }

293: /*
294:      PetscSplitReductionGet - Gets the split reduction object from a
295:         PETSc vector, creates if it does not exit.

297: */
298: PetscErrorCode PetscSplitReductionGet(MPI_Comm comm,PetscSplitReduction **sr)
299: {
301:   PetscMPIInt    flag;

304:   if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
305:     /*
306:        The calling sequence of the 2nd argument to this function changed
307:        between MPI Standard 1.0 and the revisions 1.1 Here we match the
308:        new standard, if you are using an MPI implementation that uses
309:        the older version you will get a warning message about the next line;
310:        it is only a warning message and should do no harm.
311:     */
312:     MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,Petsc_DelReduction,&Petsc_Reduction_keyval,NULL);
313:   }
314:   MPI_Comm_get_attr(comm,Petsc_Reduction_keyval,(void**)sr,&flag);
315:   if (!flag) {  /* doesn't exist yet so create it and put it in */
316:     PetscSplitReductionCreate(comm,sr);
317:     MPI_Comm_set_attr(comm,Petsc_Reduction_keyval,*sr);
318:     PetscInfo1(0,"Putting reduction data in an MPI_Comm %ld\n",(long)comm);
319:   }
320:   return(0);
321: }

323: /* ----------------------------------------------------------------------------------------------------*/

325: /*@
326:    VecDotBegin - Starts a split phase dot product computation.

328:    Input Parameters:
329: +   x - the first vector
330: .   y - the second vector
331: -   result - where the result will go (can be NULL)

333:    Level: advanced

335:    Notes:
336:    Each call to VecDotBegin() should be paired with a call to VecDotEnd().

338: seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
339:          VecTDotBegin(), VecTDotEnd(), PetscCommSplitReductionBegin()
340: @*/
341: PetscErrorCode  VecDotBegin(Vec x,Vec y,PetscScalar *result)
342: {
343:   PetscErrorCode      ierr;
344:   PetscSplitReduction *sr;
345:   MPI_Comm            comm;

350:   PetscObjectGetComm((PetscObject)x,&comm);
351:   PetscSplitReductionGet(comm,&sr);
352:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
353:   if (sr->numopsbegin >= sr->maxops) {
354:     PetscSplitReductionExtend(sr);
355:   }
356:   sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
357:   sr->invecs[sr->numopsbegin]     = (void*)x;
358:   if (!x->ops->dot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local dots");
359:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
360:   (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
361:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
362:   return(0);
363: }

365: /*@
366:    VecDotEnd - Ends a split phase dot product computation.

368:    Input Parameters:
369: +  x - the first vector (can be NULL)
370: .  y - the second vector (can be NULL)
371: -  result - where the result will go

373:    Level: advanced

375:    Notes:
376:    Each call to VecDotBegin() should be paired with a call to VecDotEnd().

378: .seealso: VecDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
379:          VecTDotBegin(),VecTDotEnd(), PetscCommSplitReductionBegin()

381: @*/
382: PetscErrorCode  VecDotEnd(Vec x,Vec y,PetscScalar *result)
383: {
384:   PetscErrorCode      ierr;
385:   PetscSplitReduction *sr;
386:   MPI_Comm            comm;

389:   PetscObjectGetComm((PetscObject)x,&comm);
390:   PetscSplitReductionGet(comm,&sr);
391:   PetscSplitReductionEnd(sr);

393:   if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
394:   if (x && (void*)x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
395:   if (sr->reducetype[sr->numopsend] != PETSC_SR_REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
396:   *result = sr->gvalues[sr->numopsend++];

398:   /*
399:      We are finished getting all the results so reset to no outstanding requests
400:   */
401:   if (sr->numopsend == sr->numopsbegin) {
402:     sr->state       = STATE_BEGIN;
403:     sr->numopsend   = 0;
404:     sr->numopsbegin = 0;
405:   }
406:   return(0);
407: }

409: /*@
410:    VecTDotBegin - Starts a split phase transpose dot product computation.

412:    Input Parameters:
413: +  x - the first vector
414: .  y - the second vector
415: -  result - where the result will go (can be NULL)

417:    Level: advanced

419:    Notes:
420:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

422: .seealso: VecTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
423:          VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()

425: @*/
426: PetscErrorCode  VecTDotBegin(Vec x,Vec y,PetscScalar *result)
427: {
428:   PetscErrorCode      ierr;
429:   PetscSplitReduction *sr;
430:   MPI_Comm            comm;

433:   PetscObjectGetComm((PetscObject)x,&comm);
434:   PetscSplitReductionGet(comm,&sr);
435:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
436:   if (sr->numopsbegin >= sr->maxops) {
437:     PetscSplitReductionExtend(sr);
438:   }
439:   sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
440:   sr->invecs[sr->numopsbegin]     = (void*)x;
441:   if (!x->ops->tdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local dots");
442:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
443:   (*x->ops->tdot_local)(x,y,sr->lvalues+sr->numopsbegin++);
444:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
445:   return(0);
446: }

448: /*@
449:    VecTDotEnd - Ends a split phase transpose dot product computation.

451:    Input Parameters:
452: +  x - the first vector (can be NULL)
453: .  y - the second vector (can be NULL)
454: -  result - where the result will go

456:    Level: advanced

458:    Notes:
459:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

461: seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
462:          VecDotBegin(), VecDotEnd()
463: @*/
464: PetscErrorCode  VecTDotEnd(Vec x,Vec y,PetscScalar *result)
465: {

469:   /*
470:       TDotEnd() is the same as DotEnd() so reuse the code
471:   */
472:   VecDotEnd(x,y,result);
473:   return(0);
474: }

476: /* -------------------------------------------------------------------------*/

478: /*@
479:    VecNormBegin - Starts a split phase norm computation.

481:    Input Parameters:
482: +  x - the first vector
483: .  ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
484: -  result - where the result will go (can be NULL)

486:    Level: advanced

488:    Notes:
489:    Each call to VecNormBegin() should be paired with a call to VecNormEnd().

491: .seealso: VecNormEnd(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()

493: @*/
494: PetscErrorCode  VecNormBegin(Vec x,NormType ntype,PetscReal *result)
495: {
496:   PetscErrorCode      ierr;
497:   PetscSplitReduction *sr;
498:   PetscReal           lresult[2];
499:   MPI_Comm            comm;

503:   PetscObjectGetComm((PetscObject)x,&comm);
504:   PetscSplitReductionGet(comm,&sr);
505:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
506:   if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops-1 && ntype == NORM_1_AND_2)) {
507:     PetscSplitReductionExtend(sr);
508:   }

510:   sr->invecs[sr->numopsbegin] = (void*)x;
511:   if (!x->ops->norm_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local norms");
512:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
513:   (*x->ops->norm_local)(x,ntype,lresult);
514:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
515:   if (ntype == NORM_2)         lresult[0]                = lresult[0]*lresult[0];
516:   if (ntype == NORM_1_AND_2)   lresult[1]                = lresult[1]*lresult[1];
517:   if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_MAX;
518:   else                   sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
519:   sr->lvalues[sr->numopsbegin++] = lresult[0];
520:   if (ntype == NORM_1_AND_2) {
521:     sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
522:     sr->lvalues[sr->numopsbegin++]  = lresult[1];
523:   }
524:   return(0);
525: }

527: /*@
528:    VecNormEnd - Ends a split phase norm computation.

530:    Input Parameters:
531: +  x - the first vector
532: .  ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
533: -  result - where the result will go

535:    Level: advanced

537:    Notes:
538:    Each call to VecNormBegin() should be paired with a call to VecNormEnd().

540:    The x vector is not allowed to be NULL, otherwise the vector would not have its correctly cached norm value

542: .seealso: VecNormBegin(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()

544: @*/
545: PetscErrorCode  VecNormEnd(Vec x,NormType ntype,PetscReal *result)
546: {
547:   PetscErrorCode      ierr;
548:   PetscSplitReduction *sr;
549:   MPI_Comm            comm;

553:   PetscObjectGetComm((PetscObject)x,&comm);
554:   PetscSplitReductionGet(comm,&sr);
555:   PetscSplitReductionEnd(sr);

557:   if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
558:   if ((void*)x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
559:   if (sr->reducetype[sr->numopsend] != PETSC_SR_REDUCE_MAX && ntype == NORM_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
560:   result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);

562:   if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
563:   else if (ntype == NORM_1_AND_2) {
564:     result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
565:     result[1] = PetscSqrtReal(result[1]);
566:   }
567:   if (ntype!=NORM_1_AND_2) {
568:     PetscObjectComposedDataSetReal((PetscObject)x,NormIds[ntype],result[0]);
569:   }

571:   if (sr->numopsend == sr->numopsbegin) {
572:     sr->state       = STATE_BEGIN;
573:     sr->numopsend   = 0;
574:     sr->numopsbegin = 0;
575:   }
576:   return(0);
577: }

579: /*
580:    Possibly add

582:      PetscReductionSumBegin/End()
583:      PetscReductionMaxBegin/End()
584:      PetscReductionMinBegin/End()
585:    or have more like MPI with a single function with flag for Op? Like first better
586: */

588: /*@
589:    VecMDotBegin - Starts a split phase multiple dot product computation.

591:    Input Parameters:
592: +   x - the first vector
593: .   nv - number of vectors
594: .   y - array of vectors
595: -   result - where the result will go (can be NULL)

597:    Level: advanced

599:    Notes:
600:    Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().

602: .seealso: VecMDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
603:          VecTDotBegin(), VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()
604: @*/
605: PetscErrorCode  VecMDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
606: {
607:   PetscErrorCode      ierr;
608:   PetscSplitReduction *sr;
609:   MPI_Comm            comm;
610:   int                 i;

613:   PetscObjectGetComm((PetscObject)x,&comm);
614:   PetscSplitReductionGet(comm,&sr);
615:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
616:   for (i=0; i<nv; i++) {
617:     if (sr->numopsbegin+i >= sr->maxops) {
618:       PetscSplitReductionExtend(sr);
619:     }
620:     sr->reducetype[sr->numopsbegin+i] = PETSC_SR_REDUCE_SUM;
621:     sr->invecs[sr->numopsbegin+i]     = (void*)x;
622:   }
623:   if (!x->ops->mdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local mdots");
624:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
625:   (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
626:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
627:   sr->numopsbegin += nv;
628:   return(0);
629: }

631: /*@
632:    VecMDotEnd - Ends a split phase multiple dot product computation.

634:    Input Parameters:
635: +   x - the first vector (can be NULL)
636: .   nv - number of vectors
637: -   y - array of vectors (can be NULL)

639:    Output Parameters:
640: .   result - where the result will go

642:    Level: advanced

644:    Notes:
645:    Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().

647: .seealso: VecMDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
648:          VecTDotBegin(),VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()

650: @*/
651: PetscErrorCode  VecMDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
652: {
653:   PetscErrorCode      ierr;
654:   PetscSplitReduction *sr;
655:   MPI_Comm            comm;
656:   int                 i;

659:   PetscObjectGetComm((PetscObject)x,&comm);
660:   PetscSplitReductionGet(comm,&sr);
661:   PetscSplitReductionEnd(sr);

663:   if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
664:   if (x && (void*)x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
665:   if (sr->reducetype[sr->numopsend] != PETSC_SR_REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
666:   for (i=0;i<nv;i++) result[i] = sr->gvalues[sr->numopsend++];

668:   /*
669:      We are finished getting all the results so reset to no outstanding requests
670:   */
671:   if (sr->numopsend == sr->numopsbegin) {
672:     sr->state       = STATE_BEGIN;
673:     sr->numopsend   = 0;
674:     sr->numopsbegin = 0;
675:   }
676:   return(0);
677: }

679: /*@
680:    VecMTDotBegin - Starts a split phase transpose multiple dot product computation.

682:    Input Parameters:
683: +  x - the first vector
684: .  nv - number of vectors
685: .  y - array of  vectors
686: -  result - where the result will go (can be NULL)

688:    Level: advanced

690:    Notes:
691:    Each call to VecMTDotBegin() should be paired with a call to VecMTDotEnd().

693: .seealso: VecMTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
694:          VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()

696: @*/
697: PetscErrorCode  VecMTDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
698: {
699:   PetscErrorCode      ierr;
700:   PetscSplitReduction *sr;
701:   MPI_Comm            comm;
702:   int                 i;

705:   PetscObjectGetComm((PetscObject)x,&comm);
706:   PetscSplitReductionGet(comm,&sr);
707:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
708:   for (i=0; i<nv; i++) {
709:     if (sr->numopsbegin+i >= sr->maxops) {
710:       PetscSplitReductionExtend(sr);
711:     }
712:     sr->reducetype[sr->numopsbegin+i] = PETSC_SR_REDUCE_SUM;
713:     sr->invecs[sr->numopsbegin+i]     = (void*)x;
714:   }
715:   if (!x->ops->mtdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local mdots");
716:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
717:   (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
718:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
719:   sr->numopsbegin += nv;
720:   return(0);
721: }

723: /*@
724:    VecMTDotEnd - Ends a split phase transpose multiple dot product computation.

726:    Input Parameters:
727: +  x - the first vector (can be NULL)
728: .  nv - number of vectors
729: -  y - array of  vectors (can be NULL)

731:    Output Parameters:
732: .  result - where the result will go

734:    Level: advanced

736:    Notes:
737:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

739: .seealso: VecMTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
740:          VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()
741: @*/
742: PetscErrorCode  VecMTDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
743: {

747:   /*
748:       MTDotEnd() is the same as MDotEnd() so reuse the code
749:   */
750:   VecMDotEnd(x,nv,y,result);
751:   return(0);
752: }