Actual source code: comb.c

petsc-3.5.4 2015-05-23
Report Typos and Errors
  2: /*
  3:       Split phase global vector reductions with support for combining the
  4:    communication portion of several operations. Using MPI-1.1 support only

  6:       The idea for this and much of the initial code is contributed by
  7:    Victor Eijkhout.

  9:        Usage:
 10:              VecDotBegin(Vec,Vec,PetscScalar *);
 11:              VecNormBegin(Vec,NormType,PetscReal *);
 12:              ....
 13:              VecDotEnd(Vec,Vec,PetscScalar *);
 14:              VecNormEnd(Vec,NormType,PetscReal *);

 16:        Limitations:
 17:          - The order of the xxxEnd() functions MUST be in the same order
 18:            as the xxxBegin(). There is extensive error checking to try to
 19:            insure that the user calls the routines in the correct order
 20: */

 22: #include <petsc-private/vecimpl.h>    /*I   "petscvec.h"    I*/

 26: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf,void *recvbuf,PetscMPIInt count,MPI_Datatype datatype,MPI_Op op,MPI_Comm comm,MPI_Request *request)
 27: {

 31: #if defined(PETSC_HAVE_MPI_IALLREDUCE)
 32:   MPI_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
 33: #elif defined(PETSC_HAVE_MPIX_IALLREDUCE)
 34:   MPIX_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
 35: #else
 36:   MPI_Allreduce(sendbuf,recvbuf,count,datatype,op,comm);
 37:   *request = MPI_REQUEST_NULL;
 38: #endif
 39:   return(0);
 40: }

 42: typedef enum {STATE_BEGIN, STATE_PENDING, STATE_END} SRState;

 44: #define REDUCE_SUM  0
 45: #define REDUCE_MAX  1
 46: #define REDUCE_MIN  2

 48: typedef struct {
 49:   MPI_Comm    comm;
 50:   MPI_Request request;
 51:   PetscBool   async;
 52:   PetscScalar *lvalues;     /* this are the reduced values before call to MPI_Allreduce() */
 53:   PetscScalar *gvalues;     /* values after call to MPI_Allreduce() */
 54:   void        **invecs;     /* for debugging only, vector/memory used with each op */
 55:   PetscInt    *reducetype;  /* is particular value to be summed or maxed? */
 56:   SRState     state;        /* are we calling xxxBegin() or xxxEnd()? */
 57:   PetscInt    maxops;       /* total amount of space we have for requests */
 58:   PetscInt    numopsbegin;  /* number of requests that have been queued in */
 59:   PetscInt    numopsend;    /* number of requests that have been gotten by user */
 60: } PetscSplitReduction;
 61: /*
 62:    Note: the lvalues and gvalues are twice as long as maxops, this is to allow the second half of
 63: the entries to have a flag indicating if they are REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN these are used by
 64: the custom reduction operation that replaces MPI_SUM, MPI_MAX, or MPI_MIN in the case when a reduction involves
 65: some of each.
 66: */

 68: static PetscErrorCode PetscSplitReductionGet(MPI_Comm,PetscSplitReduction**);
 69: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction*);

 73: /*
 74:    PetscSplitReductionCreate - Creates a data structure to contain the queued information.
 75: */
 76: static PetscErrorCode  PetscSplitReductionCreate(MPI_Comm comm,PetscSplitReduction **sr)
 77: {

 81:   PetscNew(sr);
 82:   (*sr)->numopsbegin = 0;
 83:   (*sr)->numopsend   = 0;
 84:   (*sr)->state       = STATE_BEGIN;
 85:   (*sr)->maxops      = 32;
 86:   PetscMalloc1(2*32,&(*sr)->lvalues);
 87:   PetscMalloc1(2*32,&(*sr)->gvalues);
 88:   PetscMalloc1(32,&(*sr)->invecs);
 89:   (*sr)->comm        = comm;
 90:   (*sr)->request     = MPI_REQUEST_NULL;
 91:   PetscMalloc1(32,&(*sr)->reducetype);
 92:   (*sr)->async       = PETSC_FALSE;
 93: #if defined(PETSC_HAVE_MPI_IALLREDUCE) || defined(PETSC_HAVE_MPIX_IALLREDUCE)
 94:   (*sr)->async = PETSC_TRUE;    /* Enable by default */
 95:   PetscOptionsGetBool(NULL,"-splitreduction_async",&(*sr)->async,NULL);
 96: #endif
 97:   return(0);
 98: }

100: /*
101:        This function is the MPI reduction operation used when there is
102:    a combination of sums and max in the reduction. The call below to
103:    MPI_Op_create() converts the function PetscSplitReduction_Local() to the
104:    MPI operator PetscSplitReduction_Op.
105: */
106: MPI_Op PetscSplitReduction_Op = 0;

110: PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
111: {
112:   PetscScalar *xin = (PetscScalar*)in,*xout = (PetscScalar*)out;
113:   PetscInt    i,count = (PetscInt)*cnt;

116:   if (*datatype != MPIU_SCALAR) {
117:     (*PetscErrorPrintf)("Can only handle MPIU_SCALAR data types");
118:     MPI_Abort(MPI_COMM_SELF,1);
119:   }
120:   count = count/2;
121:   for (i=0; i<count; i++) {
122:      /* second half of xin[] is flags for reduction type */
123:     if      ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_SUM) xout[i] += xin[i];
124:     else if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_MAX) xout[i] = PetscMax(*(PetscReal*)(xout+i),*(PetscReal*)(xin+i));
125:     else if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_MIN) xout[i] = PetscMin(*(PetscReal*)(xout+i),*(PetscReal*)(xin+i));
126:     else {
127:       (*PetscErrorPrintf)("Reduction type input is not REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN");
128:       MPI_Abort(MPI_COMM_SELF,1);
129:     }
130:   }
131:   PetscFunctionReturnVoid();
132: }

136: /*@
137:    PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction

139:    Collective but not synchronizing

141:    Input Arguments:
142:    comm - communicator on which split reduction has been queued

144:    Level: advanced

146:    Note:
147:    Calling this function is optional when using split-mode reduction. On supporting hardware, calling this after all
148:    VecXxxBegin() allows the reduction to make asynchronous progress before the result is needed (in VecXxxEnd()).

150: .seealso: VecNormBegin(), VecNormEnd(), VecDotBegin(), VecDotEnd(), VecTDotBegin(), VecTDotEnd(), VecMDotBegin(), VecMDotEnd(), VecMTDotBegin(), VecMTDotEnd()
151: @*/
152: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
153: {
154:   PetscErrorCode      ierr;
155:   PetscSplitReduction *sr;

158:   PetscSplitReductionGet(comm,&sr);
159:   if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
160:   if (sr->async) {              /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
161:     PetscInt       i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
162:     PetscScalar    *lvalues = sr->lvalues,*gvalues = sr->gvalues;
163:     PetscInt       sum_flg = 0,max_flg = 0, min_flg = 0;
164:     MPI_Comm       comm = sr->comm;
165:     PetscMPIInt    size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);;
166:     PetscLogEventBegin(VEC_ReduceBegin,0,0,0,0);
167:     MPI_Comm_size(sr->comm,&size);
168:     if (size == 1) {
169:       PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));
170:     } else {
171:       /* determine if all reductions are sum, max, or min */
172:       for (i=0; i<numops; i++) {
173:         if      (reducetype[i] == REDUCE_MAX) max_flg = 1;
174:         else if (reducetype[i] == REDUCE_SUM) sum_flg = 1;
175:         else if (reducetype[i] == REDUCE_MIN) min_flg = 1;
176:         else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
177:       }
178:       if (sum_flg + max_flg + min_flg > 1) {
179:         /*
180:          after all the entires in lvalues we store the reducetype flags to indicate
181:          to the reduction operations what are sums and what are max
182:          */
183:         for (i=0; i<numops; i++) lvalues[numops+i] = reducetype[i];

185:         MPIPetsc_Iallreduce(lvalues,gvalues,2*numops,MPIU_SCALAR,PetscSplitReduction_Op,comm,&sr->request);
186:       } else if (max_flg) {   /* Compute max of real and imag parts separately, presumably only the real part is used */
187:         MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm,&sr->request);
188:       } else if (min_flg) {
189:         MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm,&sr->request);
190:       } else {
191:         MPIPetsc_Iallreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm,&sr->request);
192:       }
193:     }
194:     sr->state     = STATE_PENDING;
195:     sr->numopsend = 0;
196:     PetscLogEventEnd(VEC_ReduceBegin,0,0,0,0);
197:   } else {
198:     PetscSplitReductionApply(sr);
199:   }
200:   return(0);
201: }

205: static PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
206: {

210:   switch (sr->state) {
211:   case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
212:     PetscSplitReductionApply(sr);
213:     break;
214:   case STATE_PENDING:
215:     /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
216:     PetscLogEventBegin(VEC_ReduceEnd,0,0,0,0);
217:     if (sr->request != MPI_REQUEST_NULL) {
218:       MPI_Wait(&sr->request,MPI_STATUS_IGNORE);
219:     }
220:     sr->state = STATE_END;
221:     PetscLogEventEnd(VEC_ReduceEnd,0,0,0,0);
222:     break;
223:   default: break;            /* everything is already done */
224:   }
225:   return(0);
226: }

230: /*
231:    PetscSplitReductionApply - Actually do the communication required for a split phase reduction
232: */
233: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
234: {
236:   PetscInt       i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
237:   PetscScalar    *lvalues = sr->lvalues,*gvalues = sr->gvalues;
238:   PetscInt       sum_flg  = 0,max_flg = 0, min_flg = 0;
239:   MPI_Comm       comm     = sr->comm;
240:   PetscMPIInt    size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);

243:   if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
244:   PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,comm);
245:   MPI_Comm_size(sr->comm,&size);
246:   if (size == 1) {
247:     PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));
248:   } else {
249:     /* determine if all reductions are sum, max, or min */
250:     for (i=0; i<numops; i++) {
251:       if      (reducetype[i] == REDUCE_MAX) max_flg = 1;
252:       else if (reducetype[i] == REDUCE_SUM) sum_flg = 1;
253:       else if (reducetype[i] == REDUCE_MIN) min_flg = 1;
254:       else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
255:     }
256:     if (sum_flg + max_flg + min_flg > 1) {
257:       /*
258:          after all the entires in lvalues we store the reducetype flags to indicate
259:          to the reduction operations what are sums and what are max
260:       */
261:       for (i=0; i<numops; i++) lvalues[numops+i] = reducetype[i];
262:       MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_SCALAR,PetscSplitReduction_Op,comm);
263:     } else if (max_flg) {     /* Compute max of real and imag parts separately, presumably only the real part is used */
264:       MPI_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm);
265:     } else if (min_flg) {
266:       MPI_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm);
267:     } else {
268:       MPI_Allreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm);
269:     }
270:   }
271:   sr->state     = STATE_END;
272:   sr->numopsend = 0;
273:   PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,comm);
274:   return(0);
275: }

279: /*
280:    PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
281: */
282: PetscErrorCode  PetscSplitReductionExtend(PetscSplitReduction *sr)
283: {
285:   PetscInt       maxops   = sr->maxops,*reducetype = sr->reducetype;
286:   PetscScalar    *lvalues = sr->lvalues,*gvalues = sr->gvalues;
287:   void           *invecs  = sr->invecs;

290:   sr->maxops     = 2*maxops;
291:   PetscMalloc1(2*2*maxops,&sr->lvalues);
292:   PetscMalloc1(2*2*maxops,&sr->gvalues);
293:   PetscMalloc1(2*maxops,&sr->reducetype);
294:   PetscMalloc1(2*maxops,&sr->invecs);
295:   PetscMemcpy(sr->lvalues,lvalues,maxops*sizeof(PetscScalar));
296:   PetscMemcpy(sr->gvalues,gvalues,maxops*sizeof(PetscScalar));
297:   PetscMemcpy(sr->reducetype,reducetype,maxops*sizeof(PetscInt));
298:   PetscMemcpy(sr->invecs,invecs,maxops*sizeof(void*));
299:   PetscFree(lvalues);
300:   PetscFree(gvalues);
301:   PetscFree(reducetype);
302:   PetscFree(invecs);
303:   return(0);
304: }

308: PetscErrorCode  PetscSplitReductionDestroy(PetscSplitReduction *sr)
309: {

313:   PetscFree(sr->lvalues);
314:   PetscFree(sr->gvalues);
315:   PetscFree(sr->reducetype);
316:   PetscFree(sr->invecs);
317:   PetscFree(sr);
318:   return(0);
319: }

321: static PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;

325: /*
326:    Private routine to delete internal storage when a communicator is freed.
327:   This is called by MPI, not by users.

329:   The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
330:   it was MPI_Comm *comm.
331: */
332: PETSC_EXTERN int MPIAPI Petsc_DelReduction(MPI_Comm comm,int keyval,void* attr_val,void* extra_state)
333: {

337:   PetscInfo1(0,"Deleting reduction data in an MPI_Comm %ld\n",(long)comm);
338:   PetscSplitReductionDestroy((PetscSplitReduction*)attr_val);
339:   return(0);
340: }

342: /*
343:      PetscSplitReductionGet - Gets the split reduction object from a
344:         PETSc vector, creates if it does not exit.

346: */
349: static PetscErrorCode PetscSplitReductionGet(MPI_Comm comm,PetscSplitReduction **sr)
350: {
352:   PetscMPIInt    flag;

355:   if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
356:     /*
357:        The calling sequence of the 2nd argument to this function changed
358:        between MPI Standard 1.0 and the revisions 1.1 Here we match the
359:        new standard, if you are using an MPI implementation that uses
360:        the older version you will get a warning message about the next line;
361:        it is only a warning message and should do no harm.
362:     */
363:     MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelReduction,&Petsc_Reduction_keyval,0);
364:   }
365:   MPI_Attr_get(comm,Petsc_Reduction_keyval,(void**)sr,&flag);
366:   if (!flag) {  /* doesn't exist yet so create it and put it in */
367:     PetscSplitReductionCreate(comm,sr);
368:     MPI_Attr_put(comm,Petsc_Reduction_keyval,*sr);
369:     PetscInfo1(0,"Putting reduction data in an MPI_Comm %ld\n",(long)comm);
370:   }
371:   return(0);
372: }

374: /* ----------------------------------------------------------------------------------------------------*/

378: /*@
379:    VecDotBegin - Starts a split phase dot product computation.

381:    Input Parameters:
382: +   x - the first vector
383: .   y - the second vector
384: -   result - where the result will go (can be NULL)

386:    Level: advanced

388:    Notes:
389:    Each call to VecDotBegin() should be paired with a call to VecDotEnd().

391: seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
392:          VecTDotBegin(), VecTDotEnd(), PetscCommSplitReductionBegin()
393: @*/
394: PetscErrorCode  VecDotBegin(Vec x,Vec y,PetscScalar *result)
395: {
396:   PetscErrorCode      ierr;
397:   PetscSplitReduction *sr;
398:   MPI_Comm            comm;

401:   PetscObjectGetComm((PetscObject)x,&comm);
402:   PetscSplitReductionGet(comm,&sr);
403:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
404:   if (sr->numopsbegin >= sr->maxops) {
405:     PetscSplitReductionExtend(sr);
406:   }
407:   sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
408:   sr->invecs[sr->numopsbegin]     = (void*)x;
409:   if (!x->ops->dot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local dots");
410:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
411:   (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
412:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
413:   return(0);
414: }

418: /*@
419:    VecDotEnd - Ends a split phase dot product computation.

421:    Input Parameters:
422: +  x - the first vector (can be NULL)
423: .  y - the second vector (can be NULL)
424: -  result - where the result will go

426:    Level: advanced

428:    Notes:
429:    Each call to VecDotBegin() should be paired with a call to VecDotEnd().

431: .seealso: VecDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
432:          VecTDotBegin(),VecTDotEnd(), PetscCommSplitReductionBegin()

434: @*/
435: PetscErrorCode  VecDotEnd(Vec x,Vec y,PetscScalar *result)
436: {
437:   PetscErrorCode      ierr;
438:   PetscSplitReduction *sr;
439:   MPI_Comm            comm;

442:   PetscObjectGetComm((PetscObject)x,&comm);
443:   PetscSplitReductionGet(comm,&sr);
444:   PetscSplitReductionEnd(sr);

446:   if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
447:   if (x && (void*) x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
448:   if (sr->reducetype[sr->numopsend] != REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
449:   *result = sr->gvalues[sr->numopsend++];

451:   /*
452:      We are finished getting all the results so reset to no outstanding requests
453:   */
454:   if (sr->numopsend == sr->numopsbegin) {
455:     sr->state       = STATE_BEGIN;
456:     sr->numopsend   = 0;
457:     sr->numopsbegin = 0;
458:   }
459:   return(0);
460: }

464: /*@
465:    VecTDotBegin - Starts a split phase transpose dot product computation.

467:    Input Parameters:
468: +  x - the first vector
469: .  y - the second vector
470: -  result - where the result will go (can be NULL)

472:    Level: advanced

474:    Notes:
475:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

477: .seealso: VecTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
478:          VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()

480: @*/
481: PetscErrorCode  VecTDotBegin(Vec x,Vec y,PetscScalar *result)
482: {
483:   PetscErrorCode      ierr;
484:   PetscSplitReduction *sr;
485:   MPI_Comm            comm;

488:   PetscObjectGetComm((PetscObject)x,&comm);
489:   PetscSplitReductionGet(comm,&sr);
490:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
491:   if (sr->numopsbegin >= sr->maxops) {
492:     PetscSplitReductionExtend(sr);
493:   }
494:   sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
495:   sr->invecs[sr->numopsbegin]     = (void*)x;
496:   if (!x->ops->tdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local dots");
497:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
498:   (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
499:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
500:   return(0);
501: }

505: /*@
506:    VecTDotEnd - Ends a split phase transpose dot product computation.

508:    Input Parameters:
509: +  x - the first vector (can be NULL)
510: .  y - the second vector (can be NULL)
511: -  result - where the result will go

513:    Level: advanced

515:    Notes:
516:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

518: seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
519:          VecDotBegin(), VecDotEnd()
520: @*/
521: PetscErrorCode  VecTDotEnd(Vec x,Vec y,PetscScalar *result)
522: {

526:   /*
527:       TDotEnd() is the same as DotEnd() so reuse the code
528:   */
529:   VecDotEnd(x,y,result);
530:   return(0);
531: }

533: /* -------------------------------------------------------------------------*/

537: /*@
538:    VecNormBegin - Starts a split phase norm computation.

540:    Input Parameters:
541: +  x - the first vector
542: .  ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
543: -  result - where the result will go (can be NULL)

545:    Level: advanced

547:    Notes:
548:    Each call to VecNormBegin() should be paired with a call to VecNormEnd().

550: .seealso: VecNormEnd(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()

552: @*/
553: PetscErrorCode  VecNormBegin(Vec x,NormType ntype,PetscReal *result)
554: {
555:   PetscErrorCode      ierr;
556:   PetscSplitReduction *sr;
557:   PetscReal           lresult[2];
558:   MPI_Comm            comm;

561:   PetscObjectGetComm((PetscObject)x,&comm);
562:   PetscSplitReductionGet(comm,&sr);
563:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
564:   if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops-1 && ntype == NORM_1_AND_2)) {
565:     PetscSplitReductionExtend(sr);
566:   }

568:   sr->invecs[sr->numopsbegin] = (void*)x;
569:   if (!x->ops->norm_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local norms");
570:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
571:   (*x->ops->norm_local)(x,ntype,lresult);
572:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
573:   if (ntype == NORM_2)         lresult[0]                = lresult[0]*lresult[0];
574:   if (ntype == NORM_1_AND_2)   lresult[1]                = lresult[1]*lresult[1];
575:   if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = REDUCE_MAX;
576:   else                   sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
577:   sr->lvalues[sr->numopsbegin++] = lresult[0];
578:   if (ntype == NORM_1_AND_2) {
579:     sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
580:     sr->lvalues[sr->numopsbegin++]  = lresult[1];
581:   }
582:   return(0);
583: }

587: /*@
588:    VecNormEnd - Ends a split phase norm computation.

590:    Input Parameters:
591: +  x - the first vector (can be NULL)
592: .  ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
593: -  result - where the result will go

595:    Level: advanced

597:    Notes:
598:    Each call to VecNormBegin() should be paired with a call to VecNormEnd().

600: .seealso: VecNormBegin(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()

602: @*/
603: PetscErrorCode  VecNormEnd(Vec x,NormType ntype,PetscReal *result)
604: {
605:   PetscErrorCode      ierr;
606:   PetscSplitReduction *sr;
607:   MPI_Comm            comm;

610:   PetscObjectGetComm((PetscObject)x,&comm);
611:   PetscSplitReductionGet(comm,&sr);
612:   PetscSplitReductionEnd(sr);

614:   if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
615:   if (x && (void*)x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
616:   if (sr->reducetype[sr->numopsend] != REDUCE_MAX && ntype == NORM_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
617:   result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);

619:   if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
620:   else if (ntype == NORM_1_AND_2) {
621:     result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
622:     result[1] = PetscSqrtReal(result[1]);
623:   }
624:   if (ntype!=NORM_1_AND_2) {
625:     PetscObjectComposedDataSetReal((PetscObject)x,NormIds[ntype],result[0]);
626:   }

628:   if (sr->numopsend == sr->numopsbegin) {
629:     sr->state       = STATE_BEGIN;
630:     sr->numopsend   = 0;
631:     sr->numopsbegin = 0;
632:   }
633:   return(0);
634: }

636: /*
637:    Possibly add

639:      PetscReductionSumBegin/End()
640:      PetscReductionMaxBegin/End()
641:      PetscReductionMinBegin/End()
642:    or have more like MPI with a single function with flag for Op? Like first better
643: */

647: /*@
648:    VecMDotBegin - Starts a split phase multiple dot product computation.

650:    Input Parameters:
651: +   x - the first vector
652: .   nv - number of vectors
653: .   y - array of vectors
654: -   result - where the result will go (can be NULL)

656:    Level: advanced

658:    Notes:
659:    Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().

661: .seealso: VecMDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
662:          VecTDotBegin(), VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()
663: @*/
664: PetscErrorCode  VecMDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
665: {
666:   PetscErrorCode      ierr;
667:   PetscSplitReduction *sr;
668:   MPI_Comm            comm;
669:   int                 i;

672:   PetscObjectGetComm((PetscObject)x,&comm);
673:   PetscSplitReductionGet(comm,&sr);
674:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
675:   for (i=0; i<nv; i++) {
676:     if (sr->numopsbegin+i >= sr->maxops) {
677:       PetscSplitReductionExtend(sr);
678:     }
679:     sr->reducetype[sr->numopsbegin+i] = REDUCE_SUM;
680:     sr->invecs[sr->numopsbegin+i]     = (void*)x;
681:   }
682:   if (!x->ops->mdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local mdots");
683:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
684:   (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
685:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
686:   sr->numopsbegin += nv;
687:   return(0);
688: }

692: /*@
693:    VecMDotEnd - Ends a split phase multiple dot product computation.

695:    Input Parameters:
696: +   x - the first vector (can be NULL)
697: .   nv - number of vectors
698: -   y - array of vectors (can be NULL)

700:    Output Parameters:
701: .   result - where the result will go

703:    Level: advanced

705:    Notes:
706:    Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().

708: .seealso: VecMDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
709:          VecTDotBegin(),VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()

711: @*/
712: PetscErrorCode  VecMDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
713: {
714:   PetscErrorCode      ierr;
715:   PetscSplitReduction *sr;
716:   MPI_Comm            comm;
717:   int                 i;

720:   PetscObjectGetComm((PetscObject)x,&comm);
721:   PetscSplitReductionGet(comm,&sr);
722:   PetscSplitReductionEnd(sr);

724:   if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
725:   if (x && (void*) x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
726:   if (sr->reducetype[sr->numopsend] != REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
727:   for (i=0;i<nv;i++) result[i] = sr->gvalues[sr->numopsend++];

729:   /*
730:      We are finished getting all the results so reset to no outstanding requests
731:   */
732:   if (sr->numopsend == sr->numopsbegin) {
733:     sr->state       = STATE_BEGIN;
734:     sr->numopsend   = 0;
735:     sr->numopsbegin = 0;
736:   }
737:   return(0);
738: }

742: /*@
743:    VecMTDotBegin - Starts a split phase transpose multiple dot product computation.

745:    Input Parameters:
746: +  x - the first vector
747: .  nv - number of vectors
748: .  y - array of  vectors
749: -  result - where the result will go (can be NULL)

751:    Level: advanced

753:    Notes:
754:    Each call to VecMTDotBegin() should be paired with a call to VecMTDotEnd().

756: .seealso: VecMTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
757:          VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()

759: @*/
760: PetscErrorCode  VecMTDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
761: {
762:   PetscErrorCode      ierr;
763:   PetscSplitReduction *sr;
764:   MPI_Comm            comm;
765:   int                 i;

768:   PetscObjectGetComm((PetscObject)x,&comm);
769:   PetscSplitReductionGet(comm,&sr);
770:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
771:   for (i=0; i<nv; i++) {
772:     if (sr->numopsbegin+i >= sr->maxops) {
773:       PetscSplitReductionExtend(sr);
774:     }
775:     sr->reducetype[sr->numopsbegin+i] = REDUCE_SUM;
776:     sr->invecs[sr->numopsbegin+i]     = (void*)x;
777:   }
778:   if (!x->ops->mtdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local mdots");
779:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
780:   (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
781:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
782:   sr->numopsbegin += nv;
783:   return(0);
784: }

788: /*@
789:    VecMTDotEnd - Ends a split phase transpose multiple dot product computation.

791:    Input Parameters:
792: +  x - the first vector (can be NULL)
793: .  nv - number of vectors
794: -  y - array of  vectors (can be NULL)

796:    Output Parameters
797: .  result - where the result will go

799:    Level: advanced

801:    Notes:
802:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

804: .seealso: VecMTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
805:          VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()
806: @*/
807: PetscErrorCode  VecMTDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
808: {

812:   /*
813:       MTDotEnd() is the same as MDotEnd() so reuse the code
814:   */
815:   VecMDotEnd(x,nv,y,result);
816:   return(0);
817: }