Actual source code: comb.c

petsc-3.3-p7 2013-05-11
  2: /*
  3:       Split phase global vector reductions with support for combining the
  4:    communication portion of several operations. Using MPI-1.1 support only

  6:       The idea for this and much of the initial code is contributed by 
  7:    Victor Eijkhout.

  9:        Usage:
 10:              VecDotBegin(Vec,Vec,PetscScalar *);
 11:              VecNormBegin(Vec,NormType,PetscReal *);
 12:              ....
 13:              VecDotEnd(Vec,Vec,PetscScalar *);
 14:              VecNormEnd(Vec,NormType,PetscReal *);

 16:        Limitations: 
 17:          - The order of the xxxEnd() functions MUST be in the same order
 18:            as the xxxBegin(). There is extensive error checking to try to 
 19:            insure that the user calls the routines in the correct order
 20: */

 22: #include <petsc-private/vecimpl.h>                              /*I   "petscvec.h"   I*/

 24: #if defined(PETSC_HAVE_PAMI)
 25: PetscErrorCode MPIPetsc_Iallreduce_PAMI(void*,void*,PetscMPIInt,MPI_Datatype,MPI_Op,MPI_Comm,MPI_Request*);
 26: #endif
 27: #if defined(PETSC_HAVE_DCMF)
 28: PetscErrorCode MPIPetsc_Iallreduce_DCMF(void*,void*,PetscMPIInt,MPI_Datatype,MPI_Op,MPI_Comm,MPI_Request*);
 29: #endif

 31: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf,void *recvbuf,PetscMPIInt count,MPI_Datatype datatype,MPI_Op op,MPI_Comm comm,MPI_Request *request)
 32: {
 35: #if defined(PETSC_HAVE_MPIX_IALLREDUCE)
 36:   MPIX_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
 37: #elif defined(PETSC_HAVE_PAMI)
 38:   MPIPetsc_Iallreduce_PAMI(sendbuf,recvbuf,count,datatype,op,comm,request);
 39: #elif defined(PETSC_HAVE_DCMF)
 40:   MPIPetsc_Iallreduce_DCMF(sendbuf,recvbuf,count,datatype,op,comm,request);
 41: #else
 42:   MPI_Allreduce(sendbuf,recvbuf,count,datatype,op,comm);
 43:   *request = MPI_REQUEST_NULL;
 44: #endif
 45:   return(0);
 46: }

 48: typedef enum {STATE_BEGIN, STATE_PENDING, STATE_END} SRState;

 50: #define REDUCE_SUM  0
 51: #define REDUCE_MAX  1
 52: #define REDUCE_MIN  2

 54: typedef struct {
 55:   MPI_Comm     comm;
 56:   MPI_Request  request;
 57:   PetscBool    async;
 58:   PetscScalar  *lvalues;    /* this are the reduced values before call to MPI_Allreduce() */
 59:   PetscScalar  *gvalues;    /* values after call to MPI_Allreduce() */
 60:   void         **invecs;    /* for debugging only, vector/memory used with each op */
 61:   PetscInt     *reducetype; /* is particular value to be summed or maxed? */
 62:   SRState      state;       /* are we calling xxxBegin() or xxxEnd()? */
 63:   PetscInt     maxops;      /* total amount of space we have for requests */
 64:   PetscInt     numopsbegin; /* number of requests that have been queued in */
 65:   PetscInt     numopsend;   /* number of requests that have been gotten by user */
 66: } PetscSplitReduction;
 67: /*
 68:    Note: the lvalues and gvalues are twice as long as maxops, this is to allow the second half of
 69: the entries to have a flag indicating if they are REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN these are used by 
 70: the custom reduction operation that replaces MPI_SUM, MPI_MAX, or MPI_MIN in the case when a reduction involves
 71: some of each.
 72: */

 74: static PetscErrorCode PetscSplitReductionGet(MPI_Comm,PetscSplitReduction**);
 75: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction*);

 79: /*
 80:    PetscSplitReductionCreate - Creates a data structure to contain the queued information.
 81: */
 82: static PetscErrorCode  PetscSplitReductionCreate(MPI_Comm comm,PetscSplitReduction **sr)
 83: {

 87:   PetscNew(PetscSplitReduction,sr);
 88:   (*sr)->numopsbegin = 0;
 89:   (*sr)->numopsend   = 0;
 90:   (*sr)->state       = STATE_BEGIN;
 91:   (*sr)->maxops      = 32;
 92:   PetscMalloc(2*32*sizeof(PetscScalar),&(*sr)->lvalues);
 93:   PetscMalloc(2*32*sizeof(PetscScalar),&(*sr)->gvalues);
 94:   PetscMalloc(32*sizeof(void*),&(*sr)->invecs);
 95:   (*sr)->comm        = comm;
 96:   (*sr)->request     = MPI_REQUEST_NULL;
 97:   PetscMalloc(32*sizeof(PetscInt),&(*sr)->reducetype);
 98:   (*sr)->async = PETSC_FALSE;
 99: #if defined(PETSC_HAVE_MPIX_IALLREDUCE) || defined(PETSC_HAVE_PAMI) || defined(PETSC_HAVE_DCMF)
100:   (*sr)->async = PETSC_TRUE;    /* Enable by default */
101:   PetscOptionsGetBool(PETSC_NULL,"-splitreduction_async",&(*sr)->async,PETSC_NULL);
102: #endif
103:   return(0);
104: }

106: /*
107:        This function is the MPI reduction operation used when there is 
108:    a combination of sums and max in the reduction. The call below to 
109:    MPI_Op_create() converts the function PetscSplitReduction_Local() to the 
110:    MPI operator PetscSplitReduction_Op.
111: */
112: MPI_Op PetscSplitReduction_Op = 0;

117: {
118:   PetscScalar *xin = (PetscScalar *)in,*xout = (PetscScalar*)out;
119:   PetscInt    i,count = (PetscInt)*cnt;

122:   if (*datatype != MPIU_REAL) {
123:     (*PetscErrorPrintf)("Can only handle MPIU_REAL data types");
124:     MPI_Abort(MPI_COMM_SELF,1);
125:   }
126: #if defined(PETSC_USE_COMPLEX)
127:   count = count/2;
128: #endif
129:   count = count/2;
130:   for (i=0; i<count; i++) {
131:     if (((int)PetscRealPart(xin[count+i])) == REDUCE_SUM) { /* second half of xin[] is flags for reduction type */
132:       xout[i] += xin[i];
133:     } else if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_MAX) {
134:       xout[i] = PetscMax(*(PetscReal *)(xout+i),*(PetscReal *)(xin+i));
135:     } else if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_MIN) {
136:       xout[i] = PetscMin(*(PetscReal *)(xout+i),*(PetscReal *)(xin+i));
137:     } else {
138:       (*PetscErrorPrintf)("Reduction type input is not REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN");
139:       MPI_Abort(MPI_COMM_SELF,1);
140:     }
141:   }
142:   PetscFunctionReturnVoid();
143: }

147: /*@
148:    PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction

150:    Collective but not synchronizing

152:    Input Arguments:
153:    comm - communicator on which split reduction has been queued

155:    Level: advanced

157:    Note:
158:    Calling this function is optional when using split-mode reduction. On supporting hardware, calling this after all
159:    VecXxxBegin() allows the reduction to make asynchronous progress before the result is needed (in VecXxxEnd()).

161: .seealso: VecNormBegin(), VecNormEnd(), VecDotBegin(), VecDotEnd(), VecTDotBegin(), VecTDotEnd(), VecMDotBegin(), VecMDotEnd(), VecMTDotBegin(), VecMTDotEnd()
162: @*/
163: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
164: {
166:   PetscSplitReduction *sr;

169:   PetscSplitReductionGet(comm,&sr);
170:   if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
171:   if (sr->async) {              /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
172:     PetscInt       i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
173:     PetscScalar    *lvalues = sr->lvalues,*gvalues = sr->gvalues;
174:     PetscInt       sum_flg = 0,max_flg = 0, min_flg = 0;
175:     MPI_Comm       comm = sr->comm;
176:     PetscMPIInt    size;
177:     PetscLogEventBegin(VEC_ReduceBegin,0,0,0,0);
178:     MPI_Comm_size(sr->comm,&size);
179:     if (size == 1) {
180:       PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));
181:     } else {
182:       /* determine if all reductions are sum, max, or min */
183:       for (i=0; i<numops; i++) {
184:         if (reducetype[i] == REDUCE_MAX) {
185:           max_flg = 1;
186:         } else if (reducetype[i] == REDUCE_SUM) {
187:           sum_flg = 1;
188:         } else if (reducetype[i] == REDUCE_MIN) {
189:           min_flg = 1;
190:         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
191:       }
192:       if (sum_flg + max_flg + min_flg > 1) {
193:         /* 
194:          after all the entires in lvalues we store the reducetype flags to indicate
195:          to the reduction operations what are sums and what are max
196:          */
197:         for (i=0; i<numops; i++) {
198:           lvalues[numops+i] = reducetype[i];
199:         }
200: #if defined(PETSC_USE_COMPLEX)
201:         MPIPetsc_Iallreduce(lvalues,gvalues,2*2*numops,MPIU_REAL,PetscSplitReduction_Op,comm,&sr->request);
202: #else
203:         MPIPetsc_Iallreduce(lvalues,gvalues,2*numops,MPIU_REAL,PetscSplitReduction_Op,comm,&sr->request);
204: #endif
205:       } else if (max_flg) {
206: #if defined(PETSC_USE_COMPLEX)
207:         /* 
208:          complex case we max both the real and imaginary parts, the imaginary part
209:          is just ignored later
210:          */
211:         MPIPetsc_Iallreduce(lvalues,gvalues,2*numops,MPIU_REAL,MPIU_MAX,comm,&sr->request);
212: #else
213:         MPIPetsc_Iallreduce(lvalues,gvalues,numops,MPIU_REAL,MPIU_MAX,comm,&sr->request);
214: #endif
215:       } else if (min_flg) {
216: #if defined(PETSC_USE_COMPLEX)
217:         /* 
218:          complex case we min both the real and imaginary parts, the imaginary part
219:          is just ignored later
220:          */
221:         MPIPetsc_Iallreduce(lvalues,gvalues,2*numops,MPIU_REAL,MPIU_MIN,comm,&sr->request);
222: #else
223:         MPIPetsc_Iallreduce(lvalues,gvalues,numops,MPIU_REAL,MPIU_MIN,comm,&sr->request);
224: #endif
225:       } else {
226:         MPIPetsc_Iallreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm,&sr->request);
227:       }
228:     }
229:     sr->state     = STATE_PENDING;
230:     sr->numopsend = 0;
231:     PetscLogEventEnd(VEC_ReduceBegin,0,0,0,0);
232:   } else {
233:     PetscSplitReductionApply(sr);
234:   }
235:   return(0);
236: }

240: static PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
241: {

245:   switch (sr->state) {
246:   case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
247:     PetscSplitReductionApply(sr);
248:     break;
249:   case STATE_PENDING:
250:     /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
251:     PetscLogEventBegin(VEC_ReduceEnd,0,0,0,0);
252:     if (sr->request != MPI_REQUEST_NULL) {
253:       MPI_Wait(&sr->request,MPI_STATUS_IGNORE);
254:     }
255:     sr->state = STATE_END;
256:     PetscLogEventEnd(VEC_ReduceEnd,0,0,0,0);
257:     break;
258:   default: break;            /* everything is already done */
259:   }
260:   return(0);
261: }

265: /*
266:    PetscSplitReductionApply - Actually do the communication required for a split phase reduction
267: */
268: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
269: {
271:   PetscInt       i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
272:   PetscScalar    *lvalues = sr->lvalues,*gvalues = sr->gvalues;
273:   PetscInt       sum_flg = 0,max_flg = 0, min_flg = 0;
274:   MPI_Comm       comm = sr->comm;
275:   PetscMPIInt    size;

278:   if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
279:   PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,comm);
280:   MPI_Comm_size(sr->comm,&size);
281:   if (size == 1) {
282:     PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));
283:   } else {
284:     /* determine if all reductions are sum, max, or min */
285:     for (i=0; i<numops; i++) {
286:       if (reducetype[i] == REDUCE_MAX) {
287:         max_flg = 1;
288:       } else if (reducetype[i] == REDUCE_SUM) {
289:         sum_flg = 1;
290:       } else if (reducetype[i] == REDUCE_MIN) {
291:         min_flg = 1;
292:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
293:     }
294:     if (sum_flg + max_flg + min_flg > 1) {
295:       /* 
296:          after all the entires in lvalues we store the reducetype flags to indicate
297:          to the reduction operations what are sums and what are max
298:       */
299:       for (i=0; i<numops; i++) {
300:         lvalues[numops+i] = reducetype[i];
301:       }
302: #if defined(PETSC_USE_COMPLEX)
303:       MPI_Allreduce(lvalues,gvalues,2*2*numops,MPIU_REAL,PetscSplitReduction_Op,comm);
304: #else
305:       MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,PetscSplitReduction_Op,comm);
306: #endif
307:     } else if (max_flg) {
308: #if defined(PETSC_USE_COMPLEX)
309:       /* 
310:         complex case we max both the real and imaginary parts, the imaginary part
311:         is just ignored later
312:       */
313:       MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,MPIU_MAX,comm);
314: #else
315:       MPI_Allreduce(lvalues,gvalues,numops,MPIU_REAL,MPIU_MAX,comm);
316: #endif
317:     } else if (min_flg) {
318: #if defined(PETSC_USE_COMPLEX)
319:       /* 
320:         complex case we min both the real and imaginary parts, the imaginary part
321:         is just ignored later
322:       */
323:       MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,MPIU_MIN,comm);
324: #else
325:       MPI_Allreduce(lvalues,gvalues,numops,MPIU_REAL,MPIU_MIN,comm);
326: #endif
327:     } else {
328:       MPI_Allreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm);
329:     }
330:   }
331:   sr->state     = STATE_END;
332:   sr->numopsend = 0;
333:   PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,comm);
334:   return(0);
335: }

339: /*
340:    PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
341: */
342: PetscErrorCode  PetscSplitReductionExtend(PetscSplitReduction *sr)
343: {
345:   PetscInt       maxops = sr->maxops,*reducetype = sr->reducetype;
346:   PetscScalar    *lvalues = sr->lvalues,*gvalues = sr->gvalues;
347:   void           *invecs = sr->invecs;

350:   sr->maxops     = 2*maxops;
351:   PetscMalloc(2*2*maxops*sizeof(PetscScalar),&sr->lvalues);
352:   PetscMalloc(2*2*maxops*sizeof(PetscScalar),&sr->gvalues);
353:   PetscMalloc(2*maxops*sizeof(PetscInt),&sr->reducetype);
354:   PetscMalloc(2*maxops*sizeof(void*),&sr->invecs);
355:   PetscMemcpy(sr->lvalues,lvalues,maxops*sizeof(PetscScalar));
356:   PetscMemcpy(sr->gvalues,gvalues,maxops*sizeof(PetscScalar));
357:   PetscMemcpy(sr->reducetype,reducetype,maxops*sizeof(PetscInt));
358:   PetscMemcpy(sr->invecs,invecs,maxops*sizeof(void*));
359:   PetscFree(lvalues);
360:   PetscFree(gvalues);
361:   PetscFree(reducetype);
362:   PetscFree(invecs);
363:   return(0);
364: }

368: PetscErrorCode  PetscSplitReductionDestroy(PetscSplitReduction *sr)
369: {

373:   PetscFree(sr->lvalues);
374:   PetscFree(sr->gvalues);
375:   PetscFree(sr->reducetype);
376:   PetscFree(sr->invecs);
377:   PetscFree(sr);
378:   return(0);
379: }

381: static PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;

385: /*
386:    Private routine to delete internal storage when a communicator is freed.
387:   This is called by MPI, not by users.

389:   The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
390:   it was MPI_Comm *comm.  
391: */
393: {

397:   PetscInfo1(0,"Deleting reduction data in an MPI_Comm %ld\n",(long)comm);
398:   PetscSplitReductionDestroy((PetscSplitReduction *)attr_val);
399:   return(0);
400: }

402: /*
403:      PetscSplitReductionGet - Gets the split reduction object from a 
404:         PETSc vector, creates if it does not exit.

406: */
409: static PetscErrorCode PetscSplitReductionGet(MPI_Comm comm,PetscSplitReduction **sr)
410: {
412:   PetscMPIInt    flag;

415:   if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
416:     /* 
417:        The calling sequence of the 2nd argument to this function changed
418:        between MPI Standard 1.0 and the revisions 1.1 Here we match the 
419:        new standard, if you are using an MPI implementation that uses 
420:        the older version you will get a warning message about the next line;
421:        it is only a warning message and should do no harm.
422:     */
423:     MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelReduction,&Petsc_Reduction_keyval,0);
424:   }
425:   MPI_Attr_get(comm,Petsc_Reduction_keyval,(void **)sr,&flag);
426:   if (!flag) {  /* doesn't exist yet so create it and put it in */
427:     PetscSplitReductionCreate(comm,sr);
428:     MPI_Attr_put(comm,Petsc_Reduction_keyval,*sr);
429:     PetscInfo1(0,"Putting reduction data in an MPI_Comm %ld\n",(long)comm);
430:   }
431:   return(0);
432: }

434: /* ----------------------------------------------------------------------------------------------------*/

438: /*@
439:    VecDotBegin - Starts a split phase dot product computation.

441:    Input Parameters:
442: +   x - the first vector
443: .   y - the second vector
444: -   result - where the result will go (can be PETSC_NULL)

446:    Level: advanced

448:    Notes:
449:    Each call to VecDotBegin() should be paired with a call to VecDotEnd().

451: seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(), 
452:          VecTDotBegin(), VecTDotEnd(), PetscCommSplitReductionBegin()
453: @*/
454: PetscErrorCode  VecDotBegin(Vec x,Vec y,PetscScalar *result)
455: {
456:   PetscErrorCode      ierr;
457:   PetscSplitReduction *sr;
458:   MPI_Comm            comm;

461:   PetscObjectGetComm((PetscObject)x,&comm);
462:   PetscSplitReductionGet(comm,&sr);
463:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
464:   if (sr->numopsbegin >= sr->maxops) {
465:     PetscSplitReductionExtend(sr);
466:   }
467:   sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
468:   sr->invecs[sr->numopsbegin]     = (void*)x;
469:   if (!x->ops->dot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local dots");
470:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
471:   (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
472:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
473:   return(0);
474: }

478: /*@
479:    VecDotEnd - Ends a split phase dot product computation.

481:    Input Parameters:
482: +  x - the first vector (can be PETSC_NULL)
483: .  y - the second vector (can be PETSC_NULL)
484: -  result - where the result will go

486:    Level: advanced

488:    Notes:
489:    Each call to VecDotBegin() should be paired with a call to VecDotEnd().

491: .seealso: VecDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(), 
492:          VecTDotBegin(),VecTDotEnd(), PetscCommSplitReductionBegin()

494: @*/
495: PetscErrorCode  VecDotEnd(Vec x,Vec y,PetscScalar *result)
496: {
497:   PetscErrorCode      ierr;
498:   PetscSplitReduction *sr;
499:   MPI_Comm            comm;

502:   PetscObjectGetComm((PetscObject)x,&comm);
503:   PetscSplitReductionGet(comm,&sr);
504:   PetscSplitReductionEnd(sr);

506:   if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
507:   if (x && (void*) x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
508:   if (sr->reducetype[sr->numopsend] != REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
509:   *result = sr->gvalues[sr->numopsend++];

511:   /*
512:      We are finished getting all the results so reset to no outstanding requests
513:   */
514:   if (sr->numopsend == sr->numopsbegin) {
515:     sr->state        = STATE_BEGIN;
516:     sr->numopsend    = 0;
517:     sr->numopsbegin  = 0;
518:   }
519:   return(0);
520: }

524: /*@
525:    VecTDotBegin - Starts a split phase transpose dot product computation.

527:    Input Parameters:
528: +  x - the first vector
529: .  y - the second vector
530: -  result - where the result will go (can be PETSC_NULL)

532:    Level: advanced

534:    Notes:
535:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

537: .seealso: VecTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(), 
538:          VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()

540: @*/
541: PetscErrorCode  VecTDotBegin(Vec x,Vec y,PetscScalar *result)
542: {
543:   PetscErrorCode      ierr;
544:   PetscSplitReduction *sr;
545:   MPI_Comm            comm;

548:   PetscObjectGetComm((PetscObject)x,&comm);
549:   PetscSplitReductionGet(comm,&sr);
550:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
551:   if (sr->numopsbegin >= sr->maxops) {
552:     PetscSplitReductionExtend(sr);
553:   }
554:   sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
555:   sr->invecs[sr->numopsbegin]     = (void*)x;
556:   if (!x->ops->tdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local dots");
557:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
558:   (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
559:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
560:   return(0);
561: }

565: /*@
566:    VecTDotEnd - Ends a split phase transpose dot product computation.

568:    Input Parameters:
569: +  x - the first vector (can be PETSC_NULL)
570: .  y - the second vector (can be PETSC_NULL)
571: -  result - where the result will go

573:    Level: advanced

575:    Notes:
576:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

578: seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(), 
579:          VecDotBegin(), VecDotEnd()
580: @*/
581: PetscErrorCode  VecTDotEnd(Vec x,Vec y,PetscScalar *result)
582: {

586:   /*
587:       TDotEnd() is the same as DotEnd() so reuse the code
588:   */
589:   VecDotEnd(x,y,result);
590:   return(0);
591: }

593: /* -------------------------------------------------------------------------*/

597: /*@
598:    VecNormBegin - Starts a split phase norm computation.

600:    Input Parameters:
601: +  x - the first vector
602: .  ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
603: -  result - where the result will go (can be PETSC_NULL)

605:    Level: advanced

607:    Notes:
608:    Each call to VecNormBegin() should be paired with a call to VecNormEnd().

610: .seealso: VecNormEnd(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()

612: @*/
613: PetscErrorCode  VecNormBegin(Vec x,NormType ntype,PetscReal *result)
614: {
615:   PetscErrorCode      ierr;
616:   PetscSplitReduction *sr;
617:   PetscReal           lresult[2];
618:   MPI_Comm            comm;

621:   PetscObjectGetComm((PetscObject)x,&comm);
622:   PetscSplitReductionGet(comm,&sr);
623:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
624:   if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops-1 && ntype == NORM_1_AND_2)) {
625:     PetscSplitReductionExtend(sr);
626:   }
627: 
628:   sr->invecs[sr->numopsbegin]     = (void*)x;
629:   if (!x->ops->norm_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local norms");
630:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
631:   (*x->ops->norm_local)(x,ntype,lresult);
632:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
633:   if (ntype == NORM_2)         lresult[0]                = lresult[0]*lresult[0];
634:   if (ntype == NORM_1_AND_2)   lresult[1]                = lresult[1]*lresult[1];
635:   if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = REDUCE_MAX;
636:   else                   sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
637:   sr->lvalues[sr->numopsbegin++] = lresult[0];
638:   if (ntype == NORM_1_AND_2) {
639:     sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
640:     sr->lvalues[sr->numopsbegin++]  = lresult[1];
641:   }
642:   return(0);
643: }

647: /*@
648:    VecNormEnd - Ends a split phase norm computation.

650:    Input Parameters:
651: +  x - the first vector (can be PETSC_NULL)
652: .  ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
653: -  result - where the result will go

655:    Level: advanced

657:    Notes:
658:    Each call to VecNormBegin() should be paired with a call to VecNormEnd().

660: .seealso: VecNormBegin(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()

662: @*/
663: PetscErrorCode  VecNormEnd(Vec x,NormType ntype,PetscReal *result)
664: {
665:   PetscErrorCode      ierr;
666:   PetscSplitReduction *sr;
667:   MPI_Comm            comm;

670:   PetscObjectGetComm((PetscObject)x,&comm);
671:   PetscSplitReductionGet(comm,&sr);
672:   PetscSplitReductionEnd(sr);

674:   if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
675:   if (x && (void*)x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
676:   if (sr->reducetype[sr->numopsend] != REDUCE_MAX && ntype == NORM_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
677:   result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);

679:   if (ntype == NORM_2) {
680:     result[0] = PetscSqrtReal(result[0]);
681:   } else if (ntype == NORM_1_AND_2) {
682:     result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
683:     result[1] = PetscSqrtReal(result[1]);
684:   }
685:   if (ntype!=NORM_1_AND_2) {
686:     PetscObjectComposedDataSetReal((PetscObject)x,NormIds[ntype],result[0]);
687:   }

689:   if (sr->numopsend == sr->numopsbegin) {
690:     sr->state        = STATE_BEGIN;
691:     sr->numopsend    = 0;
692:     sr->numopsbegin  = 0;
693:   }
694:   return(0);
695: }

697: /*
698:    Possibly add

700:      PetscReductionSumBegin/End()
701:      PetscReductionMaxBegin/End()
702:      PetscReductionMinBegin/End()
703:    or have more like MPI with a single function with flag for Op? Like first better
704: */

708: /*@
709:    VecMDotBegin - Starts a split phase multiple dot product computation.

711:    Input Parameters:
712: +   x - the first vector
713: .   nv - number of vectors
714: .   y - array of vectors
715: -   result - where the result will go (can be PETSC_NULL)

717:    Level: advanced

719:    Notes:
720:    Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().

722: .seealso: VecMDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(), 
723:          VecTDotBegin(), VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()
724: @*/
725: PetscErrorCode  VecMDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
726: {
727:   PetscErrorCode      ierr;
728:   PetscSplitReduction *sr;
729:   MPI_Comm            comm;
730:   int                 i;

733:   PetscObjectGetComm((PetscObject)x,&comm);
734:   PetscSplitReductionGet(comm,&sr);
735:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
736:   for (i=0;i<nv;i++) {
737:     if (sr->numopsbegin+i >= sr->maxops) {
738:       PetscSplitReductionExtend(sr);
739:     }
740:     sr->reducetype[sr->numopsbegin+i] = REDUCE_SUM;
741:     sr->invecs[sr->numopsbegin+i]     = (void*)x;
742:   }
743:   if (!x->ops->mdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local mdots");
744:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
745:   (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
746:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
747:   sr->numopsbegin += nv;
748:   return(0);
749: }

753: /*@
754:    VecMDotEnd - Ends a split phase multiple dot product computation.

756:    Input Parameters:
757: +   x - the first vector (can be PETSC_NULL)
758: .   nv - number of vectors
759: -   y - array of vectors (can be PETSC_NULL)

761:    Output Parameters:
762: .   result - where the result will go

764:    Level: advanced

766:    Notes:
767:    Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().

769: .seealso: VecMDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(), 
770:          VecTDotBegin(),VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()

772: @*/
773: PetscErrorCode  VecMDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
774: {
775:   PetscErrorCode      ierr;
776:   PetscSplitReduction *sr;
777:   MPI_Comm            comm;
778:   int                 i;

781:   PetscObjectGetComm((PetscObject)x,&comm);
782:   PetscSplitReductionGet(comm,&sr);
783:   PetscSplitReductionEnd(sr);

785:   if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
786:   if (x && (void*) x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
787:   if (sr->reducetype[sr->numopsend] != REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
788:   for (i=0;i<nv;i++) {
789:     result[i] = sr->gvalues[sr->numopsend++];
790:   }
791: 
792:   /*
793:      We are finished getting all the results so reset to no outstanding requests
794:   */
795:   if (sr->numopsend == sr->numopsbegin) {
796:     sr->state        = STATE_BEGIN;
797:     sr->numopsend    = 0;
798:     sr->numopsbegin  = 0;
799:   }
800:   return(0);
801: }

805: /*@
806:    VecMTDotBegin - Starts a split phase transpose multiple dot product computation.

808:    Input Parameters:
809: +  x - the first vector
810: .  nv - number of vectors
811: .  y - array of  vectors
812: -  result - where the result will go (can be PETSC_NULL)

814:    Level: advanced

816:    Notes:
817:    Each call to VecMTDotBegin() should be paired with a call to VecMTDotEnd().

819: .seealso: VecMTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(), 
820:          VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()

822: @*/
823: PetscErrorCode  VecMTDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
824: {
825:   PetscErrorCode      ierr;
826:   PetscSplitReduction *sr;
827:   MPI_Comm            comm;
828:   int                 i;

831:   PetscObjectGetComm((PetscObject)x,&comm);
832:   PetscSplitReductionGet(comm,&sr);
833:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
834:   for (i=0;i<nv;i++) {
835:     if (sr->numopsbegin+i >= sr->maxops) {
836:       PetscSplitReductionExtend(sr);
837:     }
838:     sr->reducetype[sr->numopsbegin+i] = REDUCE_SUM;
839:     sr->invecs[sr->numopsbegin+i]     = (void*)x;
840:   }
841:   if (!x->ops->mtdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local mdots");
842:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
843:   (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
844:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
845:   sr->numopsbegin += nv;
846:   return(0);
847: }

851: /*@
852:    VecMTDotEnd - Ends a split phase transpose multiple dot product computation.

854:    Input Parameters:
855: +  x - the first vector (can be PETSC_NULL)
856: .  nv - number of vectors
857: -  y - array of  vectors (can be PETSC_NULL)

859:    Output Parameters
860: .  result - where the result will go

862:    Level: advanced

864:    Notes:
865:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

867: .seealso: VecMTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(), 
868:          VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()
869: @*/
870: PetscErrorCode  VecMTDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
871: {

875:   /*
876:       MTDotEnd() is the same as MDotEnd() so reuse the code
877:   */
878:   VecMDotEnd(x,nv,y,result);
879:   return(0);
880: }