Actual source code: comb.c

  1: /*
  2:       Split phase global vector reductions with support for combining the
  3:    communication portion of several operations. Using MPI-1.1 support only

  5:       The idea for this and much of the initial code is contributed by
  6:    Victor Eijkhout.

  8:        Usage:
  9:              VecDotBegin(Vec,Vec,PetscScalar *);
 10:              VecNormBegin(Vec,NormType,PetscReal *);
 11:              ....
 12:              VecDotEnd(Vec,Vec,PetscScalar *);
 13:              VecNormEnd(Vec,NormType,PetscReal *);

 15:        Limitations:
 16:          - The order of the xxxEnd() functions MUST be in the same order
 17:            as the xxxBegin(). There is extensive error checking to try to
 18:            insure that the user calls the routines in the correct order
 19: */

 21: #include <petsc/private/vecimpl.h>

 23: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf, void *recvbuf, PetscMPIInt count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request)
 24: {
 25:   PetscFunctionBegin;
 26: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
 27:   PetscCallMPI(MPI_Iallreduce(sendbuf, recvbuf, count, datatype, op, comm, request));
 28: #else
 29:   PetscCall(MPIU_Allreduce(sendbuf, recvbuf, count, datatype, op, comm));
 30:   *request = MPI_REQUEST_NULL;
 31: #endif
 32:   PetscFunctionReturn(PETSC_SUCCESS);
 33: }

 35: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *);

 37: /*
 38:    PetscSplitReductionCreate - Creates a data structure to contain the queued information.
 39: */
 40: static PetscErrorCode PetscSplitReductionCreate(MPI_Comm comm, PetscSplitReduction **sr)
 41: {
 42:   PetscFunctionBegin;
 43:   PetscCall(PetscNew(sr));
 44:   (*sr)->numopsbegin = 0;
 45:   (*sr)->numopsend   = 0;
 46:   (*sr)->state       = STATE_BEGIN;
 47: #define MAXOPS 32
 48:   (*sr)->maxops = MAXOPS;
 49:   PetscCall(PetscMalloc6(MAXOPS, &(*sr)->lvalues, MAXOPS, &(*sr)->gvalues, MAXOPS, &(*sr)->invecs, MAXOPS, &(*sr)->reducetype, MAXOPS, &(*sr)->lvalues_mix, MAXOPS, &(*sr)->gvalues_mix));
 50: #undef MAXOPS
 51:   (*sr)->comm    = comm;
 52:   (*sr)->request = MPI_REQUEST_NULL;
 53:   (*sr)->mix     = PETSC_FALSE;
 54:   (*sr)->async   = PETSC_FALSE;
 55: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
 56:   (*sr)->async = PETSC_TRUE; /* Enable by default */
 57: #endif
 58:   /* always check for option; so that tests that run on systems without support don't warn about unhandled options */
 59:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-splitreduction_async", &(*sr)->async, NULL));
 60:   PetscFunctionReturn(PETSC_SUCCESS);
 61: }

 63: /*
 64:        This function is the MPI reduction operation used when there is
 65:    a combination of sums and max in the reduction. The call below to
 66:    MPI_Op_create() converts the function PetscSplitReduction_Local() to the
 67:    MPI operator PetscSplitReduction_Op.
 68: */
 69: MPI_Op PetscSplitReduction_Op = 0;

 71: PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in, void *out, PetscMPIInt *cnt, MPI_Datatype *datatype)
 72: {
 73:   struct PetscScalarInt {
 74:     PetscScalar v;
 75:     PetscInt    i;
 76:   };
 77:   struct PetscScalarInt *xin  = (struct PetscScalarInt *)in;
 78:   struct PetscScalarInt *xout = (struct PetscScalarInt *)out;
 79:   PetscInt               i, count = (PetscInt)*cnt;

 81:   PetscFunctionBegin;
 82:   if (*datatype != MPIU_SCALAR_INT) {
 83:     PetscCallAbort(MPI_COMM_SELF, (*PetscErrorPrintf)("Can only handle MPIU_SCALAR_INT data types"));
 84:     PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
 85:   }
 86:   for (i = 0; i < count; i++) {
 87:     if (xin[i].i == PETSC_SR_REDUCE_SUM) xout[i].v += xin[i].v;
 88:     else if (xin[i].i == PETSC_SR_REDUCE_MAX) xout[i].v = PetscMax(PetscRealPart(xout[i].v), PetscRealPart(xin[i].v));
 89:     else if (xin[i].i == PETSC_SR_REDUCE_MIN) xout[i].v = PetscMin(PetscRealPart(xout[i].v), PetscRealPart(xin[i].v));
 90:     else {
 91:       PetscCallAbort(MPI_COMM_SELF, (*PetscErrorPrintf)("Reduction type input is not PETSC_SR_REDUCE_SUM, PETSC_SR_REDUCE_MAX, or PETSC_SR_REDUCE_MIN"));
 92:       PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
 93:     }
 94:   }
 95:   PetscFunctionReturnVoid();
 96: }

 98: /*@
 99:   PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction

101:   Collective but not synchronizing

103:   Input Parameter:
104: . comm - communicator on which split reduction has been queued

106:   Level: advanced

108:   Note:
109:   Calling this function is optional when using split-mode reduction. On supporting hardware,
110:   calling this after all VecXxxBegin() allows the reduction to make asynchronous progress
111:   before the result is needed (in VecXxxEnd()).

113: .seealso: `VecNormBegin()`, `VecNormEnd()`, `VecDotBegin()`, `VecDotEnd()`, `VecTDotBegin()`, `VecTDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`
114: @*/
115: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
116: {
117:   PetscSplitReduction *sr;

119:   PetscFunctionBegin;
120:   if (PetscDefined(HAVE_THREADSAFETY)) PetscFunctionReturn(PETSC_SUCCESS);
121:   PetscCall(PetscSplitReductionGet(comm, &sr));
122:   PetscCheck(sr->numopsend <= 0, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot call this after VecxxxEnd() has been called");
123:   if (sr->async) { /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
124:     PetscInt     i, numops = sr->numopsbegin, *reducetype = sr->reducetype;
125:     PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
126:     PetscInt     sum_flg = 0, max_flg = 0, min_flg = 0;
127:     MPI_Comm     comm = sr->comm;
128:     PetscMPIInt  size, cmul = sizeof(PetscScalar) / sizeof(PetscReal);

130:     PetscCall(PetscLogEventBegin(VEC_ReduceBegin, 0, 0, 0, 0));
131:     PetscCallMPI(MPI_Comm_size(sr->comm, &size));
132:     if (size == 1) {
133:       PetscCall(PetscArraycpy(gvalues, lvalues, numops));
134:     } else {
135:       /* determine if all reductions are sum, max, or min */
136:       for (i = 0; i < numops; i++) {
137:         if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
138:         else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
139:         else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
140:         else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
141:       }
142:       PetscCheck(sum_flg + max_flg + min_flg <= 1 || !sr->mix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
143:       if (sum_flg + max_flg + min_flg > 1) {
144:         sr->mix = PETSC_TRUE;
145:         for (i = 0; i < numops; i++) {
146:           sr->lvalues_mix[i].v = lvalues[i];
147:           sr->lvalues_mix[i].i = reducetype[i];
148:         }
149:         PetscCall(MPIPetsc_Iallreduce(sr->lvalues_mix, sr->gvalues_mix, numops, MPIU_SCALAR_INT, PetscSplitReduction_Op, comm, &sr->request));
150:       } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
151:         PetscCall(MPIPetsc_Iallreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MAX, comm, &sr->request));
152:       } else if (min_flg) {
153:         PetscCall(MPIPetsc_Iallreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MIN, comm, &sr->request));
154:       } else {
155:         PetscCall(MPIPetsc_Iallreduce(lvalues, gvalues, numops, MPIU_SCALAR, MPIU_SUM, comm, &sr->request));
156:       }
157:     }
158:     sr->state     = STATE_PENDING;
159:     sr->numopsend = 0;
160:     PetscCall(PetscLogEventEnd(VEC_ReduceBegin, 0, 0, 0, 0));
161:   } else {
162:     PetscCall(PetscSplitReductionApply(sr));
163:   }
164:   PetscFunctionReturn(PETSC_SUCCESS);
165: }

167: PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
168: {
169:   PetscFunctionBegin;
170:   switch (sr->state) {
171:   case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
172:     PetscCall(PetscSplitReductionApply(sr));
173:     break;
174:   case STATE_PENDING:
175:     /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
176:     PetscCall(PetscLogEventBegin(VEC_ReduceEnd, 0, 0, 0, 0));
177:     if (sr->request != MPI_REQUEST_NULL) PetscCallMPI(MPI_Wait(&sr->request, MPI_STATUS_IGNORE));
178:     sr->state = STATE_END;
179:     if (sr->mix) {
180:       PetscInt i;
181:       for (i = 0; i < sr->numopsbegin; i++) sr->gvalues[i] = sr->gvalues_mix[i].v;
182:       sr->mix = PETSC_FALSE;
183:     }
184:     PetscCall(PetscLogEventEnd(VEC_ReduceEnd, 0, 0, 0, 0));
185:     break;
186:   default:
187:     break; /* everything is already done */
188:   }
189:   PetscFunctionReturn(PETSC_SUCCESS);
190: }

192: /*
193:    PetscSplitReductionApply - Actually do the communication required for a split phase reduction
194: */
195: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
196: {
197:   PetscInt     i, numops = sr->numopsbegin, *reducetype = sr->reducetype;
198:   PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
199:   PetscInt     sum_flg = 0, max_flg = 0, min_flg = 0;
200:   MPI_Comm     comm = sr->comm;
201:   PetscMPIInt  size, cmul = sizeof(PetscScalar) / sizeof(PetscReal);

203:   PetscFunctionBegin;
204:   PetscCheck(sr->numopsend <= 0, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot call this after VecxxxEnd() has been called");
205:   PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0));
206:   PetscCallMPI(MPI_Comm_size(sr->comm, &size));
207:   if (size == 1) {
208:     PetscCall(PetscArraycpy(gvalues, lvalues, numops));
209:   } else {
210:     /* determine if all reductions are sum, max, or min */
211:     for (i = 0; i < numops; i++) {
212:       if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
213:       else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
214:       else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
215:       else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
216:     }
217:     if (sum_flg + max_flg + min_flg > 1) {
218:       PetscCheck(!sr->mix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
219:       for (i = 0; i < numops; i++) {
220:         sr->lvalues_mix[i].v = lvalues[i];
221:         sr->lvalues_mix[i].i = reducetype[i];
222:       }
223:       PetscCall(MPIU_Allreduce(sr->lvalues_mix, sr->gvalues_mix, numops, MPIU_SCALAR_INT, PetscSplitReduction_Op, comm));
224:       for (i = 0; i < numops; i++) sr->gvalues[i] = sr->gvalues_mix[i].v;
225:     } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
226:       PetscCall(MPIU_Allreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MAX, comm));
227:     } else if (min_flg) {
228:       PetscCall(MPIU_Allreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MIN, comm));
229:     } else {
230:       PetscCall(MPIU_Allreduce(lvalues, gvalues, numops, MPIU_SCALAR, MPIU_SUM, comm));
231:     }
232:   }
233:   sr->state     = STATE_END;
234:   sr->numopsend = 0;
235:   PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0));
236:   PetscFunctionReturn(PETSC_SUCCESS);
237: }

239: /*
240:    PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
241: */
242: PetscErrorCode PetscSplitReductionExtend(PetscSplitReduction *sr)
243: {
244:   struct PetscScalarInt {
245:     PetscScalar v;
246:     PetscInt    i;
247:   };
248:   PetscInt               maxops = sr->maxops, *reducetype = sr->reducetype;
249:   PetscScalar           *lvalues = sr->lvalues, *gvalues = sr->gvalues;
250:   struct PetscScalarInt *lvalues_mix = (struct PetscScalarInt *)sr->lvalues_mix;
251:   struct PetscScalarInt *gvalues_mix = (struct PetscScalarInt *)sr->gvalues_mix;
252:   void                 **invecs      = sr->invecs;

254:   PetscFunctionBegin;
255:   sr->maxops = 2 * maxops;
256:   PetscCall(PetscMalloc6(2 * maxops, &sr->lvalues, 2 * maxops, &sr->gvalues, 2 * maxops, &sr->reducetype, 2 * maxops, &sr->invecs, 2 * maxops, &sr->lvalues_mix, 2 * maxops, &sr->gvalues_mix));
257:   PetscCall(PetscArraycpy(sr->lvalues, lvalues, maxops));
258:   PetscCall(PetscArraycpy(sr->gvalues, gvalues, maxops));
259:   PetscCall(PetscArraycpy(sr->reducetype, reducetype, maxops));
260:   PetscCall(PetscArraycpy(sr->invecs, invecs, maxops));
261:   PetscCall(PetscArraycpy(sr->lvalues_mix, lvalues_mix, maxops));
262:   PetscCall(PetscArraycpy(sr->gvalues_mix, gvalues_mix, maxops));
263:   PetscCall(PetscFree6(lvalues, gvalues, reducetype, invecs, lvalues_mix, gvalues_mix));
264:   PetscFunctionReturn(PETSC_SUCCESS);
265: }

267: static PetscErrorCode PetscSplitReductionDestroy(PetscSplitReduction *sr)
268: {
269:   PetscFunctionBegin;
270:   PetscCall(PetscFree6(sr->lvalues, sr->gvalues, sr->reducetype, sr->invecs, sr->lvalues_mix, sr->gvalues_mix));
271:   PetscCall(PetscFree(sr));
272:   PetscFunctionReturn(PETSC_SUCCESS);
273: }

275: PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;

277: /*
278:    Private routine to delete internal storage when a communicator is freed.
279:   This is called by MPI, not by users.

281:   The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
282:   it was MPI_Comm *comm.
283: */
284: static PetscMPIInt MPIAPI Petsc_DelReduction(MPI_Comm comm, PETSC_UNUSED PetscMPIInt keyval, void *attr_val, PETSC_UNUSED void *extra_state)
285: {
286:   PetscFunctionBegin;
287:   PetscCallMPI(PetscInfo(0, "Deleting reduction data in an MPI_Comm %ld\n", (long)comm));
288:   PetscCallMPI(PetscSplitReductionDestroy((PetscSplitReduction *)attr_val));
289:   PetscFunctionReturn(PETSC_SUCCESS);
290: }

292: /*
293:      PetscSplitReductionGet - Gets the split reduction object from a
294:         PETSc vector, creates if it does not exit.

296: */
297: PetscErrorCode PetscSplitReductionGet(MPI_Comm comm, PetscSplitReduction **sr)
298: {
299:   PetscMPIInt flag;

301:   PetscFunctionBegin;
302:   PetscCheck(!PetscDefined(HAVE_THREADSAFETY), comm, PETSC_ERR_SUP, "PetscSplitReductionGet() is not thread-safe");
303:   if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
304:     /*
305:        The calling sequence of the 2nd argument to this function changed
306:        between MPI Standard 1.0 and the revisions 1.1 Here we match the
307:        new standard, if you are using an MPI implementation that uses
308:        the older version you will get a warning message about the next line;
309:        it is only a warning message and should do no harm.
310:     */
311:     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_DelReduction, &Petsc_Reduction_keyval, NULL));
312:   }
313:   PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Reduction_keyval, (void **)sr, &flag));
314:   if (!flag) { /* doesn't exist yet so create it and put it in */
315:     PetscCall(PetscSplitReductionCreate(comm, sr));
316:     PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Reduction_keyval, *sr));
317:     PetscCall(PetscInfo(0, "Putting reduction data in an MPI_Comm %ld\n", (long)comm));
318:   }
319:   PetscFunctionReturn(PETSC_SUCCESS);
320: }

322: /* ----------------------------------------------------------------------------------------------------*/

324: /*@
325:   VecDotBegin - Starts a split phase dot product computation.

327:   Input Parameters:
328: + x      - the first vector
329: . y      - the second vector
330: - result - where the result will go (can be NULL)

332:   Level: advanced

334:   Notes:
335:   Each call to `VecDotBegin()` should be paired with a call to `VecDotEnd()`.

337: .seealso: `VecDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
338:           `VecTDotBegin()`, `VecTDotEnd()`, `PetscCommSplitReductionBegin()`
339: @*/
340: PetscErrorCode VecDotBegin(Vec x, Vec y, PetscScalar *result)
341: {
342:   PetscSplitReduction *sr;
343:   MPI_Comm             comm;

345:   PetscFunctionBegin;
348:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
349:   if (PetscDefined(HAVE_THREADSAFETY)) {
350:     PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
351:     PetscCall(VecDot(x, y, result));
352:     PetscFunctionReturn(PETSC_SUCCESS);
353:   }
354:   PetscCall(PetscSplitReductionGet(comm, &sr));
355:   PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
356:   if (sr->numopsbegin >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
357:   sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
358:   sr->invecs[sr->numopsbegin]     = (void *)x;
359:   PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
360:   PetscUseTypeMethod(x, dot_local, y, sr->lvalues + sr->numopsbegin++);
361:   PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
362:   PetscFunctionReturn(PETSC_SUCCESS);
363: }

365: /*@
366:   VecDotEnd - Ends a split phase dot product computation.

368:   Input Parameters:
369: + x      - the first vector (can be `NULL`)
370: . y      - the second vector (can be `NULL`)
371: - result - where the result will go

373:   Level: advanced

375:   Notes:
376:   Each call to `VecDotBegin()` should be paired with a call to `VecDotEnd()`.

378: .seealso: `VecDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
379:           `VecTDotBegin()`, `VecTDotEnd()`, `PetscCommSplitReductionBegin()`
380: @*/
381: PetscErrorCode VecDotEnd(Vec x, Vec y, PetscScalar *result)
382: {
383:   PetscSplitReduction *sr;
384:   MPI_Comm             comm;

386:   PetscFunctionBegin;
387:   if (PetscDefined(HAVE_THREADSAFETY)) PetscFunctionReturn(PETSC_SUCCESS);
388:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
389:   PetscCall(PetscSplitReductionGet(comm, &sr));
390:   PetscCall(PetscSplitReductionEnd(sr));

392:   PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
393:   PetscCheck(!x || (void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
394:   PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_SUM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecDotEnd() on a reduction started with VecNormBegin()");
395:   *result = sr->gvalues[sr->numopsend++];

397:   /*
398:      We are finished getting all the results so reset to no outstanding requests
399:   */
400:   if (sr->numopsend == sr->numopsbegin) {
401:     sr->state       = STATE_BEGIN;
402:     sr->numopsend   = 0;
403:     sr->numopsbegin = 0;
404:     sr->mix         = PETSC_FALSE;
405:   }
406:   PetscFunctionReturn(PETSC_SUCCESS);
407: }

409: /*@
410:   VecTDotBegin - Starts a split phase transpose dot product computation.

412:   Input Parameters:
413: + x      - the first vector
414: . y      - the second vector
415: - result - where the result will go (can be `NULL`)

417:   Level: advanced

419:   Notes:
420:   Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.

422: .seealso: `VecTDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
423:           `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
424: @*/
425: PetscErrorCode VecTDotBegin(Vec x, Vec y, PetscScalar *result)
426: {
427:   PetscSplitReduction *sr;
428:   MPI_Comm             comm;

430:   PetscFunctionBegin;
431:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
432:   if (PetscDefined(HAVE_THREADSAFETY)) {
433:     PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
434:     PetscCall(VecTDot(x, y, result));
435:     PetscFunctionReturn(PETSC_SUCCESS);
436:   }
437:   PetscCall(PetscSplitReductionGet(comm, &sr));
438:   PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
439:   if (sr->numopsbegin >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
440:   sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
441:   sr->invecs[sr->numopsbegin]     = (void *)x;
442:   PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
443:   PetscUseTypeMethod(x, tdot_local, y, sr->lvalues + sr->numopsbegin++);
444:   PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
445:   PetscFunctionReturn(PETSC_SUCCESS);
446: }

448: /*@
449:   VecTDotEnd - Ends a split phase transpose dot product computation.

451:   Input Parameters:
452: + x      - the first vector (can be `NULL`)
453: . y      - the second vector (can be `NULL`)
454: - result - where the result will go

456:   Level: advanced

458:   Notes:
459:   Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.

461: .seealso: `VecTDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
462:           `VecDotBegin()`, `VecDotEnd()`
463: @*/
464: PetscErrorCode VecTDotEnd(Vec x, Vec y, PetscScalar *result)
465: {
466:   PetscFunctionBegin;
467:   /*
468:       TDotEnd() is the same as DotEnd() so reuse the code
469:   */
470:   PetscCall(VecDotEnd(x, y, result));
471:   PetscFunctionReturn(PETSC_SUCCESS);
472: }

474: /* -------------------------------------------------------------------------*/

476: /*@
477:   VecNormBegin - Starts a split phase norm computation.

479:   Input Parameters:
480: + x      - the first vector
481: . ntype  - norm type, one of `NORM_1`, `NORM_2`, `NORM_MAX`, `NORM_1_AND_2`
482: - result - where the result will go (can be `NULL`)

484:   Level: advanced

486:   Notes:
487:   Each call to `VecNormBegin()` should be paired with a call to `VecNormEnd()`.

489: .seealso: `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`, `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
490: @*/
491: PetscErrorCode VecNormBegin(Vec x, NormType ntype, PetscReal *result)
492: {
493:   PetscSplitReduction *sr;
494:   PetscReal            lresult[2];
495:   MPI_Comm             comm;

497:   PetscFunctionBegin;
499:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
500:   if (PetscDefined(HAVE_THREADSAFETY)) {
501:     PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
502:     PetscCall(PetscObjectStateIncrease((PetscObject)x)); // increase PetscObjectState to invalidate cached norms
503:     PetscCall(VecNorm(x, ntype, result));
504:     PetscFunctionReturn(PETSC_SUCCESS);
505:   }
506:   PetscCall(PetscSplitReductionGet(comm, &sr));
507:   PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
508:   if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops - 1 && ntype == NORM_1_AND_2)) PetscCall(PetscSplitReductionExtend(sr));

510:   sr->invecs[sr->numopsbegin] = (void *)x;
511:   PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
512:   PetscUseTypeMethod(x, norm_local, ntype, lresult);
513:   PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
514:   if (ntype == NORM_2) lresult[0] = lresult[0] * lresult[0];
515:   if (ntype == NORM_1_AND_2) lresult[1] = lresult[1] * lresult[1];
516:   if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_MAX;
517:   else sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
518:   sr->lvalues[sr->numopsbegin++] = lresult[0];
519:   if (ntype == NORM_1_AND_2) {
520:     sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
521:     sr->lvalues[sr->numopsbegin++]  = lresult[1];
522:   }
523:   PetscFunctionReturn(PETSC_SUCCESS);
524: }

526: /*@
527:   VecNormEnd - Ends a split phase norm computation.

529:   Input Parameters:
530: + x      - the first vector
531: . ntype  - norm type, one of `NORM_1`, `NORM_2`, `NORM_MAX`, `NORM_1_AND_2`
532: - result - where the result will go

534:   Level: advanced

536:   Notes:
537:   Each call to `VecNormBegin()` should be paired with a call to `VecNormEnd()`.

539:   The `x` vector is not allowed to be `NULL`, otherwise the vector would not have its correctly cached norm value

541: .seealso: `VecNormBegin()`, `VecNorm()`, `VecDot()`, `VecMDot()`, `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
542: @*/
543: PetscErrorCode VecNormEnd(Vec x, NormType ntype, PetscReal *result)
544: {
545:   PetscSplitReduction *sr;
546:   MPI_Comm             comm;

548:   PetscFunctionBegin;
549:   if (PetscDefined(HAVE_THREADSAFETY)) PetscFunctionReturn(PETSC_SUCCESS);
551:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
552:   PetscCall(PetscSplitReductionGet(comm, &sr));
553:   PetscCall(PetscSplitReductionEnd(sr));

555:   PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
556:   PetscCheck((void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
557:   PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_MAX || ntype != NORM_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
558:   result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);

560:   if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
561:   else if (ntype == NORM_1_AND_2) {
562:     result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
563:     result[1] = PetscSqrtReal(result[1]);
564:   }
565:   if (ntype != NORM_1_AND_2) PetscCall(PetscObjectComposedDataSetReal((PetscObject)x, NormIds[ntype], result[0]));

567:   if (sr->numopsend == sr->numopsbegin) {
568:     sr->state       = STATE_BEGIN;
569:     sr->numopsend   = 0;
570:     sr->numopsbegin = 0;
571:   }
572:   PetscFunctionReturn(PETSC_SUCCESS);
573: }

575: /*
576:    Possibly add

578:      PetscReductionSumBegin/End()
579:      PetscReductionMaxBegin/End()
580:      PetscReductionMinBegin/End()
581:    or have more like MPI with a single function with flag for Op? Like first better
582: */

584: /*@
585:   VecMDotBegin - Starts a split phase multiple dot product computation.

587:   Input Parameters:
588: + x      - the first vector
589: . nv     - number of vectors
590: . y      - array of vectors
591: - result - where the result will go (can be `NULL`)

593:   Level: advanced

595:   Notes:
596:   Each call to `VecMDotBegin()` should be paired with a call to `VecMDotEnd()`.

598: .seealso: `VecMDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
599:           `VecTDotBegin()`, `VecTDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`, `PetscCommSplitReductionBegin()`
600: @*/
601: PetscErrorCode VecMDotBegin(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
602: {
603:   PetscSplitReduction *sr;
604:   MPI_Comm             comm;
605:   PetscInt             i;

607:   PetscFunctionBegin;
608:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
609:   if (PetscDefined(HAVE_THREADSAFETY)) {
610:     PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
611:     PetscCall(VecMDot(x, nv, y, result));
612:     PetscFunctionReturn(PETSC_SUCCESS);
613:   }
614:   PetscCall(PetscSplitReductionGet(comm, &sr));
615:   PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
616:   for (i = 0; i < nv; i++) {
617:     if (sr->numopsbegin + i >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
618:     sr->reducetype[sr->numopsbegin + i] = PETSC_SR_REDUCE_SUM;
619:     sr->invecs[sr->numopsbegin + i]     = (void *)x;
620:   }
621:   PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
622:   PetscUseTypeMethod(x, mdot_local, nv, y, sr->lvalues + sr->numopsbegin);
623:   PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
624:   sr->numopsbegin += nv;
625:   PetscFunctionReturn(PETSC_SUCCESS);
626: }

628: /*@
629:   VecMDotEnd - Ends a split phase multiple dot product computation.

631:   Input Parameters:
632: + x  - the first vector (can be `NULL`)
633: . nv - number of vectors
634: - y  - array of vectors (can be `NULL`)

636:   Output Parameter:
637: . result - where the result will go

639:   Level: advanced

641:   Notes:
642:   Each call to `VecMDotBegin()` should be paired with a call to `VecMDotEnd()`.

644: .seealso: `VecMDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
645:           `VecTDotBegin()`, `VecTDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`, `PetscCommSplitReductionBegin()`
646: @*/
647: PetscErrorCode VecMDotEnd(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
648: {
649:   PetscSplitReduction *sr;
650:   MPI_Comm             comm;
651:   PetscInt             i;

653:   PetscFunctionBegin;
654:   if (PetscDefined(HAVE_THREADSAFETY)) PetscFunctionReturn(PETSC_SUCCESS);
655:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
656:   PetscCall(PetscSplitReductionGet(comm, &sr));
657:   PetscCall(PetscSplitReductionEnd(sr));

659:   PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
660:   PetscCheck(!x || (void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
661:   PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_SUM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecDotEnd() on a reduction started with VecNormBegin()");
662:   for (i = 0; i < nv; i++) result[i] = sr->gvalues[sr->numopsend++];

664:   /*
665:      We are finished getting all the results so reset to no outstanding requests
666:   */
667:   if (sr->numopsend == sr->numopsbegin) {
668:     sr->state       = STATE_BEGIN;
669:     sr->numopsend   = 0;
670:     sr->numopsbegin = 0;
671:   }
672:   PetscFunctionReturn(PETSC_SUCCESS);
673: }

675: /*@
676:   VecMTDotBegin - Starts a split phase transpose multiple dot product computation.

678:   Input Parameters:
679: + x      - the first vector
680: . nv     - number of vectors
681: . y      - array of  vectors
682: - result - where the result will go (can be `NULL`)

684:   Level: advanced

686:   Notes:
687:   Each call to `VecMTDotBegin()` should be paired with a call to `VecMTDotEnd()`.

689: .seealso: `VecMTDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
690:           `VecDotBegin()`, `VecDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `PetscCommSplitReductionBegin()`
691: @*/
692: PetscErrorCode VecMTDotBegin(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
693: {
694:   PetscSplitReduction *sr;
695:   MPI_Comm             comm;
696:   PetscInt             i;

698:   PetscFunctionBegin;
699:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
700:   if (PetscDefined(HAVE_THREADSAFETY)) {
701:     PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
702:     PetscCall(VecMTDot(x, nv, y, result));
703:     PetscFunctionReturn(PETSC_SUCCESS);
704:   }
705:   PetscCall(PetscSplitReductionGet(comm, &sr));
706:   PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
707:   for (i = 0; i < nv; i++) {
708:     if (sr->numopsbegin + i >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
709:     sr->reducetype[sr->numopsbegin + i] = PETSC_SR_REDUCE_SUM;
710:     sr->invecs[sr->numopsbegin + i]     = (void *)x;
711:   }
712:   PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
713:   PetscUseTypeMethod(x, mtdot_local, nv, y, sr->lvalues + sr->numopsbegin);
714:   PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
715:   sr->numopsbegin += nv;
716:   PetscFunctionReturn(PETSC_SUCCESS);
717: }

719: /*@
720:   VecMTDotEnd - Ends a split phase transpose multiple dot product computation.

722:   Input Parameters:
723: + x  - the first vector (can be `NULL`)
724: . nv - number of vectors
725: - y  - array of  vectors (can be `NULL`)

727:   Output Parameter:
728: . result - where the result will go

730:   Level: advanced

732:   Notes:
733:   Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.

735: .seealso: `VecMTDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
736:           `VecDotBegin()`, `VecDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `PetscCommSplitReductionBegin()`
737: @*/
738: PetscErrorCode VecMTDotEnd(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
739: {
740:   PetscFunctionBegin;
741:   /*
742:       MTDotEnd() is the same as MDotEnd() so reuse the code
743:   */
744:   PetscCall(VecMDotEnd(x, nv, y, result));
745:   PetscFunctionReturn(PETSC_SUCCESS);
746: }