Actual source code: comb.c
1: /*
2: Split phase global vector reductions with support for combining the
3: communication portion of several operations. Using MPI-1.1 support only
5: The idea for this and much of the initial code is contributed by
6: Victor Eijkhout.
8: Usage:
9: VecDotBegin(Vec,Vec,PetscScalar *);
10: VecNormBegin(Vec,NormType,PetscReal *);
11: ....
12: VecDotEnd(Vec,Vec,PetscScalar *);
13: VecNormEnd(Vec,NormType,PetscReal *);
15: Limitations:
16: - The order of the xxxEnd() functions MUST be in the same order
17: as the xxxBegin(). There is extensive error checking to try to
18: insure that the user calls the routines in the correct order
19: */
21: #include <petsc/private/vecimpl.h>
23: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf, void *recvbuf, PetscMPIInt count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request)
24: {
25: PetscFunctionBegin;
26: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
27: PetscCallMPI(MPI_Iallreduce(sendbuf, recvbuf, count, datatype, op, comm, request));
28: #else
29: PetscCall(MPIU_Allreduce(sendbuf, recvbuf, count, datatype, op, comm));
30: *request = MPI_REQUEST_NULL;
31: #endif
32: PetscFunctionReturn(PETSC_SUCCESS);
33: }
35: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *);
37: /*
38: PetscSplitReductionCreate - Creates a data structure to contain the queued information.
39: */
40: static PetscErrorCode PetscSplitReductionCreate(MPI_Comm comm, PetscSplitReduction **sr)
41: {
42: PetscFunctionBegin;
43: PetscCall(PetscNew(sr));
44: (*sr)->numopsbegin = 0;
45: (*sr)->numopsend = 0;
46: (*sr)->state = STATE_BEGIN;
47: #define MAXOPS 32
48: (*sr)->maxops = MAXOPS;
49: PetscCall(PetscMalloc6(MAXOPS, &(*sr)->lvalues, MAXOPS, &(*sr)->gvalues, MAXOPS, &(*sr)->invecs, MAXOPS, &(*sr)->reducetype, MAXOPS, &(*sr)->lvalues_mix, MAXOPS, &(*sr)->gvalues_mix));
50: #undef MAXOPS
51: (*sr)->comm = comm;
52: (*sr)->request = MPI_REQUEST_NULL;
53: (*sr)->mix = PETSC_FALSE;
54: (*sr)->async = PETSC_FALSE;
55: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
56: (*sr)->async = PETSC_TRUE; /* Enable by default */
57: #endif
58: /* always check for option; so that tests that run on systems without support don't warn about unhandled options */
59: PetscCall(PetscOptionsGetBool(NULL, NULL, "-splitreduction_async", &(*sr)->async, NULL));
60: PetscFunctionReturn(PETSC_SUCCESS);
61: }
63: /*
64: This function is the MPI reduction operation used when there is
65: a combination of sums and max in the reduction. The call below to
66: MPI_Op_create() converts the function PetscSplitReduction_Local() to the
67: MPI operator PetscSplitReduction_Op.
68: */
69: MPI_Op PetscSplitReduction_Op = 0;
71: PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in, void *out, PetscMPIInt *cnt, MPI_Datatype *datatype)
72: {
73: struct PetscScalarInt {
74: PetscScalar v;
75: PetscInt i;
76: };
77: struct PetscScalarInt *xin = (struct PetscScalarInt *)in;
78: struct PetscScalarInt *xout = (struct PetscScalarInt *)out;
79: PetscInt i, count = (PetscInt)*cnt;
81: PetscFunctionBegin;
82: if (*datatype != MPIU_SCALAR_INT) {
83: PetscCallAbort(MPI_COMM_SELF, (*PetscErrorPrintf)("Can only handle MPIU_SCALAR_INT data types"));
84: PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
85: }
86: for (i = 0; i < count; i++) {
87: if (xin[i].i == PETSC_SR_REDUCE_SUM) xout[i].v += xin[i].v;
88: else if (xin[i].i == PETSC_SR_REDUCE_MAX) xout[i].v = PetscMax(PetscRealPart(xout[i].v), PetscRealPart(xin[i].v));
89: else if (xin[i].i == PETSC_SR_REDUCE_MIN) xout[i].v = PetscMin(PetscRealPart(xout[i].v), PetscRealPart(xin[i].v));
90: else {
91: PetscCallAbort(MPI_COMM_SELF, (*PetscErrorPrintf)("Reduction type input is not PETSC_SR_REDUCE_SUM, PETSC_SR_REDUCE_MAX, or PETSC_SR_REDUCE_MIN"));
92: PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
93: }
94: }
95: PetscFunctionReturnVoid();
96: }
98: /*@
99: PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction
101: Collective but not synchronizing
103: Input Parameter:
104: . comm - communicator on which split reduction has been queued
106: Level: advanced
108: Note:
109: Calling this function is optional when using split-mode reduction. On supporting hardware,
110: calling this after all VecXxxBegin() allows the reduction to make asynchronous progress
111: before the result is needed (in VecXxxEnd()).
113: .seealso: `VecNormBegin()`, `VecNormEnd()`, `VecDotBegin()`, `VecDotEnd()`, `VecTDotBegin()`, `VecTDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`
114: @*/
115: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
116: {
117: PetscSplitReduction *sr;
119: PetscFunctionBegin;
120: if (PetscDefined(HAVE_THREADSAFETY)) PetscFunctionReturn(PETSC_SUCCESS);
121: PetscCall(PetscSplitReductionGet(comm, &sr));
122: PetscCheck(sr->numopsend <= 0, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot call this after VecxxxEnd() has been called");
123: if (sr->async) { /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
124: PetscInt i, numops = sr->numopsbegin, *reducetype = sr->reducetype;
125: PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
126: PetscInt sum_flg = 0, max_flg = 0, min_flg = 0;
127: MPI_Comm comm = sr->comm;
128: PetscMPIInt size, cmul = sizeof(PetscScalar) / sizeof(PetscReal);
130: PetscCall(PetscLogEventBegin(VEC_ReduceBegin, 0, 0, 0, 0));
131: PetscCallMPI(MPI_Comm_size(sr->comm, &size));
132: if (size == 1) {
133: PetscCall(PetscArraycpy(gvalues, lvalues, numops));
134: } else {
135: /* determine if all reductions are sum, max, or min */
136: for (i = 0; i < numops; i++) {
137: if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
138: else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
139: else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
140: else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
141: }
142: PetscCheck(sum_flg + max_flg + min_flg <= 1 || !sr->mix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
143: if (sum_flg + max_flg + min_flg > 1) {
144: sr->mix = PETSC_TRUE;
145: for (i = 0; i < numops; i++) {
146: sr->lvalues_mix[i].v = lvalues[i];
147: sr->lvalues_mix[i].i = reducetype[i];
148: }
149: PetscCall(MPIPetsc_Iallreduce(sr->lvalues_mix, sr->gvalues_mix, numops, MPIU_SCALAR_INT, PetscSplitReduction_Op, comm, &sr->request));
150: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
151: PetscCall(MPIPetsc_Iallreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MAX, comm, &sr->request));
152: } else if (min_flg) {
153: PetscCall(MPIPetsc_Iallreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MIN, comm, &sr->request));
154: } else {
155: PetscCall(MPIPetsc_Iallreduce(lvalues, gvalues, numops, MPIU_SCALAR, MPIU_SUM, comm, &sr->request));
156: }
157: }
158: sr->state = STATE_PENDING;
159: sr->numopsend = 0;
160: PetscCall(PetscLogEventEnd(VEC_ReduceBegin, 0, 0, 0, 0));
161: } else {
162: PetscCall(PetscSplitReductionApply(sr));
163: }
164: PetscFunctionReturn(PETSC_SUCCESS);
165: }
167: PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
168: {
169: PetscFunctionBegin;
170: switch (sr->state) {
171: case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
172: PetscCall(PetscSplitReductionApply(sr));
173: break;
174: case STATE_PENDING:
175: /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
176: PetscCall(PetscLogEventBegin(VEC_ReduceEnd, 0, 0, 0, 0));
177: if (sr->request != MPI_REQUEST_NULL) PetscCallMPI(MPI_Wait(&sr->request, MPI_STATUS_IGNORE));
178: sr->state = STATE_END;
179: if (sr->mix) {
180: PetscInt i;
181: for (i = 0; i < sr->numopsbegin; i++) sr->gvalues[i] = sr->gvalues_mix[i].v;
182: sr->mix = PETSC_FALSE;
183: }
184: PetscCall(PetscLogEventEnd(VEC_ReduceEnd, 0, 0, 0, 0));
185: break;
186: default:
187: break; /* everything is already done */
188: }
189: PetscFunctionReturn(PETSC_SUCCESS);
190: }
192: /*
193: PetscSplitReductionApply - Actually do the communication required for a split phase reduction
194: */
195: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
196: {
197: PetscInt i, numops = sr->numopsbegin, *reducetype = sr->reducetype;
198: PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
199: PetscInt sum_flg = 0, max_flg = 0, min_flg = 0;
200: MPI_Comm comm = sr->comm;
201: PetscMPIInt size, cmul = sizeof(PetscScalar) / sizeof(PetscReal);
203: PetscFunctionBegin;
204: PetscCheck(sr->numopsend <= 0, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot call this after VecxxxEnd() has been called");
205: PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0));
206: PetscCallMPI(MPI_Comm_size(sr->comm, &size));
207: if (size == 1) {
208: PetscCall(PetscArraycpy(gvalues, lvalues, numops));
209: } else {
210: /* determine if all reductions are sum, max, or min */
211: for (i = 0; i < numops; i++) {
212: if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
213: else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
214: else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
215: else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
216: }
217: if (sum_flg + max_flg + min_flg > 1) {
218: PetscCheck(!sr->mix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
219: for (i = 0; i < numops; i++) {
220: sr->lvalues_mix[i].v = lvalues[i];
221: sr->lvalues_mix[i].i = reducetype[i];
222: }
223: PetscCall(MPIU_Allreduce(sr->lvalues_mix, sr->gvalues_mix, numops, MPIU_SCALAR_INT, PetscSplitReduction_Op, comm));
224: for (i = 0; i < numops; i++) sr->gvalues[i] = sr->gvalues_mix[i].v;
225: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
226: PetscCall(MPIU_Allreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MAX, comm));
227: } else if (min_flg) {
228: PetscCall(MPIU_Allreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MIN, comm));
229: } else {
230: PetscCall(MPIU_Allreduce(lvalues, gvalues, numops, MPIU_SCALAR, MPIU_SUM, comm));
231: }
232: }
233: sr->state = STATE_END;
234: sr->numopsend = 0;
235: PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0));
236: PetscFunctionReturn(PETSC_SUCCESS);
237: }
239: /*
240: PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
241: */
242: PetscErrorCode PetscSplitReductionExtend(PetscSplitReduction *sr)
243: {
244: struct PetscScalarInt {
245: PetscScalar v;
246: PetscInt i;
247: };
248: PetscInt maxops = sr->maxops, *reducetype = sr->reducetype;
249: PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
250: struct PetscScalarInt *lvalues_mix = (struct PetscScalarInt *)sr->lvalues_mix;
251: struct PetscScalarInt *gvalues_mix = (struct PetscScalarInt *)sr->gvalues_mix;
252: void **invecs = sr->invecs;
254: PetscFunctionBegin;
255: sr->maxops = 2 * maxops;
256: PetscCall(PetscMalloc6(2 * maxops, &sr->lvalues, 2 * maxops, &sr->gvalues, 2 * maxops, &sr->reducetype, 2 * maxops, &sr->invecs, 2 * maxops, &sr->lvalues_mix, 2 * maxops, &sr->gvalues_mix));
257: PetscCall(PetscArraycpy(sr->lvalues, lvalues, maxops));
258: PetscCall(PetscArraycpy(sr->gvalues, gvalues, maxops));
259: PetscCall(PetscArraycpy(sr->reducetype, reducetype, maxops));
260: PetscCall(PetscArraycpy(sr->invecs, invecs, maxops));
261: PetscCall(PetscArraycpy(sr->lvalues_mix, lvalues_mix, maxops));
262: PetscCall(PetscArraycpy(sr->gvalues_mix, gvalues_mix, maxops));
263: PetscCall(PetscFree6(lvalues, gvalues, reducetype, invecs, lvalues_mix, gvalues_mix));
264: PetscFunctionReturn(PETSC_SUCCESS);
265: }
267: static PetscErrorCode PetscSplitReductionDestroy(PetscSplitReduction *sr)
268: {
269: PetscFunctionBegin;
270: PetscCall(PetscFree6(sr->lvalues, sr->gvalues, sr->reducetype, sr->invecs, sr->lvalues_mix, sr->gvalues_mix));
271: PetscCall(PetscFree(sr));
272: PetscFunctionReturn(PETSC_SUCCESS);
273: }
275: PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;
277: /*
278: Private routine to delete internal storage when a communicator is freed.
279: This is called by MPI, not by users.
281: The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
282: it was MPI_Comm *comm.
283: */
284: static PetscMPIInt MPIAPI Petsc_DelReduction(MPI_Comm comm, PETSC_UNUSED PetscMPIInt keyval, void *attr_val, PETSC_UNUSED void *extra_state)
285: {
286: PetscFunctionBegin;
287: PetscCallMPI(PetscInfo(0, "Deleting reduction data in an MPI_Comm %ld\n", (long)comm));
288: PetscCallMPI(PetscSplitReductionDestroy((PetscSplitReduction *)attr_val));
289: PetscFunctionReturn(PETSC_SUCCESS);
290: }
292: /*
293: PetscSplitReductionGet - Gets the split reduction object from a
294: PETSc vector, creates if it does not exit.
296: */
297: PetscErrorCode PetscSplitReductionGet(MPI_Comm comm, PetscSplitReduction **sr)
298: {
299: PetscMPIInt flag;
301: PetscFunctionBegin;
302: PetscCheck(!PetscDefined(HAVE_THREADSAFETY), comm, PETSC_ERR_SUP, "PetscSplitReductionGet() is not thread-safe");
303: if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
304: /*
305: The calling sequence of the 2nd argument to this function changed
306: between MPI Standard 1.0 and the revisions 1.1 Here we match the
307: new standard, if you are using an MPI implementation that uses
308: the older version you will get a warning message about the next line;
309: it is only a warning message and should do no harm.
310: */
311: PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_DelReduction, &Petsc_Reduction_keyval, NULL));
312: }
313: PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Reduction_keyval, (void **)sr, &flag));
314: if (!flag) { /* doesn't exist yet so create it and put it in */
315: PetscCall(PetscSplitReductionCreate(comm, sr));
316: PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Reduction_keyval, *sr));
317: PetscCall(PetscInfo(0, "Putting reduction data in an MPI_Comm %ld\n", (long)comm));
318: }
319: PetscFunctionReturn(PETSC_SUCCESS);
320: }
322: /* ----------------------------------------------------------------------------------------------------*/
324: /*@
325: VecDotBegin - Starts a split phase dot product computation.
327: Input Parameters:
328: + x - the first vector
329: . y - the second vector
330: - result - where the result will go (can be NULL)
332: Level: advanced
334: Notes:
335: Each call to `VecDotBegin()` should be paired with a call to `VecDotEnd()`.
337: .seealso: `VecDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
338: `VecTDotBegin()`, `VecTDotEnd()`, `PetscCommSplitReductionBegin()`
339: @*/
340: PetscErrorCode VecDotBegin(Vec x, Vec y, PetscScalar *result)
341: {
342: PetscSplitReduction *sr;
343: MPI_Comm comm;
345: PetscFunctionBegin;
348: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
349: if (PetscDefined(HAVE_THREADSAFETY)) {
350: PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
351: PetscCall(VecDot(x, y, result));
352: PetscFunctionReturn(PETSC_SUCCESS);
353: }
354: PetscCall(PetscSplitReductionGet(comm, &sr));
355: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
356: if (sr->numopsbegin >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
357: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
358: sr->invecs[sr->numopsbegin] = (void *)x;
359: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
360: PetscUseTypeMethod(x, dot_local, y, sr->lvalues + sr->numopsbegin++);
361: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
362: PetscFunctionReturn(PETSC_SUCCESS);
363: }
365: /*@
366: VecDotEnd - Ends a split phase dot product computation.
368: Input Parameters:
369: + x - the first vector (can be `NULL`)
370: . y - the second vector (can be `NULL`)
371: - result - where the result will go
373: Level: advanced
375: Notes:
376: Each call to `VecDotBegin()` should be paired with a call to `VecDotEnd()`.
378: .seealso: `VecDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
379: `VecTDotBegin()`, `VecTDotEnd()`, `PetscCommSplitReductionBegin()`
380: @*/
381: PetscErrorCode VecDotEnd(Vec x, Vec y, PetscScalar *result)
382: {
383: PetscSplitReduction *sr;
384: MPI_Comm comm;
386: PetscFunctionBegin;
387: if (PetscDefined(HAVE_THREADSAFETY)) PetscFunctionReturn(PETSC_SUCCESS);
388: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
389: PetscCall(PetscSplitReductionGet(comm, &sr));
390: PetscCall(PetscSplitReductionEnd(sr));
392: PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
393: PetscCheck(!x || (void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
394: PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_SUM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecDotEnd() on a reduction started with VecNormBegin()");
395: *result = sr->gvalues[sr->numopsend++];
397: /*
398: We are finished getting all the results so reset to no outstanding requests
399: */
400: if (sr->numopsend == sr->numopsbegin) {
401: sr->state = STATE_BEGIN;
402: sr->numopsend = 0;
403: sr->numopsbegin = 0;
404: sr->mix = PETSC_FALSE;
405: }
406: PetscFunctionReturn(PETSC_SUCCESS);
407: }
409: /*@
410: VecTDotBegin - Starts a split phase transpose dot product computation.
412: Input Parameters:
413: + x - the first vector
414: . y - the second vector
415: - result - where the result will go (can be `NULL`)
417: Level: advanced
419: Notes:
420: Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.
422: .seealso: `VecTDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
423: `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
424: @*/
425: PetscErrorCode VecTDotBegin(Vec x, Vec y, PetscScalar *result)
426: {
427: PetscSplitReduction *sr;
428: MPI_Comm comm;
430: PetscFunctionBegin;
431: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
432: if (PetscDefined(HAVE_THREADSAFETY)) {
433: PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
434: PetscCall(VecTDot(x, y, result));
435: PetscFunctionReturn(PETSC_SUCCESS);
436: }
437: PetscCall(PetscSplitReductionGet(comm, &sr));
438: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
439: if (sr->numopsbegin >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
440: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
441: sr->invecs[sr->numopsbegin] = (void *)x;
442: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
443: PetscUseTypeMethod(x, tdot_local, y, sr->lvalues + sr->numopsbegin++);
444: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
445: PetscFunctionReturn(PETSC_SUCCESS);
446: }
448: /*@
449: VecTDotEnd - Ends a split phase transpose dot product computation.
451: Input Parameters:
452: + x - the first vector (can be `NULL`)
453: . y - the second vector (can be `NULL`)
454: - result - where the result will go
456: Level: advanced
458: Notes:
459: Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.
461: .seealso: `VecTDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
462: `VecDotBegin()`, `VecDotEnd()`
463: @*/
464: PetscErrorCode VecTDotEnd(Vec x, Vec y, PetscScalar *result)
465: {
466: PetscFunctionBegin;
467: /*
468: TDotEnd() is the same as DotEnd() so reuse the code
469: */
470: PetscCall(VecDotEnd(x, y, result));
471: PetscFunctionReturn(PETSC_SUCCESS);
472: }
474: /* -------------------------------------------------------------------------*/
476: /*@
477: VecNormBegin - Starts a split phase norm computation.
479: Input Parameters:
480: + x - the first vector
481: . ntype - norm type, one of `NORM_1`, `NORM_2`, `NORM_MAX`, `NORM_1_AND_2`
482: - result - where the result will go (can be `NULL`)
484: Level: advanced
486: Notes:
487: Each call to `VecNormBegin()` should be paired with a call to `VecNormEnd()`.
489: .seealso: `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`, `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
490: @*/
491: PetscErrorCode VecNormBegin(Vec x, NormType ntype, PetscReal *result)
492: {
493: PetscSplitReduction *sr;
494: PetscReal lresult[2];
495: MPI_Comm comm;
497: PetscFunctionBegin;
499: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
500: if (PetscDefined(HAVE_THREADSAFETY)) {
501: PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
502: PetscCall(PetscObjectStateIncrease((PetscObject)x)); // increase PetscObjectState to invalidate cached norms
503: PetscCall(VecNorm(x, ntype, result));
504: PetscFunctionReturn(PETSC_SUCCESS);
505: }
506: PetscCall(PetscSplitReductionGet(comm, &sr));
507: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
508: if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops - 1 && ntype == NORM_1_AND_2)) PetscCall(PetscSplitReductionExtend(sr));
510: sr->invecs[sr->numopsbegin] = (void *)x;
511: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
512: PetscUseTypeMethod(x, norm_local, ntype, lresult);
513: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
514: if (ntype == NORM_2) lresult[0] = lresult[0] * lresult[0];
515: if (ntype == NORM_1_AND_2) lresult[1] = lresult[1] * lresult[1];
516: if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_MAX;
517: else sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
518: sr->lvalues[sr->numopsbegin++] = lresult[0];
519: if (ntype == NORM_1_AND_2) {
520: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
521: sr->lvalues[sr->numopsbegin++] = lresult[1];
522: }
523: PetscFunctionReturn(PETSC_SUCCESS);
524: }
526: /*@
527: VecNormEnd - Ends a split phase norm computation.
529: Input Parameters:
530: + x - the first vector
531: . ntype - norm type, one of `NORM_1`, `NORM_2`, `NORM_MAX`, `NORM_1_AND_2`
532: - result - where the result will go
534: Level: advanced
536: Notes:
537: Each call to `VecNormBegin()` should be paired with a call to `VecNormEnd()`.
539: The `x` vector is not allowed to be `NULL`, otherwise the vector would not have its correctly cached norm value
541: .seealso: `VecNormBegin()`, `VecNorm()`, `VecDot()`, `VecMDot()`, `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
542: @*/
543: PetscErrorCode VecNormEnd(Vec x, NormType ntype, PetscReal *result)
544: {
545: PetscSplitReduction *sr;
546: MPI_Comm comm;
548: PetscFunctionBegin;
549: if (PetscDefined(HAVE_THREADSAFETY)) PetscFunctionReturn(PETSC_SUCCESS);
551: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
552: PetscCall(PetscSplitReductionGet(comm, &sr));
553: PetscCall(PetscSplitReductionEnd(sr));
555: PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
556: PetscCheck((void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
557: PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_MAX || ntype != NORM_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
558: result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);
560: if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
561: else if (ntype == NORM_1_AND_2) {
562: result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
563: result[1] = PetscSqrtReal(result[1]);
564: }
565: if (ntype != NORM_1_AND_2) PetscCall(PetscObjectComposedDataSetReal((PetscObject)x, NormIds[ntype], result[0]));
567: if (sr->numopsend == sr->numopsbegin) {
568: sr->state = STATE_BEGIN;
569: sr->numopsend = 0;
570: sr->numopsbegin = 0;
571: }
572: PetscFunctionReturn(PETSC_SUCCESS);
573: }
575: /*
576: Possibly add
578: PetscReductionSumBegin/End()
579: PetscReductionMaxBegin/End()
580: PetscReductionMinBegin/End()
581: or have more like MPI with a single function with flag for Op? Like first better
582: */
584: /*@
585: VecMDotBegin - Starts a split phase multiple dot product computation.
587: Input Parameters:
588: + x - the first vector
589: . nv - number of vectors
590: . y - array of vectors
591: - result - where the result will go (can be `NULL`)
593: Level: advanced
595: Notes:
596: Each call to `VecMDotBegin()` should be paired with a call to `VecMDotEnd()`.
598: .seealso: `VecMDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
599: `VecTDotBegin()`, `VecTDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`, `PetscCommSplitReductionBegin()`
600: @*/
601: PetscErrorCode VecMDotBegin(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
602: {
603: PetscSplitReduction *sr;
604: MPI_Comm comm;
605: PetscInt i;
607: PetscFunctionBegin;
608: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
609: if (PetscDefined(HAVE_THREADSAFETY)) {
610: PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
611: PetscCall(VecMDot(x, nv, y, result));
612: PetscFunctionReturn(PETSC_SUCCESS);
613: }
614: PetscCall(PetscSplitReductionGet(comm, &sr));
615: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
616: for (i = 0; i < nv; i++) {
617: if (sr->numopsbegin + i >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
618: sr->reducetype[sr->numopsbegin + i] = PETSC_SR_REDUCE_SUM;
619: sr->invecs[sr->numopsbegin + i] = (void *)x;
620: }
621: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
622: PetscUseTypeMethod(x, mdot_local, nv, y, sr->lvalues + sr->numopsbegin);
623: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
624: sr->numopsbegin += nv;
625: PetscFunctionReturn(PETSC_SUCCESS);
626: }
628: /*@
629: VecMDotEnd - Ends a split phase multiple dot product computation.
631: Input Parameters:
632: + x - the first vector (can be `NULL`)
633: . nv - number of vectors
634: - y - array of vectors (can be `NULL`)
636: Output Parameter:
637: . result - where the result will go
639: Level: advanced
641: Notes:
642: Each call to `VecMDotBegin()` should be paired with a call to `VecMDotEnd()`.
644: .seealso: `VecMDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
645: `VecTDotBegin()`, `VecTDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`, `PetscCommSplitReductionBegin()`
646: @*/
647: PetscErrorCode VecMDotEnd(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
648: {
649: PetscSplitReduction *sr;
650: MPI_Comm comm;
651: PetscInt i;
653: PetscFunctionBegin;
654: if (PetscDefined(HAVE_THREADSAFETY)) PetscFunctionReturn(PETSC_SUCCESS);
655: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
656: PetscCall(PetscSplitReductionGet(comm, &sr));
657: PetscCall(PetscSplitReductionEnd(sr));
659: PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
660: PetscCheck(!x || (void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
661: PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_SUM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecDotEnd() on a reduction started with VecNormBegin()");
662: for (i = 0; i < nv; i++) result[i] = sr->gvalues[sr->numopsend++];
664: /*
665: We are finished getting all the results so reset to no outstanding requests
666: */
667: if (sr->numopsend == sr->numopsbegin) {
668: sr->state = STATE_BEGIN;
669: sr->numopsend = 0;
670: sr->numopsbegin = 0;
671: }
672: PetscFunctionReturn(PETSC_SUCCESS);
673: }
675: /*@
676: VecMTDotBegin - Starts a split phase transpose multiple dot product computation.
678: Input Parameters:
679: + x - the first vector
680: . nv - number of vectors
681: . y - array of vectors
682: - result - where the result will go (can be `NULL`)
684: Level: advanced
686: Notes:
687: Each call to `VecMTDotBegin()` should be paired with a call to `VecMTDotEnd()`.
689: .seealso: `VecMTDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
690: `VecDotBegin()`, `VecDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `PetscCommSplitReductionBegin()`
691: @*/
692: PetscErrorCode VecMTDotBegin(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
693: {
694: PetscSplitReduction *sr;
695: MPI_Comm comm;
696: PetscInt i;
698: PetscFunctionBegin;
699: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
700: if (PetscDefined(HAVE_THREADSAFETY)) {
701: PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
702: PetscCall(VecMTDot(x, nv, y, result));
703: PetscFunctionReturn(PETSC_SUCCESS);
704: }
705: PetscCall(PetscSplitReductionGet(comm, &sr));
706: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
707: for (i = 0; i < nv; i++) {
708: if (sr->numopsbegin + i >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
709: sr->reducetype[sr->numopsbegin + i] = PETSC_SR_REDUCE_SUM;
710: sr->invecs[sr->numopsbegin + i] = (void *)x;
711: }
712: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
713: PetscUseTypeMethod(x, mtdot_local, nv, y, sr->lvalues + sr->numopsbegin);
714: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
715: sr->numopsbegin += nv;
716: PetscFunctionReturn(PETSC_SUCCESS);
717: }
719: /*@
720: VecMTDotEnd - Ends a split phase transpose multiple dot product computation.
722: Input Parameters:
723: + x - the first vector (can be `NULL`)
724: . nv - number of vectors
725: - y - array of vectors (can be `NULL`)
727: Output Parameter:
728: . result - where the result will go
730: Level: advanced
732: Notes:
733: Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.
735: .seealso: `VecMTDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
736: `VecDotBegin()`, `VecDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `PetscCommSplitReductionBegin()`
737: @*/
738: PetscErrorCode VecMTDotEnd(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
739: {
740: PetscFunctionBegin;
741: /*
742: MTDotEnd() is the same as MDotEnd() so reuse the code
743: */
744: PetscCall(VecMDotEnd(x, nv, y, result));
745: PetscFunctionReturn(PETSC_SUCCESS);
746: }