Actual source code: sfwindow.c

  1: #include <petsc/private/sfimpl.h>

  3: typedef struct _n_PetscSFDataLink *PetscSFDataLink;
  4: typedef struct _n_PetscSFWinLink  *PetscSFWinLink;

  6: typedef struct {
  7:   PetscSFWindowSyncType   sync;   /* FENCE, LOCK, or ACTIVE synchronization */
  8:   PetscSFDataLink         link;   /* List of MPI data types, lazily constructed for each data type */
  9:   PetscSFWinLink          wins;   /* List of active windows */
 10:   PetscSFWindowFlavorType flavor; /* Current PETSCSF_WINDOW_FLAVOR_ */
 11:   PetscSF                 dynsf;  /* SF used to communicate dynamic target window addresses for PETSCSF_WINDOW_FLAVOR_DYNAMIC */
 12:   MPI_Info                info;   /* MPI_Info hints applied when creating windows */
 13: } PetscSF_Window;

 15: struct _n_PetscSFDataLink {
 16:   MPI_Datatype    unit;
 17:   MPI_Datatype   *mine;
 18:   MPI_Datatype   *remote;
 19:   PetscSFDataLink next;
 20: };

 22: struct _n_PetscSFWinLink {
 23:   PetscBool               inuse;
 24:   size_t                  bytes;
 25:   void                   *addr;
 26:   void                   *paddr;
 27:   MPI_Win                 win;
 28:   MPI_Request            *reqs;
 29:   PetscSFWindowFlavorType flavor;
 30:   MPI_Aint               *dyn_target_addr;
 31:   PetscBool               epoch;
 32:   PetscSFWinLink          next;
 33: };

 35: const char *const PetscSFWindowSyncTypes[]   = {"FENCE", "LOCK", "ACTIVE", "PetscSFWindowSyncType", "PETSCSF_WINDOW_SYNC_", NULL};
 36: const char *const PetscSFWindowFlavorTypes[] = {"CREATE", "DYNAMIC", "ALLOCATE", "SHARED", "PetscSFWindowFlavorType", "PETSCSF_WINDOW_FLAVOR_", NULL};

 38: /* Built-in MPI_Ops act elementwise inside MPI_Accumulate, but cannot be used with composite types inside collectives (MPI_Allreduce) */
 39: static PetscErrorCode PetscSFWindowOpTranslate(MPI_Op *op)
 40: {
 41:   PetscFunctionBegin;
 42:   if (*op == MPIU_SUM) *op = MPI_SUM;
 43:   else if (*op == MPIU_MAX) *op = MPI_MAX;
 44:   else if (*op == MPIU_MIN) *op = MPI_MIN;
 45:   PetscFunctionReturn(PETSC_SUCCESS);
 46: }
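/*
   Editorial note (not part of the numbered source): MPI_Accumulate and MPI_Get_accumulate
   only accept predefined reduction operations, while the MPIU_ variants may be user-defined
   MPI_Ops depending on the PETSc configuration (e.g. complex or extended-precision scalars),
   so the helper above substitutes the predefined equivalent where one exists.
*/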

 48: /*
 49:    PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank

 51:    Not Collective

 53:    Input Parameters:
 54: +  sf - star forest of type `PETSCSFWINDOW`
 55: -  unit - data type for each node

 57:    Output Parameters:
 58: +  localtypes - types describing part of local leaf buffer referencing each remote rank
 59: -  remotetypes - types describing part of remote root buffer referenced for each remote rank

 61:    Level: developer

 63: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetGraph()`, `PetscSFView()`
 64: */
 65: static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf, MPI_Datatype unit, const MPI_Datatype **localtypes, const MPI_Datatype **remotetypes)
 66: {
 67:   PetscSF_Window    *w = (PetscSF_Window *)sf->data;
 68:   PetscSFDataLink    link;
 69:   PetscInt           i, nranks;
 70:   const PetscInt    *roffset, *rmine, *rremote;
 71:   const PetscMPIInt *ranks;

 73:   PetscFunctionBegin;
 74:   /* Look for types in cache */
 75:   for (link = w->link; link; link = link->next) {
 76:     PetscBool match;
 77:     PetscCall(MPIPetsc_Type_compare(unit, link->unit, &match));
 78:     if (match) {
 79:       *localtypes  = link->mine;
 80:       *remotetypes = link->remote;
 81:       PetscFunctionReturn(PETSC_SUCCESS);
 82:     }
 83:   }

 85:   /* Create new composite types for each send rank */
 86:   PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, &roffset, &rmine, &rremote));
 87:   PetscCall(PetscNew(&link));
 88:   PetscCallMPI(MPI_Type_dup(unit, &link->unit));
 89:   PetscCall(PetscMalloc2(nranks, &link->mine, nranks, &link->remote));
 90:   for (i = 0; i < nranks; i++) {
 91:     PetscInt     rcount = roffset[i + 1] - roffset[i];
 92:     PetscMPIInt *rmine, *rremote;
 93: #if !defined(PETSC_USE_64BIT_INDICES)
 94:     rmine   = sf->rmine + sf->roffset[i];
 95:     rremote = sf->rremote + sf->roffset[i];
 96: #else
 97:     PetscInt j;
 98:     PetscCall(PetscMalloc2(rcount, &rmine, rcount, &rremote));
 99:     for (j = 0; j < rcount; j++) {
100:       PetscCall(PetscMPIIntCast(sf->rmine[sf->roffset[i] + j], rmine + j));
101:       PetscCall(PetscMPIIntCast(sf->rremote[sf->roffset[i] + j], rremote + j));
102:     }
103: #endif

105:     PetscCallMPI(MPI_Type_create_indexed_block(rcount, 1, rmine, link->unit, &link->mine[i]));
106:     PetscCallMPI(MPI_Type_create_indexed_block(rcount, 1, rremote, link->unit, &link->remote[i]));
107: #if defined(PETSC_USE_64BIT_INDICES)
108:     PetscCall(PetscFree2(rmine, rremote));
109: #endif
110:     PetscCallMPI(MPI_Type_commit(&link->mine[i]));
111:     PetscCallMPI(MPI_Type_commit(&link->remote[i]));
112:   }
113:   link->next = w->link;
114:   w->link    = link;

116:   *localtypes  = link->mine;
117:   *remotetypes = link->remote;
118:   PetscFunctionReturn(PETSC_SUCCESS);
119: }
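/*
   Worked example (not part of the numbered source): suppose this process has three leaves,
   stored at local indices {2,5,7}, that reference root indices {0,3,4} on a single remote
   rank. Then rmine = {2,5,7}, rremote = {0,3,4}, and the two
   MPI_Type_create_indexed_block(3, 1, ...) calls above build one datatype selecting those
   unit-sized entries of the local leaf buffer and one selecting the referenced entries of
   the remote root buffer, so a single MPI_Get/MPI_Accumulate per rank moves all of them.
*/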

121: /*@C
122:   PetscSFWindowSetFlavorType - Set flavor type for `MPI_Win` creation

124:   Logically Collective

126:   Input Parameters:
127: + sf     - star forest for communication of type `PETSCSFWINDOW`
128: - flavor - flavor type

130:   Options Database Key:
131: . -sf_window_flavor <flavor> - sets the flavor type CREATE, DYNAMIC, ALLOCATE or SHARED (see `PetscSFWindowFlavorType`)

133:   Level: advanced

135:   Notes:
136:   Window reuse follows these rules\:
137: .vb
138:      PETSCSF_WINDOW_FLAVOR_CREATE: creates a new window every time, uses MPI_Win_create

140:      PETSCSF_WINDOW_FLAVOR_DYNAMIC: uses MPI_Win_create_dynamic/MPI_Win_attach and tries to reuse windows by comparing the root array. Intended to be used on repeated applications of the same SF, e.g.
141:        for i=1 to K
142:          PetscSFOperationBegin(rootdata1,leafdata_whatever);
143:          PetscSFOperationEnd(rootdata1,leafdata_whatever);
144:          ...
145:          PetscSFOperationBegin(rootdataN,leafdata_whatever);
146:          PetscSFOperationEnd(rootdataN,leafdata_whatever);
147:        endfor
148:        The following pattern will instead raise an error
149:          PetscSFOperationBegin(rootdata1,leafdata_whatever);
150:          PetscSFOperationEnd(rootdata1,leafdata_whatever);
151:          PetscSFOperationBegin(rank ? rootdata1 : rootdata2,leafdata_whatever);
152:          PetscSFOperationEnd(rank ? rootdata1 : rootdata2,leafdata_whatever);

154:      PETSCSF_WINDOW_FLAVOR_ALLOCATE: uses MPI_Win_allocate, reuses any pre-existing window that fits the data and is not in use

156:      PETSCSF_WINDOW_FLAVOR_SHARED: uses MPI_Win_allocate_shared, same reuse policy as for PETSCSF_WINDOW_FLAVOR_ALLOCATE
157: .ve

159: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowGetFlavorType()`
160: @*/
161: PetscErrorCode PetscSFWindowSetFlavorType(PetscSF sf, PetscSFWindowFlavorType flavor)
162: {
163:   PetscFunctionBegin;
166:   PetscTryMethod(sf, "PetscSFWindowSetFlavorType_C", (PetscSF, PetscSFWindowFlavorType), (sf, flavor));
167:   PetscFunctionReturn(PETSC_SUCCESS);
168: }
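/*
   Usage sketch (not part of the numbered source, assuming sf, rootdata, leafdata and the
   iteration count nits are set up elsewhere): select the DYNAMIC flavor before repeated
   applications of the same SF so the window attached to rootdata is reused.

     PetscCall(PetscSFSetType(sf, PETSCSFWINDOW));
     PetscCall(PetscSFWindowSetFlavorType(sf, PETSCSF_WINDOW_FLAVOR_DYNAMIC));
     PetscCall(PetscSFSetUp(sf));
     for (PetscInt it = 0; it < nits; it++) { // same rootdata pointer every iteration
       PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
       PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
     }
*/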

170: static PetscErrorCode PetscSFWindowSetFlavorType_Window(PetscSF sf, PetscSFWindowFlavorType flavor)
171: {
172:   PetscSF_Window *w = (PetscSF_Window *)sf->data;

174:   PetscFunctionBegin;
175:   w->flavor = flavor;
176:   PetscFunctionReturn(PETSC_SUCCESS);
177: }

179: /*@C
180:   PetscSFWindowGetFlavorType - Get `PETSCSFWINDOW` flavor type for `PetscSF` communication

182:   Logically Collective

184:   Input Parameter:
185: . sf - star forest for communication of type `PETSCSFWINDOW`

187:   Output Parameter:
188: . flavor - flavor type

190:   Level: advanced

192: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowSetFlavorType()`
193: @*/
194: PetscErrorCode PetscSFWindowGetFlavorType(PetscSF sf, PetscSFWindowFlavorType *flavor)
195: {
196:   PetscFunctionBegin;
198:   PetscAssertPointer(flavor, 2);
199:   PetscUseMethod(sf, "PetscSFWindowGetFlavorType_C", (PetscSF, PetscSFWindowFlavorType *), (sf, flavor));
200:   PetscFunctionReturn(PETSC_SUCCESS);
201: }

203: static PetscErrorCode PetscSFWindowGetFlavorType_Window(PetscSF sf, PetscSFWindowFlavorType *flavor)
204: {
205:   PetscSF_Window *w = (PetscSF_Window *)sf->data;

207:   PetscFunctionBegin;
208:   *flavor = w->flavor;
209:   PetscFunctionReturn(PETSC_SUCCESS);
210: }

212: /*@C
213:   PetscSFWindowSetSyncType - Set synchronization type for `PetscSF` communication of type  `PETSCSFWINDOW`

215:   Logically Collective

217:   Input Parameters:
218: + sf   - star forest for communication
219: - sync - synchronization type

221:   Options Database Key:
222: . -sf_window_sync <sync> - sets the synchronization type FENCE, LOCK, or ACTIVE (see `PetscSFWindowSyncType`)

224:   Level: advanced

226: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowGetSyncType()`, `PetscSFWindowSyncType`
227: @*/
228: PetscErrorCode PetscSFWindowSetSyncType(PetscSF sf, PetscSFWindowSyncType sync)
229: {
230:   PetscFunctionBegin;
233:   PetscTryMethod(sf, "PetscSFWindowSetSyncType_C", (PetscSF, PetscSFWindowSyncType), (sf, sync));
234:   PetscFunctionReturn(PETSC_SUCCESS);
235: }
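/*
   Usage sketch (not part of the numbered source): lock-based synchronization can be
   selected programmatically or from the options database; sf is assumed to already
   have type PETSCSFWINDOW.

     PetscCall(PetscSFWindowSetSyncType(sf, PETSCSF_WINDOW_SYNC_LOCK));
     // equivalent command line option: -sf_window_sync lock
*/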

237: static PetscErrorCode PetscSFWindowSetSyncType_Window(PetscSF sf, PetscSFWindowSyncType sync)
238: {
239:   PetscSF_Window *w = (PetscSF_Window *)sf->data;

241:   PetscFunctionBegin;
242:   w->sync = sync;
243:   PetscFunctionReturn(PETSC_SUCCESS);
244: }

246: /*@C
247:   PetscSFWindowGetSyncType - Get synchronization type for `PetscSF` communication of type `PETSCSFWINDOW`

249:   Logically Collective

251:   Input Parameter:
252: . sf - star forest for communication

254:   Output Parameter:
255: . sync - synchronization type

257:   Level: advanced

259: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowSetSyncType()`, `PetscSFWindowSyncType`
260: @*/
261: PetscErrorCode PetscSFWindowGetSyncType(PetscSF sf, PetscSFWindowSyncType *sync)
262: {
263:   PetscFunctionBegin;
265:   PetscAssertPointer(sync, 2);
266:   PetscUseMethod(sf, "PetscSFWindowGetSyncType_C", (PetscSF, PetscSFWindowSyncType *), (sf, sync));
267:   PetscFunctionReturn(PETSC_SUCCESS);
268: }

270: static PetscErrorCode PetscSFWindowGetSyncType_Window(PetscSF sf, PetscSFWindowSyncType *sync)
271: {
272:   PetscSF_Window *w = (PetscSF_Window *)sf->data;

274:   PetscFunctionBegin;
275:   *sync = w->sync;
276:   PetscFunctionReturn(PETSC_SUCCESS);
277: }

279: /*@C
280:   PetscSFWindowSetInfo - Set the `MPI_Info` handle that will be used for subsequent window creation

282:   Logically Collective

284:   Input Parameters:
285: + sf   - star forest for communication
286: - info - `MPI_Info` handle

288:   Level: advanced

290:   Note:
291:   The info handle is duplicated with a call to `MPI_Info_dup()` unless info = `MPI_INFO_NULL`.

293: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowGetInfo()`
294: @*/
295: PetscErrorCode PetscSFWindowSetInfo(PetscSF sf, MPI_Info info)
296: {
297:   PetscFunctionBegin;
299:   PetscTryMethod(sf, "PetscSFWindowSetInfo_C", (PetscSF, MPI_Info), (sf, info));
300:   PetscFunctionReturn(PETSC_SUCCESS);
301: }
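/*
   Usage sketch (not part of the numbered source): pass MPI window hints through an
   MPI_Info object; "no_locks" is a standard window info key, though whether it helps
   is implementation dependent.

     MPI_Info info;
     PetscCallMPI(MPI_Info_create(&info));
     PetscCallMPI(MPI_Info_set(info, "no_locks", "true"));
     PetscCall(PetscSFWindowSetInfo(sf, info));
     PetscCallMPI(MPI_Info_free(&info)); // the SF keeps its own duplicate (MPI_Info_dup() below)
*/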

303: static PetscErrorCode PetscSFWindowSetInfo_Window(PetscSF sf, MPI_Info info)
304: {
305:   PetscSF_Window *w = (PetscSF_Window *)sf->data;

307:   PetscFunctionBegin;
308:   if (w->info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_free(&w->info));
309:   if (info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_dup(info, &w->info));
310:   PetscFunctionReturn(PETSC_SUCCESS);
311: }

313: /*@C
314:   PetscSFWindowGetInfo - Get the `MPI_Info` handle used for window creation

316:   Logically Collective

318:   Input Parameter:
319: . sf - star forest for communication

321:   Output Parameter:
322: . info - `MPI_Info` handle

324:   Level: advanced

326:   Note:
327:   If `PetscSFWindowSetInfo()` has not been called, this returns `MPI_INFO_NULL`

329: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFSetFromOptions()`, `PetscSFWindowSetInfo()`
330: @*/
331: PetscErrorCode PetscSFWindowGetInfo(PetscSF sf, MPI_Info *info)
332: {
333:   PetscFunctionBegin;
335:   PetscAssertPointer(info, 2);
336:   PetscUseMethod(sf, "PetscSFWindowGetInfo_C", (PetscSF, MPI_Info *), (sf, info));
337:   PetscFunctionReturn(PETSC_SUCCESS);
338: }

340: static PetscErrorCode PetscSFWindowGetInfo_Window(PetscSF sf, MPI_Info *info)
341: {
342:   PetscSF_Window *w = (PetscSF_Window *)sf->data;

344:   PetscFunctionBegin;
345:   *info = w->info;
346:   PetscFunctionReturn(PETSC_SUCCESS);
347: }

349: /*
350:    PetscSFGetWindow - Get a window for use with a given data type

352:    Collective

354:    Input Parameters:
355: +  sf - star forest
356: .  unit - data type
357: .  array - array to be sent
358: .  sync - type of synchronization `PetscSFWindowSyncType`
359: .  epoch - `PETSC_TRUE` to acquire the window and start an epoch, `PETSC_FALSE` to just acquire the window
360: .  fenceassert - assert parameter for call to `MPI_Win_fence()`, if sync == `PETSCSF_WINDOW_SYNC_FENCE`
361: .  postassert - assert parameter for call to `MPI_Win_post()`, if sync == `PETSCSF_WINDOW_SYNC_ACTIVE`
362: -  startassert - assert parameter for call to `MPI_Win_start()`, if sync == `PETSCSF_WINDOW_SYNC_ACTIVE`

364:    Output Parameters:
365: +  target_disp - target_disp argument for RMA calls (significant for `PETSCSF_WINDOW_FLAVOR_DYNAMIC` only)
366: .  reqs - array of requests (significant for sync == `PETSCSF_WINDOW_SYNC_LOCK` only)
367: -  win - window

369:    Level: developer

371: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFGetRootRanks()`, `PetscSFWindowGetDataTypes()`
372: */
373: static PetscErrorCode PetscSFGetWindow(PetscSF sf, MPI_Datatype unit, void *array, PetscSFWindowSyncType sync, PetscBool epoch, PetscMPIInt fenceassert, PetscMPIInt postassert, PetscMPIInt startassert, const MPI_Aint **target_disp, MPI_Request **reqs, MPI_Win *win)
374: {
375:   PetscSF_Window *w = (PetscSF_Window *)sf->data;
376:   MPI_Aint        lb, lb_true, bytes, bytes_true;
377:   PetscSFWinLink  link;
378: #if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
379:   MPI_Aint winaddr;
380:   PetscInt nranks;
381: #endif
382:   PetscBool reuse = PETSC_FALSE, update = PETSC_FALSE;
383:   PetscBool dummy[2];
384:   MPI_Aint  wsize;

386:   PetscFunctionBegin;
387:   PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes));
388:   PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true));
389:   PetscCheck(lb == 0 && lb_true == 0, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
390:   PetscCheck(bytes == bytes_true, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for unit type with modified extent, write petsc-maint@mcs.anl.gov if you want this feature");
391:   if (w->flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
392:   for (link = w->wins; reuse && link; link = link->next) {
393:     PetscBool winok = PETSC_FALSE;
394:     if (w->flavor != link->flavor) continue;
395:     switch (w->flavor) {
396:     case PETSCSF_WINDOW_FLAVOR_DYNAMIC: /* check available matching array, error if in use (we additionally check that the matching condition is the same across processes) */
397:       if (array == link->addr) {
398:         if (PetscDefined(USE_DEBUG)) {
399:           dummy[0] = PETSC_TRUE;
400:           dummy[1] = PETSC_TRUE;
401:           PetscCall(MPIU_Allreduce(MPI_IN_PLACE, dummy, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)sf)));
402:           PetscCall(MPIU_Allreduce(MPI_IN_PLACE, dummy + 1, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf)));
403:           PetscCheck(dummy[0] == dummy[1], PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead");
404:         }
405:         PetscCheck(!link->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Window in use");
406:         PetscCheck(!epoch || !link->epoch, PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Window epoch not finished");
407:         winok       = PETSC_TRUE;
408:         link->paddr = array;
409:       } else if (PetscDefined(USE_DEBUG)) {
410:         dummy[0] = PETSC_FALSE;
411:         dummy[1] = PETSC_FALSE;
412:         PetscCall(MPIU_Allreduce(MPI_IN_PLACE, dummy, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)sf)));
413:         PetscCall(MPIU_Allreduce(MPI_IN_PLACE, dummy + 1, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf)));
414:         PetscCheck(dummy[0] == dummy[1], PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead");
415:       }
416:       break;
417:     case PETSCSF_WINDOW_FLAVOR_ALLOCATE: /* check available by matching size, allocate if in use */
418:     case PETSCSF_WINDOW_FLAVOR_SHARED:
419:       if (!link->inuse && bytes == (MPI_Aint)link->bytes) {
420:         update      = PETSC_TRUE;
421:         link->paddr = array;
422:         winok       = PETSC_TRUE;
423:       }
424:       break;
425:     default:
426:       SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for flavor %s", PetscSFWindowFlavorTypes[w->flavor]);
427:     }
428:     if (winok) {
429:       *win = link->win;
430:       PetscCall(PetscInfo(sf, "Reusing window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)PetscObjectComm((PetscObject)sf)));
431:       goto found;
432:     }
433:   }

435:   wsize = (MPI_Aint)bytes * sf->nroots;
436:   PetscCall(PetscNew(&link));
437:   link->bytes           = bytes;
438:   link->next            = w->wins;
439:   link->flavor          = w->flavor;
440:   link->dyn_target_addr = NULL;
441:   link->reqs            = NULL;
442:   w->wins               = link;
443:   if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
444:     PetscInt i;

446:     PetscCall(PetscMalloc1(sf->nranks, &link->reqs));
447:     for (i = 0; i < sf->nranks; i++) link->reqs[i] = MPI_REQUEST_NULL;
448:   }
449:   switch (w->flavor) {
450:   case PETSCSF_WINDOW_FLAVOR_CREATE:
451:     PetscCallMPI(MPI_Win_create(array, wsize, (PetscMPIInt)bytes, w->info, PetscObjectComm((PetscObject)sf), &link->win));
452:     link->addr  = array;
453:     link->paddr = array;
454:     break;
455: #if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
456:   case PETSCSF_WINDOW_FLAVOR_DYNAMIC:
457:     PetscCallMPI(MPI_Win_create_dynamic(w->info, PetscObjectComm((PetscObject)sf), &link->win));
458:   #if defined(PETSC_HAVE_OMPI_MAJOR_VERSION) /* some Open MPI versions do not support MPI_Win_attach(win,NULL,0); */
459:     PetscCallMPI(MPI_Win_attach(link->win, wsize ? array : (void *)dummy, wsize));
460:   #else
461:     PetscCallMPI(MPI_Win_attach(link->win, array, wsize));
462:   #endif
463:     link->addr  = array;
464:     link->paddr = array;
465:     PetscCheck(w->dynsf, PetscObjectComm((PetscObject)sf), PETSC_ERR_ORDER, "Must call PetscSFSetUp()");
466:     PetscCall(PetscSFSetUp(w->dynsf));
467:     PetscCall(PetscSFGetRootRanks(w->dynsf, &nranks, NULL, NULL, NULL, NULL));
468:     PetscCall(PetscMalloc1(nranks, &link->dyn_target_addr));
469:     PetscCallMPI(MPI_Get_address(array, &winaddr));
470:     PetscCall(PetscSFBcastBegin(w->dynsf, MPI_AINT, &winaddr, link->dyn_target_addr, MPI_REPLACE));
471:     PetscCall(PetscSFBcastEnd(w->dynsf, MPI_AINT, &winaddr, link->dyn_target_addr, MPI_REPLACE));
472:     break;
473:   case PETSCSF_WINDOW_FLAVOR_ALLOCATE:
474:     PetscCallMPI(MPI_Win_allocate(wsize, (PetscMPIInt)bytes, w->info, PetscObjectComm((PetscObject)sf), &link->addr, &link->win));
475:     update      = PETSC_TRUE;
476:     link->paddr = array;
477:     break;
478: #endif
479: #if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
480:   case PETSCSF_WINDOW_FLAVOR_SHARED:
481:     PetscCallMPI(MPI_Win_allocate_shared(wsize, (PetscMPIInt)bytes, w->info, PetscObjectComm((PetscObject)sf), &link->addr, &link->win));
482:     update      = PETSC_TRUE;
483:     link->paddr = array;
484:     break;
485: #endif
486:   default:
487:     SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "No support for flavor %s", PetscSFWindowFlavorTypes[w->flavor]);
488:   }
489:   PetscCall(PetscInfo(sf, "New window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)PetscObjectComm((PetscObject)sf)));
490:   *win = link->win;

492: found:

494:   if (target_disp) *target_disp = link->dyn_target_addr;
495:   if (reqs) *reqs = link->reqs;
496:   if (update) { /* locks are needed for the "separate" memory model only; the fence guarantees memory synchronization */
497:     PetscMPIInt rank;

499:     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
500:     if (sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, MPI_MODE_NOCHECK, *win));
501:     PetscCall(PetscMemcpy(link->addr, array, sf->nroots * bytes));
502:     if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
503:       PetscCallMPI(MPI_Win_unlock(rank, *win));
504:       PetscCallMPI(MPI_Win_fence(0, *win));
505:     }
506:   }
507:   link->inuse = PETSC_TRUE;
508:   link->epoch = epoch;
509:   if (epoch) {
510:     switch (sync) {
511:     case PETSCSF_WINDOW_SYNC_FENCE:
512:       PetscCallMPI(MPI_Win_fence(fenceassert, *win));
513:       break;
514:     case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
515:       break;
516:     case PETSCSF_WINDOW_SYNC_ACTIVE: {
517:       MPI_Group   ingroup, outgroup;
518:       PetscMPIInt isize, osize;

520:       /* Open MPI 4.0.2 with btl=vader does not like calling
521:          - MPI_Win_complete when ogroup is empty
522:          - MPI_Win_wait when igroup is empty
523:          So, we do not even issue the corresponding start and post calls
524:          The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
525:          start(outgroup) has a matching post(ingroup)
526:          and this is guaranteed by PetscSF
527:       */
528:       PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
529:       PetscCallMPI(MPI_Group_size(ingroup, &isize));
530:       PetscCallMPI(MPI_Group_size(outgroup, &osize));
531:       if (isize) PetscCallMPI(MPI_Win_post(ingroup, postassert, *win));
532:       if (osize) PetscCallMPI(MPI_Win_start(outgroup, startassert, *win));
533:     } break;
534:     default:
535:       SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Unknown synchronization type");
536:     }
537:   }
538:   PetscFunctionReturn(PETSC_SUCCESS);
539: }
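/*
   Editorial note (not part of the numbered source): the window life cycle used by the
   communication routines further below is

     PetscSFGetWindow()      - acquire (or reuse) a window for the root array, optionally opening an epoch
     ... one-sided MPI_Get/MPI_Accumulate/MPI_Get_accumulate calls ...
     PetscSFFindWindow()     - look the window up again in the corresponding End routine
     PetscSFRestoreWindow()  - close the epoch and release the window (or keep it cached for reuse)
*/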

541: /*
542:    PetscSFFindWindow - Finds a window that is already in use

544:    Not Collective

546:    Input Parameters:
547: +  sf - star forest
548: .  unit - data type
549: -  array - array with which the window is associated

551:    Output Parameters:
552: +  win - window
553: -  reqs - outstanding requests associated to the window

555:    Level: developer

557: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
558: */
559: static PetscErrorCode PetscSFFindWindow(PetscSF sf, MPI_Datatype unit, const void *array, MPI_Win *win, MPI_Request **reqs)
560: {
561:   PetscSF_Window *w = (PetscSF_Window *)sf->data;
562:   PetscSFWinLink  link;

564:   PetscFunctionBegin;
565:   *win = MPI_WIN_NULL;
566:   for (link = w->wins; link; link = link->next) {
567:     if (array == link->paddr) {
568:       PetscCall(PetscInfo(sf, "Window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)PetscObjectComm((PetscObject)sf)));
569:       *win  = link->win;
570:       *reqs = link->reqs;
571:       PetscFunctionReturn(PETSC_SUCCESS);
572:     }
573:   }
574:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Requested window not in use");
575: }

577: /*
578:    PetscSFRestoreWindow - Restores a window obtained with `PetscSFGetWindow()`

580:    Collective

582:    Input Parameters:
583: +  sf - star forest
584: .  unit - data type
585: .  array - array associated with window
586: .  sync - type of synchronization `PetscSFWindowSyncType`
587: .  epoch - close an epoch, must match argument to `PetscSFGetWindow()`
     .  fenceassert - assert parameter for call to `MPI_Win_fence()`, if sync == `PETSCSF_WINDOW_SYNC_FENCE`
588: .  update - if we have to update the local window array
589: -  win - window

591:    Level: developer

593: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFFindWindow()`
594: */
595: static PetscErrorCode PetscSFRestoreWindow(PetscSF sf, MPI_Datatype unit, void *array, PetscSFWindowSyncType sync, PetscBool epoch, PetscMPIInt fenceassert, PetscBool update, MPI_Win *win)
596: {
597:   PetscSF_Window         *w = (PetscSF_Window *)sf->data;
598:   PetscSFWinLink         *p, link;
599:   PetscBool               reuse = PETSC_FALSE;
600:   PetscSFWindowFlavorType flavor;
601:   void                   *laddr;
602:   size_t                  bytes;

604:   PetscFunctionBegin;
605:   for (p = &w->wins; *p; p = &(*p)->next) {
606:     link = *p;
607:     if (*win == link->win) {
608:       PetscCheck(array == link->paddr, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Matched window, but not array");
609:       if (epoch != link->epoch) {
610:         PetscCheck(!epoch, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "No epoch to end");
611:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Restoring window without ending epoch");
612:       }
613:       laddr  = link->addr;
614:       flavor = link->flavor;
615:       bytes  = link->bytes;
616:       if (flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
617:       else {
618:         *p     = link->next;
619:         update = PETSC_FALSE;
620:       } /* remove from list */
621:       goto found;
622:     }
623:   }
624:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Requested window not in use");

626: found:
627:   PetscCall(PetscInfo(sf, "Window %" PETSC_INTPTR_T_FMT " of flavor %d for comm %" PETSC_INTPTR_T_FMT "\n", (PETSC_INTPTR_T)link->win, link->flavor, (PETSC_INTPTR_T)PetscObjectComm((PetscObject)sf)));
628:   if (epoch) {
629:     switch (sync) {
630:     case PETSCSF_WINDOW_SYNC_FENCE:
631:       PetscCallMPI(MPI_Win_fence(fenceassert, *win));
632:       break;
633:     case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
634:       break;
635:     case PETSCSF_WINDOW_SYNC_ACTIVE: {
636:       MPI_Group   ingroup, outgroup;
637:       PetscMPIInt isize, osize;

639:       /* Open MPI 4.0.2 with btl=vader does not like calling
640:          - MPI_Win_complete when ogroup is empty
641:          - MPI_Win_wait when igroup is empty
642:          The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
643:          - each process that issues a call to MPI_Win_start issues a call to MPI_Win_complete
644:          - each process that issues a call to MPI_Win_post issues a call to MPI_Win_wait
645:       */
646:       PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
647:       PetscCallMPI(MPI_Group_size(ingroup, &isize));
648:       PetscCallMPI(MPI_Group_size(outgroup, &osize));
649:       if (osize) PetscCallMPI(MPI_Win_complete(*win));
650:       if (isize) PetscCallMPI(MPI_Win_wait(*win));
651:     } break;
652:     default:
653:       SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_PLIB, "Unknown synchronization type");
654:     }
655:   }
656:   if (update) {
657:     if (sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_fence(MPI_MODE_NOPUT | MPI_MODE_NOSUCCEED, *win));
658:     PetscCall(PetscMemcpy(array, laddr, sf->nroots * bytes));
659:   }
660:   link->epoch = PETSC_FALSE;
661:   link->inuse = PETSC_FALSE;
662:   link->paddr = NULL;
663:   if (!reuse) {
664:     PetscCall(PetscFree(link->dyn_target_addr));
665:     PetscCall(PetscFree(link->reqs));
666:     PetscCallMPI(MPI_Win_free(&link->win));
667:     PetscCall(PetscFree(link));
668:     *win = MPI_WIN_NULL;
669:   }
670:   PetscFunctionReturn(PETSC_SUCCESS);
671: }

673: static PetscErrorCode PetscSFSetUp_Window(PetscSF sf)
674: {
675:   PetscSF_Window *w = (PetscSF_Window *)sf->data;
676:   MPI_Group       ingroup, outgroup;

678:   PetscFunctionBegin;
679:   PetscCall(PetscSFSetUpRanks(sf, MPI_GROUP_EMPTY));
680:   if (!w->dynsf) {
681:     PetscInt     i;
682:     PetscSFNode *remotes;

684:     PetscCall(PetscMalloc1(sf->nranks, &remotes));
685:     for (i = 0; i < sf->nranks; i++) {
686:       remotes[i].rank  = sf->ranks[i];
687:       remotes[i].index = 0;
688:     }
689:     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &w->dynsf));
690:     PetscCall(PetscSFWindowSetFlavorType(w->dynsf, PETSCSF_WINDOW_FLAVOR_CREATE)); /* break recursion */
691:     PetscCall(PetscSFSetGraph(w->dynsf, 1, sf->nranks, NULL, PETSC_OWN_POINTER, remotes, PETSC_OWN_POINTER));
692:   }
693:   switch (w->sync) {
694:   case PETSCSF_WINDOW_SYNC_ACTIVE:
695:     PetscCall(PetscSFGetGroups(sf, &ingroup, &outgroup));
696:   default:
697:     break;
698:   }
699:   PetscFunctionReturn(PETSC_SUCCESS);
700: }

702: static PetscErrorCode PetscSFSetFromOptions_Window(PetscSF sf, PetscOptionItems *PetscOptionsObject)
703: {
704:   PetscSF_Window         *w      = (PetscSF_Window *)sf->data;
705:   PetscSFWindowFlavorType flavor = w->flavor;

707:   PetscFunctionBegin;
708:   PetscOptionsHeadBegin(PetscOptionsObject, "PetscSF Window options");
709:   PetscCall(PetscOptionsEnum("-sf_window_sync", "synchronization type to use for PetscSF Window communication", "PetscSFWindowSetSyncType", PetscSFWindowSyncTypes, (PetscEnum)w->sync, (PetscEnum *)&w->sync, NULL));
710:   PetscCall(PetscOptionsEnum("-sf_window_flavor", "flavor to use for PetscSF Window creation", "PetscSFWindowSetFlavorType", PetscSFWindowFlavorTypes, (PetscEnum)flavor, (PetscEnum *)&flavor, NULL));
711:   PetscCall(PetscSFWindowSetFlavorType(sf, flavor));
712:   PetscOptionsHeadEnd();
713:   PetscFunctionReturn(PETSC_SUCCESS);
714: }

716: static PetscErrorCode PetscSFReset_Window(PetscSF sf)
717: {
718:   PetscSF_Window *w = (PetscSF_Window *)sf->data;
719:   PetscSFDataLink link, next;
720:   PetscSFWinLink  wlink, wnext;
721:   PetscInt        i;

723:   PetscFunctionBegin;
724:   for (link = w->link; link; link = next) {
725:     next = link->next;
726:     PetscCallMPI(MPI_Type_free(&link->unit));
727:     for (i = 0; i < sf->nranks; i++) {
728:       PetscCallMPI(MPI_Type_free(&link->mine[i]));
729:       PetscCallMPI(MPI_Type_free(&link->remote[i]));
730:     }
731:     PetscCall(PetscFree2(link->mine, link->remote));
732:     PetscCall(PetscFree(link));
733:   }
734:   w->link = NULL;
735:   for (wlink = w->wins; wlink; wlink = wnext) {
736:     wnext = wlink->next;
737:     PetscCheck(!wlink->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Window still in use with address %p", (void *)wlink->addr);
738:     PetscCall(PetscFree(wlink->dyn_target_addr));
739:     PetscCall(PetscFree(wlink->reqs));
740:     PetscCallMPI(MPI_Win_free(&wlink->win));
741:     PetscCall(PetscFree(wlink));
742:   }
743:   w->wins = NULL;
744:   PetscCall(PetscSFDestroy(&w->dynsf));
745:   if (w->info != MPI_INFO_NULL) PetscCallMPI(MPI_Info_free(&w->info));
746:   PetscFunctionReturn(PETSC_SUCCESS);
747: }

749: static PetscErrorCode PetscSFDestroy_Window(PetscSF sf)
750: {
751:   PetscFunctionBegin;
752:   PetscCall(PetscSFReset_Window(sf));
753:   PetscCall(PetscFree(sf->data));
754:   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetSyncType_C", NULL));
755:   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetSyncType_C", NULL));
756:   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetFlavorType_C", NULL));
757:   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetFlavorType_C", NULL));
758:   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetInfo_C", NULL));
759:   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetInfo_C", NULL));
760:   PetscFunctionReturn(PETSC_SUCCESS);
761: }

763: static PetscErrorCode PetscSFView_Window(PetscSF sf, PetscViewer viewer)
764: {
765:   PetscSF_Window   *w = (PetscSF_Window *)sf->data;
766:   PetscBool         iascii;
767:   PetscViewerFormat format;

769:   PetscFunctionBegin;
770:   PetscCall(PetscViewerGetFormat(viewer, &format));
771:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
772:   if (iascii) {
773:     PetscCall(PetscViewerASCIIPrintf(viewer, "  current flavor=%s synchronization=%s MultiSF sort=%s\n", PetscSFWindowFlavorTypes[w->flavor], PetscSFWindowSyncTypes[w->sync], sf->rankorder ? "rank-order" : "unordered"));
774:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
775:       if (w->info != MPI_INFO_NULL) {
776:         PetscMPIInt k, nkeys;
777:         char        key[MPI_MAX_INFO_KEY], value[MPI_MAX_INFO_VAL];

779:         PetscCallMPI(MPI_Info_get_nkeys(w->info, &nkeys));
780:         PetscCall(PetscViewerASCIIPrintf(viewer, "    current info with %d keys. Ordered key-value pairs follow:\n", nkeys));
781:         for (k = 0; k < nkeys; k++) {
782:           PetscMPIInt flag;

784:           PetscCallMPI(MPI_Info_get_nthkey(w->info, k, key));
785:           PetscCallMPI(MPI_Info_get(w->info, key, MPI_MAX_INFO_VAL, value, &flag));
786:           PetscCheck(flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing key %s", key);
787:           PetscCall(PetscViewerASCIIPrintf(viewer, "      %s = %s\n", key, value));
788:         }
789:       } else {
790:         PetscCall(PetscViewerASCIIPrintf(viewer, "    current info=MPI_INFO_NULL\n"));
791:       }
792:     }
793:   }
794:   PetscFunctionReturn(PETSC_SUCCESS);
795: }

797: static PetscErrorCode PetscSFDuplicate_Window(PetscSF sf, PetscSFDuplicateOption opt, PetscSF newsf)
798: {
799:   PetscSF_Window       *w = (PetscSF_Window *)sf->data;
800:   PetscSFWindowSyncType synctype;

802:   PetscFunctionBegin;
803:   synctype = w->sync;
804:   /* HACK: Must use FENCE or LOCK when called from PetscSFGetGroups() because ACTIVE here would cause recursion. */
805:   if (!sf->setupcalled) synctype = PETSCSF_WINDOW_SYNC_LOCK;
806:   PetscCall(PetscSFWindowSetSyncType(newsf, synctype));
807:   PetscCall(PetscSFWindowSetFlavorType(newsf, w->flavor));
808:   PetscCall(PetscSFWindowSetInfo(newsf, w->info));
809:   PetscFunctionReturn(PETSC_SUCCESS);
810: }

812: static PetscErrorCode PetscSFBcastBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
813: {
814:   PetscSF_Window     *w = (PetscSF_Window *)sf->data;
815:   PetscInt            i, nranks;
816:   const PetscMPIInt  *ranks;
817:   const MPI_Aint     *target_disp;
818:   const MPI_Datatype *mine, *remote;
819:   MPI_Request        *reqs;
820:   MPI_Win             win;

822:   PetscFunctionBegin;
823:   PetscCheck(op == MPI_REPLACE, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PetscSFBcastBegin_Window with op!=MPI_REPLACE has not been implemented");
824:   PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
825:   PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote));
826:   PetscCall(PetscSFGetWindow(sf, unit, (void *)rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOPUT | MPI_MODE_NOPRECEDE, MPI_MODE_NOPUT, 0, &target_disp, &reqs, &win));
827:   for (i = 0; i < nranks; i++) {
828:     MPI_Aint tdp = target_disp ? target_disp[i] : 0;

830:     if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
831:       PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], MPI_MODE_NOCHECK, win));
832: #if defined(PETSC_HAVE_MPI_RGET)
833:       PetscCallMPI(MPI_Rget(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win, &reqs[i]));
834: #else
835:       PetscCallMPI(MPI_Get(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win));
836: #endif
837:     } else {
838:       PetscCallMPI(MPI_Get(leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], win));
839:     }
840:   }
841:   PetscFunctionReturn(PETSC_SUCCESS);
842: }

844: static PetscErrorCode PetscSFBcastEnd_Window(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
845: {
846:   PetscSF_Window *w = (PetscSF_Window *)sf->data;
847:   MPI_Win         win;
848:   MPI_Request    *reqs = NULL;

850:   PetscFunctionBegin;
851:   PetscCall(PetscSFFindWindow(sf, unit, rootdata, &win, &reqs));
852:   if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE));
853:   if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
854:     PetscInt           i, nranks;
855:     const PetscMPIInt *ranks;

857:     PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
858:     for (i = 0; i < nranks; i++) PetscCallMPI(MPI_Win_unlock(ranks[i], win));
859:   }
860:   PetscCall(PetscSFRestoreWindow(sf, unit, (void *)rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSTORE | MPI_MODE_NOSUCCEED, PETSC_FALSE, &win));
861:   PetscFunctionReturn(PETSC_SUCCESS);
862: }

864: static PetscErrorCode PetscSFReduceBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
865: {
866:   PetscSF_Window     *w = (PetscSF_Window *)sf->data;
867:   PetscInt            i, nranks;
868:   const PetscMPIInt  *ranks;
869:   const MPI_Aint     *target_disp;
870:   const MPI_Datatype *mine, *remote;
871:   MPI_Win             win;

873:   PetscFunctionBegin;
874:   PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
875:   PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote));
876:   PetscCall(PetscSFWindowOpTranslate(&op));
877:   PetscCall(PetscSFGetWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOPRECEDE, 0, 0, &target_disp, NULL, &win));
878:   for (i = 0; i < nranks; i++) {
879:     MPI_Aint tdp = target_disp ? target_disp[i] : 0;

881:     if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], MPI_MODE_NOCHECK, win));
882:     PetscCallMPI(MPI_Accumulate((void *)leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win));
883:     if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_unlock(ranks[i], win));
884:   }
885:   PetscFunctionReturn(PETSC_SUCCESS);
886: }

888: static PetscErrorCode PetscSFReduceEnd_Window(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
889: {
890:   PetscSF_Window *w = (PetscSF_Window *)sf->data;
891:   MPI_Win         win;
892:   MPI_Request    *reqs = NULL;

894:   PetscFunctionBegin;
895:   PetscCall(PetscSFFindWindow(sf, unit, rootdata, &win, &reqs));
896:   if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE));
897:   PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSUCCEED, PETSC_TRUE, &win));
898:   PetscFunctionReturn(PETSC_SUCCESS);
899: }
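/*
   Usage sketch (not part of the numbered source): a leaf-to-root sum with this
   implementation; MPIU_SUM is translated to MPI_SUM by PetscSFWindowOpTranslate() so
   that MPI_Accumulate can be used. leafdata and rootdata are assumed to be allocated
   elsewhere.

     PetscCall(PetscSFReduceBegin(sf, MPIU_SCALAR, leafdata, rootdata, MPIU_SUM));
     PetscCall(PetscSFReduceEnd(sf, MPIU_SCALAR, leafdata, rootdata, MPIU_SUM));
*/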

901: static PetscErrorCode PetscSFFetchAndOpBegin_Window(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op)
902: {
903:   PetscInt            i, nranks;
904:   const PetscMPIInt  *ranks;
905:   const MPI_Datatype *mine, *remote;
906:   const MPI_Aint     *target_disp;
907:   MPI_Win             win;
908:   PetscSF_Window     *w = (PetscSF_Window *)sf->data;
909: #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
910:   PetscSFWindowFlavorType oldf;
911: #endif

913:   PetscFunctionBegin;
914:   PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
915:   PetscCall(PetscSFWindowGetDataTypes(sf, unit, &mine, &remote));
916:   PetscCall(PetscSFWindowOpTranslate(&op));
917: #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
918:   /* FetchAndOp without MPI_Get_accumulate requires locking.
919:      We create a new window every time so as not to interfere with a user-defined MPI_Info which may have set "no_locks" to "true" */
920:   oldf      = w->flavor;
921:   w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
922:   PetscCall(PetscSFGetWindow(sf, unit, rootdata, PETSCSF_WINDOW_SYNC_LOCK, PETSC_FALSE, 0, 0, 0, &target_disp, NULL, &win));
923: #else
924:   PetscCall(PetscSFGetWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOPRECEDE, 0, 0, &target_disp, NULL, &win));
925: #endif
926:   for (i = 0; i < nranks; i++) {
927:     MPI_Aint tdp = target_disp ? target_disp[i] : 0;

929: #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
930:     PetscCallMPI(MPI_Win_lock(MPI_LOCK_EXCLUSIVE, ranks[i], 0, win));
931:     PetscCallMPI(MPI_Get(leafupdate, 1, mine[i], ranks[i], tdp, 1, remote[i], win));
932:     PetscCallMPI(MPI_Accumulate((void *)leafdata, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win));
933:     PetscCallMPI(MPI_Win_unlock(ranks[i], win));
934: #else
935:     if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_lock(MPI_LOCK_SHARED, ranks[i], 0, win));
936:     PetscCallMPI(MPI_Get_accumulate((void *)leafdata, 1, mine[i], leafupdate, 1, mine[i], ranks[i], tdp, 1, remote[i], op, win));
937:     if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) PetscCallMPI(MPI_Win_unlock(ranks[i], win));
938: #endif
939:   }
940: #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
941:   w->flavor = oldf;
942: #endif
943:   PetscFunctionReturn(PETSC_SUCCESS);
944: }

946: static PetscErrorCode PetscSFFetchAndOpEnd_Window(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
947: {
948:   MPI_Win win;
949: #if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
950:   PetscSF_Window *w = (PetscSF_Window *)sf->data;
951: #endif
952:   MPI_Request *reqs = NULL;

954:   PetscFunctionBegin;
955:   PetscCall(PetscSFFindWindow(sf, unit, rootdata, &win, &reqs));
956:   if (reqs) PetscCallMPI(MPI_Waitall(sf->nranks, reqs, MPI_STATUSES_IGNORE));
957: #if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
958:   PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, w->sync, PETSC_TRUE, MPI_MODE_NOSUCCEED, PETSC_TRUE, &win));
959: #else
960:   PetscCall(PetscSFRestoreWindow(sf, unit, rootdata, PETSCSF_WINDOW_SYNC_LOCK, PETSC_FALSE, 0, PETSC_TRUE, &win));
961: #endif
962:   PetscFunctionReturn(PETSC_SUCCESS);
963: }

965: PETSC_INTERN PetscErrorCode PetscSFCreate_Window(PetscSF sf)
966: {
967:   PetscSF_Window *w = (PetscSF_Window *)sf->data;

969:   PetscFunctionBegin;
970:   sf->ops->SetUp           = PetscSFSetUp_Window;
971:   sf->ops->SetFromOptions  = PetscSFSetFromOptions_Window;
972:   sf->ops->Reset           = PetscSFReset_Window;
973:   sf->ops->Destroy         = PetscSFDestroy_Window;
974:   sf->ops->View            = PetscSFView_Window;
975:   sf->ops->Duplicate       = PetscSFDuplicate_Window;
976:   sf->ops->BcastBegin      = PetscSFBcastBegin_Window;
977:   sf->ops->BcastEnd        = PetscSFBcastEnd_Window;
978:   sf->ops->ReduceBegin     = PetscSFReduceBegin_Window;
979:   sf->ops->ReduceEnd       = PetscSFReduceEnd_Window;
980:   sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Window;
981:   sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Window;

983:   PetscCall(PetscNew(&w));
984:   sf->data  = (void *)w;
985:   w->sync   = PETSCSF_WINDOW_SYNC_FENCE;
986:   w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
987:   w->info   = MPI_INFO_NULL;

989:   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetSyncType_C", PetscSFWindowSetSyncType_Window));
990:   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetSyncType_C", PetscSFWindowGetSyncType_Window));
991:   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetFlavorType_C", PetscSFWindowSetFlavorType_Window));
992:   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetFlavorType_C", PetscSFWindowGetFlavorType_Window));
993:   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowSetInfo_C", PetscSFWindowSetInfo_Window));
994:   PetscCall(PetscObjectComposeFunction((PetscObject)sf, "PetscSFWindowGetInfo_C", PetscSFWindowGetInfo_Window));

996: #if defined(OMPI_MAJOR_VERSION) && (OMPI_MAJOR_VERSION < 1 || (OMPI_MAJOR_VERSION == 1 && OMPI_MINOR_VERSION <= 6))
997:   {
998:     PetscBool ackbug = PETSC_FALSE;
999:     PetscCall(PetscOptionsGetBool(NULL, NULL, "-acknowledge_ompi_onesided_bug", &ackbug, NULL));
1000:     if (ackbug) {
1001:       PetscCall(PetscInfo(sf, "Acknowledged Open MPI bug, proceeding anyway. Expect memory corruption.\n"));
1002:     } else SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_LIB, "Open MPI is known to be buggy (https://svn.open-mpi.org/trac/ompi/ticket/1905 and 2656), use -acknowledge_ompi_onesided_bug to proceed");
1003:   }
1004: #endif
1005:   PetscFunctionReturn(PETSC_SUCCESS);
1006: }
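/*
   Usage sketch (not part of the numbered source): select this implementation at runtime;
   graph setup with PetscSFSetGraph() is assumed to happen elsewhere.

     PetscSF sf;
     PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
     PetscCall(PetscSFSetType(sf, PETSCSFWINDOW));
     PetscCall(PetscSFSetFromOptions(sf)); // honors -sf_window_sync and -sf_window_flavor
     // ... PetscSFSetGraph(), communication, ...
     PetscCall(PetscSFDestroy(&sf));
*/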