Actual source code: sf.c
petsc-3.4.5 2014-06-29
1: #include <petsc-private/sfimpl.h> /*I "petscsf.h" I*/
2: #include <petscctable.h>
4: #if defined(PETSC_USE_DEBUG)
5: # define PetscSFCheckGraphSet(sf,arg) do { \
6: if (PetscUnlikely(!(sf)->graphset)) \
7: SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
8: } while (0)
9: #else
10: # define PetscSFCheckGraphSet(sf,arg) do {} while (0)
11: #endif
13: const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0};
17: /*@C
18: PetscSFCreate - create a star forest communication context
20: Not Collective
22: Input Arguments:
23: . comm - communicator on which the star forest will operate
25: Output Arguments:
26: . sf - new star forest context
28: Level: intermediate
30: .seealso: PetscSFSetGraph(), PetscSFDestroy()
31: @*/
32: PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
33: {
35: PetscSF b;
39: #if !defined(PETSC_USE_DYNAMIC_LIBRARIES)
40: PetscSFInitializePackage();
41: #endif
43: PetscHeaderCreate(b,_p_PetscSF,struct _PetscSFOps,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);
45: b->nroots = -1;
46: b->nleaves = -1;
47: b->nranks = -1;
48: b->rankorder = PETSC_TRUE;
49: b->ingroup = MPI_GROUP_NULL;
50: b->outgroup = MPI_GROUP_NULL;
51: b->graphset = PETSC_FALSE;
53: *sf = b;
54: return(0);
55: }
59: /*@C
60: PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
62: Collective
64: Input Arguments:
65: . sf - star forest
67: Level: advanced
69: .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
70: @*/
71: PetscErrorCode PetscSFReset(PetscSF sf)
72: {
77: sf->mine = NULL;
78: PetscFree(sf->mine_alloc);
79: sf->remote = NULL;
80: PetscFree(sf->remote_alloc);
81: PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);
82: PetscFree(sf->degree);
83: if (sf->ingroup != MPI_GROUP_NULL) {MPI_Group_free(&sf->ingroup);}
84: if (sf->outgroup != MPI_GROUP_NULL) {MPI_Group_free(&sf->outgroup);}
85: PetscSFDestroy(&sf->multi);
86: sf->graphset = PETSC_FALSE;
87: if (sf->ops->Reset) {(*sf->ops->Reset)(sf);}
88: sf->setupcalled = PETSC_FALSE;
89: return(0);
90: }
94: /*@C
95: PetscSFSetType - set the PetscSF communication implementation
97: Collective on PetscSF
99: Input Parameters:
100: + sf - the PetscSF context
101: - type - a known method
103: Options Database Key:
104: . -sf_type <type> - Sets the method; use -help for a list
105: of available methods (for instance, window, pt2pt, neighbor)
107: Notes:
108: See "include/petscsf.h" for available methods (for instance)
109: + PETSCSFWINDOW - MPI-2/3 one-sided
110: - PETSCSFBASIC - basic implementation using MPI-1 two-sided
112: Level: intermediate
114: .keywords: PetscSF, set, type
116: .seealso: PetscSFType, PetscSFCreate()
117: @*/
118: PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
119: {
120: PetscErrorCode ierr,(*r)(PetscSF);
121: PetscBool match;
127: PetscObjectTypeCompare((PetscObject)sf,type,&match);
128: if (match) return(0);
130: PetscFunctionListFind(PetscSFList,type,&r);
131: if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
132: /* Destroy the previous private PetscSF context */
133: if (sf->ops->Destroy) {
134: (*(sf)->ops->Destroy)(sf);
135: }
136: PetscMemzero(sf->ops,sizeof(*sf->ops));
137: PetscObjectChangeTypeName((PetscObject)sf,type);
138: (*r)(sf);
139: return(0);
140: }
144: /*@C
145: PetscSFDestroy - destroy star forest
147: Collective
149: Input Arguments:
150: . sf - address of star forest
152: Level: intermediate
154: .seealso: PetscSFCreate(), PetscSFReset()
155: @*/
156: PetscErrorCode PetscSFDestroy(PetscSF *sf)
157: {
161: if (!*sf) return(0);
163: if (--((PetscObject)(*sf))->refct > 0) {*sf = 0; return(0);}
164: PetscSFReset(*sf);
165: if ((*sf)->ops->Destroy) {(*(*sf)->ops->Destroy)(*sf);}
166: PetscHeaderDestroy(sf);
167: return(0);
168: }
172: /*@
173: PetscSFSetUp - set up communication structures
175: Collective
177: Input Arguments:
178: . sf - star forest communication object
180: Level: beginner
182: .seealso: PetscSFSetFromOptions(), PetscSFSetType()
183: @*/
184: PetscErrorCode PetscSFSetUp(PetscSF sf)
185: {
189: if (sf->setupcalled) return(0);
190: if (!((PetscObject)sf)->type_name) {PetscSFSetType(sf,PETSCSFBASIC);}
191: if (sf->ops->SetUp) {(*sf->ops->SetUp)(sf);}
192: sf->setupcalled = PETSC_TRUE;
193: return(0);
194: }
198: /*@C
199: PetscSFSetFromOptions - set PetscSF options using the options database
201: Logically Collective
203: Input Arguments:
204: . sf - star forest
206: Options Database Keys:
207: . -sf_synchronization - synchronization type used by PetscSF
209: Level: intermediate
211: .keywords: PetscSF, set, from, options, database
213: .seealso: PetscSFWindowSetSyncType()
214: @*/
215: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
216: {
217: PetscSFType deft;
218: char type[256];
220: PetscBool flg;
224: PetscObjectOptionsBegin((PetscObject)sf);
225: deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
226: PetscOptionsList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,256,&flg);
227: PetscSFSetType(sf,flg ? type : deft);
228: PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);
229: if (sf->ops->SetFromOptions) {(*sf->ops->SetFromOptions)(sf);}
230: PetscOptionsEnd();
231: return(0);
232: }
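A sketch (not from sf.c) of selecting the implementation through the options database; PetscOptionsSetValue() is used as a programmatic stand-in for passing -sf_type window on the command line, and sf is assumed to have been created with PetscSFCreate() and to carry no options prefix.

  ierr = PetscOptionsSetValue("-sf_type","window");CHKERRQ(ierr);  /* same effect as the command-line option */
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);                  /* picks up -sf_type and calls PetscSFSetType() */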
236: /*@C
237: PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
239: Logically Collective
241: Input Arguments:
242: + sf - star forest
243: - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
245: Level: advanced
247: .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
248: @*/
249: PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
250: {
255: if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
256: sf->rankorder = flg;
257: return(0);
258: }
262: /*@C
263: PetscSFSetGraph - Set a parallel star forest
265: Collective
267: Input Arguments:
268: + sf - star forest
269: . nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
270: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
271: . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage
272: . localmode - copy mode for ilocal
273: . iremote - remote locations of root vertices for each leaf on the current process
274: - remotemode - copy mode for iremote
276: Level: intermediate
278: .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
279: @*/
280: PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
281: {
282: PetscErrorCode ierr;
283: PetscTable table;
284: PetscTablePosition pos;
285: PetscMPIInt size;
286: PetscInt i,*rcount,*ranks;
292: if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"roots %D, cannot be negative",nroots);
293: if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);
294: PetscSFReset(sf);
295: sf->nroots = nroots;
296: sf->nleaves = nleaves;
297: if (ilocal) {
298: switch (localmode) {
299: case PETSC_COPY_VALUES:
300: PetscMalloc(nleaves*sizeof(*sf->mine),&sf->mine_alloc);
301: sf->mine = sf->mine_alloc;
302: PetscMemcpy(sf->mine,ilocal,nleaves*sizeof(*sf->mine));
303: sf->minleaf = PETSC_MAX_INT;
304: sf->maxleaf = PETSC_MIN_INT;
305: for (i=0; i<nleaves; i++) {
306: sf->minleaf = PetscMin(sf->minleaf,ilocal[i]);
307: sf->maxleaf = PetscMax(sf->maxleaf,ilocal[i]);
308: }
309: break;
310: case PETSC_OWN_POINTER:
311: sf->mine_alloc = (PetscInt*)ilocal;
312: sf->mine = sf->mine_alloc;
313: break;
314: case PETSC_USE_POINTER:
315: sf->mine = (PetscInt*)ilocal;
316: break;
317: default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
318: }
319: }
320: if (!ilocal || nleaves > 0) {
321: sf->minleaf = 0;
322: sf->maxleaf = nleaves - 1;
323: }
324: switch (remotemode) {
325: case PETSC_COPY_VALUES:
326: PetscMalloc(nleaves*sizeof(*sf->remote),&sf->remote_alloc);
327: sf->remote = sf->remote_alloc;
328: PetscMemcpy(sf->remote,iremote,nleaves*sizeof(*sf->remote));
329: break;
330: case PETSC_OWN_POINTER:
331: sf->remote_alloc = (PetscSFNode*)iremote;
332: sf->remote = sf->remote_alloc;
333: break;
334: case PETSC_USE_POINTER:
335: sf->remote = (PetscSFNode*)iremote;
336: break;
337: default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
338: }
340: MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
341: PetscTableCreate(10,size,&table);
342: for (i=0; i<nleaves; i++) {
343: /* Log 1-based rank */
344: PetscTableAdd(table,iremote[i].rank+1,1,ADD_VALUES);
345: }
346: PetscTableGetCount(table,&sf->nranks);
347: PetscMalloc4(sf->nranks,PetscInt,&sf->ranks,sf->nranks+1,PetscInt,&sf->roffset,nleaves,PetscInt,&sf->rmine,nleaves,PetscInt,&sf->rremote);
348: PetscMalloc2(sf->nranks,PetscInt,&rcount,sf->nranks,PetscInt,&ranks);
349: PetscTableGetHeadPosition(table,&pos);
350: for (i=0; i<sf->nranks; i++) {
351: PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);
352: ranks[i]--; /* Convert back to 0-based */
353: }
354: PetscTableDestroy(&table);
355: PetscSortIntWithArray(sf->nranks,ranks,rcount);
356: sf->roffset[0] = 0;
357: for (i=0; i<sf->nranks; i++) {
358: PetscMPIIntCast(ranks[i],sf->ranks+i);
359: sf->roffset[i+1] = sf->roffset[i] + rcount[i];
360: rcount[i] = 0;
361: }
362: for (i=0; i<nleaves; i++) {
363: PetscInt lo,hi,irank;
364: /* Search for index of iremote[i].rank in sf->ranks */
365: lo = 0; hi = sf->nranks;
366: while (hi - lo > 1) {
367: PetscInt mid = lo + (hi - lo)/2;
368: if (iremote[i].rank < sf->ranks[mid]) hi = mid;
369: else lo = mid;
370: }
371: if (hi - lo == 1 && iremote[i].rank == sf->ranks[lo]) irank = lo;
372: else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",iremote[i].rank);
373: sf->rmine[sf->roffset[irank] + rcount[irank]] = ilocal ? ilocal[i] : i;
374: sf->rremote[sf->roffset[irank] + rcount[irank]] = iremote[i].index;
375: rcount[irank]++;
376: }
377: PetscFree2(rcount,ranks);
378: #if !defined(PETSC_USE_64BIT_INDICES)
379: if (nroots == PETSC_DETERMINE) {
380: /* Jed, if you have a better way to do this, put it in */
381: PetscInt *numRankLeaves, *leafOff, *leafIndices, *numRankRoots, *rootOff, *rootIndices, maxRoots = 0;
383: /* All to all to determine number of leaf indices from each (you can do this using Scan and asynch messages) */
384: PetscMalloc4(size,PetscInt,&numRankLeaves,size+1,PetscInt,&leafOff,size,PetscInt,&numRankRoots,size+1,PetscInt,&rootOff);
385: PetscMemzero(numRankLeaves, size * sizeof(PetscInt));
386: for (i = 0; i < nleaves; ++i) ++numRankLeaves[iremote[i].rank];
387: MPI_Alltoall(numRankLeaves, 1, MPIU_INT, numRankRoots, 1, MPIU_INT, PetscObjectComm((PetscObject)sf));
388: /* Could set nroots to this maximum */
389: for (i = 0; i < size; ++i) maxRoots += numRankRoots[i];
391: /* Gather all indices */
392: PetscMalloc2(nleaves,PetscInt,&leafIndices,maxRoots,PetscInt,&rootIndices);
393: leafOff[0] = 0;
394: for (i = 0; i < size; ++i) leafOff[i+1] = leafOff[i] + numRankLeaves[i];
395: for (i = 0; i < nleaves; ++i) leafIndices[leafOff[iremote[i].rank]++] = iremote[i].index;
396: leafOff[0] = 0;
397: for (i = 0; i < size; ++i) leafOff[i+1] = leafOff[i] + numRankLeaves[i];
398: rootOff[0] = 0;
399: for (i = 0; i < size; ++i) rootOff[i+1] = rootOff[i] + numRankRoots[i];
400: MPI_Alltoallv(leafIndices, numRankLeaves, leafOff, MPIU_INT, rootIndices, numRankRoots, rootOff, MPIU_INT, PetscObjectComm((PetscObject)sf));
401: /* Sort and reduce */
402: PetscSortRemoveDupsInt(&maxRoots, rootIndices);
403: PetscFree2(leafIndices,rootIndices);
404: PetscFree4(numRankLeaves,leafOff,numRankRoots,rootOff);
405: sf->nroots = maxRoots;
406: }
407: #endif
409: sf->graphset = PETSC_TRUE;
410: return(0);
411: }
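A usage sketch (illustrative, not part of sf.c) that builds the simplest possible graph: each process owns one root and attaches one leaf to root 0 on rank 0, so a subsequent broadcast replicates rank 0's value to every process. ilocal is passed as NULL because the leaves are contiguous.

  PetscSF     sf;
  PetscSFNode remote;
  remote.rank  = 0;   /* every leaf references a root on rank 0 ... */
  remote.index = 0;   /* ... namely that rank's root number 0 */
  ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf,1,1,NULL,PETSC_COPY_VALUES,&remote,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);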
415: /*@C
416: PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map
418: Collective
420: Input Arguments:
421: . sf - star forest to invert
423: Output Arguments:
424: . isf - inverse of sf
426: Level: advanced
428: Notes:
429: All roots must have degree 1.
431: The local space may be a permutation, but cannot be sparse.
433: .seealso: PetscSFSetGraph()
434: @*/
435: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
436: {
438: PetscMPIInt rank;
439: PetscInt i,nroots,nleaves,maxlocal,count,*newilocal;
440: const PetscInt *ilocal;
441: PetscSFNode *roots,*leaves;
444: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
445: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);
446: for (i=0,maxlocal=0; i<nleaves; i++) maxlocal = PetscMax(maxlocal,(ilocal ? ilocal[i] : i)+1);
447: PetscMalloc2(nroots,PetscSFNode,&roots,nleaves,PetscSFNode,&leaves);
448: for (i=0; i<nleaves; i++) {
449: leaves[i].rank = rank;
450: leaves[i].index = i;
451: }
452: for (i=0; i <nroots; i++) {
453: roots[i].rank = -1;
454: roots[i].index = -1;
455: }
456: PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);
457: PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);
459: /* Check whether our leaves are sparse */
460: for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
461: if (count == nroots) newilocal = NULL;
462: else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
463: PetscMalloc(count*sizeof(PetscInt),&newilocal);
464: for (i=0,count=0; i<nroots; i++) {
465: if (roots[i].rank >= 0) {
466: newilocal[count] = i;
467: roots[count].rank = roots[i].rank;
468: roots[count].index = roots[i].index;
469: count++;
470: }
471: }
472: }
474: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);
475: PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);
476: PetscFree2(roots,leaves);
477: return(0);
478: }
482: /*@
483: PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph
485: Collective
487: Input Arguments:
488: + sf - communication object to duplicate
489: - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)
491: Output Arguments:
492: . newsf - new communication object
494: Level: beginner
496: .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
497: @*/
498: PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
499: {
503: PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);
504: PetscSFSetType(*newsf,((PetscObject)sf)->type_name);
505: if (sf->ops->Duplicate) {(*sf->ops->Duplicate)(sf,opt,*newsf);}
506: if (opt == PETSCSF_DUPLICATE_GRAPH) {
507: PetscInt nroots,nleaves;
508: const PetscInt *ilocal;
509: const PetscSFNode *iremote;
510: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
511: PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);
512: }
513: return(0);
514: }
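A sketch duplicating both configuration and graph, assuming sf already has its graph set:

  PetscSF sfcopy;
  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_GRAPH,&sfcopy);CHKERRQ(ierr);
  /* sfcopy now has the same type, communicator, and graph as sf */
  ierr = PetscSFDestroy(&sfcopy);CHKERRQ(ierr);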
518: /*@C
519: PetscSFGetGraph - Get the graph specifying a parallel star forest
521: Not Collective
523: Input Arguments:
524: . sf - star forest
526: Output Arguments:
527: + nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
528: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
529: . ilocal - locations of leaves in leafdata buffers
530: - iremote - remote locations of root vertices for each leaf on the current process
532: Level: intermediate
534: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
535: @*/
536: PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
537: {
541: /* We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set */
542: /* if (!sf->graphset) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Graph has not been set, must call PetscSFSetGraph()"); */
543: if (nroots) *nroots = sf->nroots;
544: if (nleaves) *nleaves = sf->nleaves;
545: if (ilocal) *ilocal = sf->mine;
546: if (iremote) *iremote = sf->remote;
547: return(0);
548: }
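A query sketch; the returned arrays are borrowed references owned by the SF and must not be freed, and any of the output pointers may be passed as NULL if not needed:

  PetscInt          nroots,nleaves,i;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
  for (i=0; i<nleaves; i++) {
    PetscInt leaf = ilocal ? ilocal[i] : i;   /* NULL ilocal means contiguous leaves */
    ierr = PetscPrintf(PETSC_COMM_SELF,"leaf %D -> (%D,%D)\n",leaf,iremote[i].rank,iremote[i].index);CHKERRQ(ierr);
  }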
552: /*@C
553: PetscSFGetLeafRange - Get the active leaf range on this process
555: Not Collective
557: Input Arguments:
558: . sf - star forest
560: Output Arguments:
561: + minleaf - minimum active leaf on this process
562: - maxleaf - maximum active leaf on this process
564: Level: developer
566: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
567: @*/
568: PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
569: {
573: if (minleaf) *minleaf = sf->minleaf;
574: if (maxleaf) *maxleaf = sf->maxleaf;
575: return(0);
576: }
580: /*@C
581: PetscSFView - view a star forest
583: Collective
585: Input Arguments:
586: + sf - star forest
587: - viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
589: Level: beginner
591: .seealso: PetscSFCreate(), PetscSFSetGraph()
592: @*/
593: PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
594: {
595: PetscErrorCode ierr;
596: PetscBool iascii;
597: PetscViewerFormat format;
601: if (!viewer) {PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);}
604: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
605: if (iascii) {
606: PetscMPIInt rank;
607: PetscInt i,j;
609: PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer,"Star Forest Object");
610: PetscViewerASCIIPushTab(viewer);
611: if (sf->ops->View) {(*sf->ops->View)(sf,viewer);}
612: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
613: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
614: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);
615: for (i=0; i<sf->nleaves; i++) {
616: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);
617: }
618: PetscViewerFlush(viewer);
619: PetscViewerGetFormat(viewer,&format);
620: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
621: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);
622: for (i=0; i<sf->nranks; i++) {
623: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);
624: for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
625: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);
626: }
627: }
628: }
629: PetscViewerFlush(viewer);
630: PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
631: PetscViewerASCIIPopTab(viewer);
632: }
633: return(0);
634: }
638: /*@C
639: PetscSFGetRanks - Get ranks and number of vertices referenced by leaves on this process
641: Not Collective
643: Input Arguments:
644: . sf - star forest
646: Output Arguments:
647: + nranks - number of ranks referenced by local part
648: . ranks - array of ranks
649: . roffset - offset in rmine/rremote for each rank (length nranks+1)
650: . rmine - concatenated array holding local indices referencing each remote rank
651: - rremote - concatenated array holding remote indices referenced for each remote rank
653: Level: developer
655: .seealso: PetscSFSetGraph()
656: @*/
657: PetscErrorCode PetscSFGetRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
658: {
662: if (nranks) *nranks = sf->nranks;
663: if (ranks) *ranks = sf->ranks;
664: if (roffset) *roffset = sf->roffset;
665: if (rmine) *rmine = sf->rmine;
666: if (rremote) *rremote = sf->rremote;
667: return(0);
668: }
672: /*@C
673: PetscSFGetGroups - gets incoming and outgoing process groups
675: Collective
677: Input Argument:
678: . sf - star forest
680: Output Arguments:
681: + incoming - group of origin processes for incoming edges (leaves that reference my roots)
682: - outgoing - group of destination processes for outgoing edges (roots that I reference)
684: Level: developer
686: .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
687: @*/
688: PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
689: {
691: MPI_Group group;
694: if (sf->ingroup == MPI_GROUP_NULL) {
695: PetscInt i;
696: const PetscInt *indegree;
697: PetscMPIInt rank,*outranks,*inranks;
698: PetscSFNode *remote;
699: PetscSF bgcount;
701: /* Compute the number of incoming ranks */
702: PetscMalloc(sf->nranks*sizeof(PetscSFNode),&remote);
703: for (i=0; i<sf->nranks; i++) {
704: remote[i].rank = sf->ranks[i];
705: remote[i].index = 0;
706: }
707: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);
708: PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
709: PetscSFComputeDegreeBegin(bgcount,&indegree);
710: PetscSFComputeDegreeEnd(bgcount,&indegree);
712: /* Enumerate the incoming ranks */
713: PetscMalloc2(indegree[0],PetscMPIInt,&inranks,sf->nranks,PetscMPIInt,&outranks);
714: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
715: for (i=0; i<sf->nranks; i++) outranks[i] = rank;
716: PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);
717: PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);
718: MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
719: MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);
720: MPI_Group_free(&group);
721: PetscFree2(inranks,outranks);
722: PetscSFDestroy(&bgcount);
723: }
724: *incoming = sf->ingroup;
726: if (sf->outgroup == MPI_GROUP_NULL) {
727: MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
728: MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);
729: MPI_Group_free(&group);
730: }
731: *outgoing = sf->outgroup;
732: return(0);
733: }
737: /*@C
738: PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters
740: Collective
742: Input Argument:
743: . sf - star forest whose roots may have degree 0 or greater than 1
745: Output Arguments:
746: . multi - star forest with split roots, such that each root has degree exactly 1
748: Level: developer
750: Notes:
752: In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
753: directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
754: edge, it is a candidate for future optimization that might involve its removal.
756: .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin()
757: @*/
758: PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
759: {
765: if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
766: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
767: *multi = sf->multi;
768: return(0);
769: }
770: if (!sf->multi) {
771: const PetscInt *indegree;
772: PetscInt i,*inoffset,*outones,*outoffset;
773: PetscSFNode *remote;
774: PetscSFComputeDegreeBegin(sf,&indegree);
775: PetscSFComputeDegreeEnd(sf,&indegree);
776: PetscMalloc3(sf->nroots+1,PetscInt,&inoffset,sf->nleaves,PetscInt,&outones,sf->nleaves,PetscInt,&outoffset);
777: inoffset[0] = 0;
778: #if 1
779: for (i=0; i<sf->nroots; i++) inoffset[i+1] = PetscMax(i+1, inoffset[i] + indegree[i]);
780: #else
781: for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
782: #endif
783: for (i=0; i<sf->nleaves; i++) outones[i] = 1;
784: PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPIU_SUM);
785: PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPIU_SUM);
786: for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
787: #if 0
788: #if defined(PETSC_USE_DEBUG) /* Check that the expected number of increments occurred */
789: for (i=0; i<sf->nroots; i++) {
790: if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
791: }
792: #endif
793: #endif
794: PetscMalloc(sf->nleaves*sizeof(*remote),&remote);
795: for (i=0; i<sf->nleaves; i++) {
796: remote[i].rank = sf->remote[i].rank;
797: remote[i].index = outoffset[i];
798: }
799: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
800: PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
801: if (sf->rankorder) { /* Sort the ranks */
802: PetscMPIInt rank;
803: PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
804: PetscSFNode *newremote;
805: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
806: for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
807: PetscMalloc5(sf->multi->nroots,PetscInt,&inranks,sf->multi->nroots,PetscInt,&newoffset,sf->nleaves,PetscInt,&outranks,sf->nleaves,PetscInt,&newoutoffset,maxdegree,PetscInt,&tmpoffset);
808: for (i=0; i<sf->nleaves; i++) outranks[i] = rank;
809: PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);
810: PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);
811: /* Sort the incoming ranks at each vertex, build the inverse map */
812: for (i=0; i<sf->nroots; i++) {
813: PetscInt j;
814: for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
815: PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);
816: for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
817: }
818: PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);
819: PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);
820: PetscMalloc(sf->nleaves*sizeof(*newremote),&newremote);
821: for (i=0; i<sf->nleaves; i++) {
822: newremote[i].rank = sf->remote[i].rank;
823: newremote[i].index = newoutoffset[i];
824: }
825: PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,NULL,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);
826: PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);
827: }
828: PetscFree3(inoffset,outones,outoffset);
829: }
830: *multi = sf->multi;
831: return(0);
832: }
836: /*@C
837: PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices
839: Collective
841: Input Arguments:
842: + sf - original star forest
843: . nroots - number of roots to select on this process
844: - selected - selected roots on this process
846: Output Arguments:
847: . newsf - new star forest
849: Level: advanced
851: Note:
852: To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
853: be done by calling PetscSFGetGraph().
855: .seealso: PetscSFSetGraph(), PetscSFGetGraph()
856: @*/
857: PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nroots,const PetscInt *selected,PetscSF *newsf)
858: {
860: PetscInt i,nleaves,*ilocal,*rootdata,*leafdata;
861: PetscSFNode *iremote;
867: PetscMalloc2(sf->nroots,PetscInt,&rootdata,sf->nleaves,PetscInt,&leafdata);
868: PetscMemzero(rootdata,sf->nroots*sizeof(PetscInt));
869: PetscMemzero(leafdata,sf->nleaves*sizeof(PetscInt));
870: for (i=0; i<nroots; i++) rootdata[selected[i]] = 1;
871: PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata);
872: PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata);
874: for (i=0,nleaves=0; i<sf->nleaves; i++) nleaves += leafdata[i];
875: PetscMalloc(nleaves*sizeof(PetscInt),&ilocal);
876: PetscMalloc(nleaves*sizeof(PetscSFNode),&iremote);
877: for (i=0,nleaves=0; i<sf->nleaves; i++) {
878: if (leafdata[i]) {
879: ilocal[nleaves] = sf->mine ? sf->mine[i] : i;
880: iremote[nleaves].rank = sf->remote[i].rank;
881: iremote[nleaves].index = sf->remote[i].index;
882: nleaves++;
883: }
884: }
885: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,newsf);
886: PetscSFSetGraph(*newsf,sf->nroots,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);
887: PetscFree2(rootdata,leafdata);
888: return(0);
889: }
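A sketch keeping only edges that end at local root 0 (assuming this process owns at least one root); the embedded SF preserves the original root numbering:

  PetscSF  esf;
  PetscInt selected[1] = {0};   /* local root indices to keep */
  ierr = PetscSFCreateEmbeddedSF(sf,1,selected,&esf);CHKERRQ(ierr);
  /* call PetscSFGetGraph(esf,...) to see which leaves still participate */
  ierr = PetscSFDestroy(&esf);CHKERRQ(ierr);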
893: /*@C
894: PetscSFBcastBegin - begin pointwise broadcast to be concluded with call to PetscSFBcastEnd()
896: Collective on PetscSF
898: Input Arguments:
899: + sf - star forest on which to communicate
900: . unit - data type associated with each node
901: - rootdata - buffer to broadcast
903: Output Arguments:
904: . leafdata - buffer to update with values from each leaf's respective root
906: Level: intermediate
908: .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFView(), PetscSFBcastEnd(), PetscSFReduceBegin()
909: @*/
910: PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
911: {
916: PetscSFCheckGraphSet(sf,1);
917: PetscSFSetUp(sf);
918: (*sf->ops->BcastBegin)(sf,unit,rootdata,leafdata);
919: return(0);
920: }
924: /*@C
925: PetscSFBcastEnd - end a broadcast operation started with PetscSFBcastBegin()
927: Collective
929: Input Arguments:
930: + sf - star forest
931: . unit - data type
932: - rootdata - buffer to broadcast
934: Output Arguments:
935: . leafdata - buffer to update with values from each leaf's respective root
937: Level: intermediate
939: .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
940: @*/
941: PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
942: {
947: PetscSFCheckGraphSet(sf,1);
948: PetscSFSetUp(sf);
949: (*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata);
950: return(0);
951: }
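A broadcast sketch pairing the Begin/End calls; buffer sizes follow the counts passed to PetscSFSetGraph(), and ierr/CHKERRQ error handling is assumed:

  PetscInt nroots,nleaves,i,*rootdata,*leafdata;
  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscMalloc(nroots*sizeof(PetscInt),&rootdata);CHKERRQ(ierr);
  ierr = PetscMalloc(nleaves*sizeof(PetscInt),&leafdata);CHKERRQ(ierr);
  for (i=0; i<nroots; i++) rootdata[i] = 100 + i;          /* values to broadcast */
  ierr = PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
  /* independent local work may overlap the communication here */
  ierr = PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
  /* leafdata[i] now holds the value at the root referenced by leaf i */
  ierr = PetscFree(rootdata);CHKERRQ(ierr);
  ierr = PetscFree(leafdata);CHKERRQ(ierr);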
955: /*@C
956: PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
958: Collective
960: Input Arguments:
961: + sf - star forest
962: . unit - data type
963: . leafdata - values to reduce
964: - op - reduction operation
966: Output Arguments:
967: . rootdata - result of reduction of values from all leaves of each root
969: Level: intermediate
971: .seealso: PetscSFBcastBegin()
972: @*/
973: PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
974: {
979: PetscSFCheckGraphSet(sf,1);
980: PetscSFSetUp(sf);
981: (sf->ops->ReduceBegin)(sf,unit,leafdata,rootdata,op);
982: return(0);
983: }
987: /*@C
988: PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
990: Collective
992: Input Arguments:
993: + sf - star forest
994: . unit - data type
995: . leafdata - values to reduce
996: - op - reduction operation
998: Output Arguments:
999: . rootdata - result of reduction of values from all leaves of each root
1001: Level: intermediate
1003: .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1004: @*/
1005: PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1006: {
1011: PetscSFCheckGraphSet(sf,1);
1012: PetscSFSetUp(sf);
1013: (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);
1014: return(0);
1015: }
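A reduction sketch: each leaf contributes 1 and the contributions are summed at the roots, reproducing the degree computation used elsewhere in this file:

  PetscInt nroots,nleaves,i,*leafdata,*rootdata;
  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscMalloc(nleaves*sizeof(PetscInt),&leafdata);CHKERRQ(ierr);
  ierr = PetscMalloc(nroots*sizeof(PetscInt),&rootdata);CHKERRQ(ierr);
  for (i=0; i<nleaves; i++) leafdata[i] = 1;   /* one contribution per leaf */
  for (i=0; i<nroots; i++)  rootdata[i] = 0;   /* MPIU_SUM accumulates into rootdata */
  ierr = PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,MPIU_SUM);CHKERRQ(ierr);
  /* rootdata[r] now equals the number of leaves referencing root r */
  ierr = PetscFree(leafdata);CHKERRQ(ierr);
  ierr = PetscFree(rootdata);CHKERRQ(ierr);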
1019: /*@C
1020: PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()
1022: Collective
1024: Input Arguments:
1025: . sf - star forest
1027: Output Arguments:
1028: . degree - degree of each root vertex
1030: Level: advanced
1032: .seealso: PetscSFGatherBegin()
1033: @*/
1034: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1035: {
1040: PetscSFCheckGraphSet(sf,1);
1042: if (!sf->degree) {
1043: PetscInt i;
1044: PetscMalloc(sf->nroots*sizeof(PetscInt),&sf->degree);
1045: PetscMalloc(sf->nleaves*sizeof(PetscInt),&sf->degreetmp);
1046: for (i=0; i<sf->nroots; i++) sf->degree[i] = 0;
1047: for (i=0; i<sf->nleaves; i++) sf->degreetmp[i] = 1;
1048: PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp,sf->degree,MPIU_SUM);
1049: }
1050: *degree = NULL;
1051: return(0);
1052: }
1056: /*@C
1057: PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()
1059: Collective
1061: Input Arguments:
1062: . sf - star forest
1064: Output Arguments:
1065: . degree - degree of each root vertex
1067: Level: developer
1069: .seealso: PetscSFComputeDegreeBegin()
1070: @*/
1071: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
1072: {
1077: PetscSFCheckGraphSet(sf,1);
1078: if (!sf->degreeknown) {
1079: PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp,sf->degree,MPIU_SUM);
1080: PetscFree(sf->degreetmp);
1082: sf->degreeknown = PETSC_TRUE;
1083: }
1084: *degree = sf->degree;
1085: return(0);
1086: }
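A degree-computation sketch; degree[] is owned by the SF, stays valid until the SF is reset or destroyed, and must not be freed by the caller:

  const PetscInt *degree;
  PetscInt       i,nroots,nedges = 0;
  ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFComputeDegreeBegin(sf,&degree);CHKERRQ(ierr);
  ierr = PetscSFComputeDegreeEnd(sf,&degree);CHKERRQ(ierr);
  for (i=0; i<nroots; i++) nedges += degree[i];   /* total incoming edges on this process */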
1090: /*@C
1091: PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1093: Collective
1095: Input Arguments:
1096: + sf - star forest
1097: . unit - data type
1098: . leafdata - leaf values to use in reduction
1099: - op - operation to use for reduction
1101: Output Arguments:
1102: + rootdata - root values to be updated, input state is seen by first process to perform an update
1103: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1105: Level: advanced
1107: Note:
1108: The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1109: might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1110: not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1111: integers.
1113: .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1114: @*/
1115: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1116: {
1121: PetscSFCheckGraphSet(sf,1);
1122: PetscSFSetUp(sf);
1123: (*sf->ops->FetchAndOpBegin)(sf,unit,rootdata,leafdata,leafupdate,op);
1124: return(0);
1125: }
1129: /*@C
1130: PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value
1132: Collective
1134: Input Arguments:
1135: + sf - star forest
1136: . unit - data type
1137: . leafdata - leaf values to use in reduction
1138: - op - operation to use for reduction
1140: Output Arguments:
1141: + rootdata - root values to be updated, input state is seen by first process to perform an update
1142: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1144: Level: advanced
1146: .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1147: @*/
1148: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1149: {
1154: PetscSFCheckGraphSet(sf,1);
1155: PetscSFSetUp(sf);
1156: (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);
1157: return(0);
1158: }
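A fetch-and-add sketch, the same pattern PetscSFGetMultiSF() above uses to hand each leaf a unique slot at its root; the variable names are illustrative and all buffers are caller-allocated:

  PetscInt nroots,nleaves,i,*rootdata,*leafdata,*leafupdate;
  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscMalloc(nroots*sizeof(PetscInt),&rootdata);CHKERRQ(ierr);
  ierr = PetscMalloc(nleaves*sizeof(PetscInt),&leafdata);CHKERRQ(ierr);
  ierr = PetscMalloc(nleaves*sizeof(PetscInt),&leafupdate);CHKERRQ(ierr);
  for (i=0; i<nroots; i++)  rootdata[i] = 0;   /* running counters at the roots */
  for (i=0; i<nleaves; i++) leafdata[i] = 1;   /* each leaf adds 1 */
  ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,rootdata,leafdata,leafupdate,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,rootdata,leafdata,leafupdate,MPIU_SUM);CHKERRQ(ierr);
  /* leafupdate[i] is the counter value seen just before leaf i's addition, so leaves
     sharing a root receive distinct consecutive slot numbers */
  ierr = PetscFree(rootdata);CHKERRQ(ierr);
  ierr = PetscFree(leafdata);CHKERRQ(ierr);
  ierr = PetscFree(leafupdate);CHKERRQ(ierr);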
1162: /*@C
1163: PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1165: Collective
1167: Input Arguments:
1168: + sf - star forest
1169: . unit - data type
1170: - leafdata - leaf data to gather to roots
1172: Output Argument:
1173: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1175: Level: intermediate
1177: .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1178: @*/
1179: PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1180: {
1182: PetscSF multi;
1186: PetscSFGetMultiSF(sf,&multi);
1187: PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);
1188: return(0);
1189: }
1193: /*@C
1194: PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1196: Collective
1198: Input Arguments:
1199: + sf - star forest
1200: . unit - data type
1201: - leafdata - leaf data to gather to roots
1203: Output Argument:
1204: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1206: Level: intermediate
1208: .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1209: @*/
1210: PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1211: {
1213: PetscSF multi;
1217: PetscSFCheckGraphSet(sf,1);
1218: PetscSFSetUp(sf);
1219: PetscSFGetMultiSF(sf,&multi);
1220: PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);
1221: return(0);
1222: }
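A gather sketch; the multi-root buffer needs one slot per incoming edge, i.e. the sum of the root degrees obtained from PetscSFComputeDegreeBegin()/End():

  const PetscInt *degree;
  PetscInt       i,nroots,nleaves,nedges = 0,*leafdata,*multirootdata;
  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFComputeDegreeBegin(sf,&degree);CHKERRQ(ierr);
  ierr = PetscSFComputeDegreeEnd(sf,&degree);CHKERRQ(ierr);
  for (i=0; i<nroots; i++) nedges += degree[i];
  ierr = PetscMalloc(nleaves*sizeof(PetscInt),&leafdata);CHKERRQ(ierr);
  ierr = PetscMalloc(nedges*sizeof(PetscInt),&multirootdata);CHKERRQ(ierr);
  for (i=0; i<nleaves; i++) leafdata[i] = i;   /* data to gather */
  ierr = PetscSFGatherBegin(sf,MPIU_INT,leafdata,multirootdata);CHKERRQ(ierr);
  ierr = PetscSFGatherEnd(sf,MPIU_INT,leafdata,multirootdata);CHKERRQ(ierr);
  /* multirootdata holds, for each root, one entry from every leaf that references it */
  ierr = PetscFree(leafdata);CHKERRQ(ierr);
  ierr = PetscFree(multirootdata);CHKERRQ(ierr);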
1226: /*@C
1227: PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1229: Collective
1231: Input Arguments:
1232: + sf - star forest
1233: . unit - data type
1234: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1236: Output Argument:
1237: . leafdata - leaf data to be updated with personal data from each respective root
1239: Level: intermediate
1241: .seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
1242: @*/
1243: PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1244: {
1246: PetscSF multi;
1250: PetscSFCheckGraphSet(sf,1);
1251: PetscSFSetUp(sf);
1252: PetscSFGetMultiSF(sf,&multi);
1253: PetscSFBcastBegin(multi,unit,multirootdata,leafdata);
1254: return(0);
1255: }
1259: /*@C
1260: PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1262: Collective
1264: Input Arguments:
1265: + sf - star forest
1266: . unit - data type
1267: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1269: Output Argument:
1270: . leafdata - leaf data to be updated with personal data from each respective root
1272: Level: intermediate
1274: .seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd()
1275: @*/
1276: PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1277: {
1279: PetscSF multi;
1283: PetscSFCheckGraphSet(sf,1);
1284: PetscSFSetUp(sf);
1285: PetscSFGetMultiSF(sf,&multi);
1286: PetscSFBcastEnd(multi,unit,multirootdata,leafdata);
1287: return(0);
1288: }