Actual source code: sfalltoall.c

petsc-3.15.0 2021-04-05
#include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h>
#include <../src/vec/is/sf/impls/basic/allgather/sfallgather.h>
#include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h>

/* Reuse the type. The difference is some fields (i.e., displs, recvcounts) are not used, which is not a big deal */
typedef PetscSF_Allgatherv PetscSF_Alltoall;

/*===================================================================================*/
/*              Implementations of SF public APIs                                    */
/*===================================================================================*/
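/* An alltoall SF has sf->nroots roots and sf->nleaves leaves on every process (both equal to
   the communicator size), with leaf i connected to root i on rank i. The graph is therefore
   implicit; iremote is only materialized lazily when a caller actually asks for it. */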
static PetscErrorCode PetscSFGetGraph_Alltoall(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
{
  PetscInt       i;

  if (nroots)  *nroots  = sf->nroots;
  if (nleaves) *nleaves = sf->nleaves;
  if (ilocal)  *ilocal  = NULL; /* Contiguous local indices */
  if (iremote) {
    if (!sf->remote) {
      PetscMalloc1(sf->nleaves,&sf->remote);
      sf->remote_alloc = sf->remote;
      for (i=0; i<sf->nleaves; i++) {
        sf->remote[i].rank  = i;
        sf->remote[i].index = i;
      }
    }
    *iremote = sf->remote;
  }
  return(0);
}

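/* Broadcast roots to leaves: pack the local root data and start a single MPIU_Ialltoall that
   ships one 'unit' to every process. The matching wait and unpack happen in the inherited
   PetscSFBcastEnd_Basic. */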
static PetscErrorCode PetscSFBcastBegin_Alltoall(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscErrorCode       ierr;
  PetscSFLink          link;
  MPI_Comm             comm;
  void                 *rootbuf = NULL,*leafbuf = NULL; /* buffer used by MPI */
  MPI_Request          *req;

  PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link);
  PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata);
  PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */);
  PetscObjectGetComm((PetscObject)sf,&comm);
  PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,&rootbuf,&leafbuf,&req,NULL);
  PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_ROOT2LEAF);
  MPIU_Ialltoall(rootbuf,1,unit,leafbuf,1,unit,comm,req);
  return(0);
}

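/* Reduce leaves to roots: the mirror image of PetscSFBcastBegin_Alltoall. The leaf data is
   packed and exchanged with one MPIU_Ialltoall; op is recorded in the link and takes effect
   later, when the inherited PetscSFReduceEnd_Basic unpacks the received buffer into rootdata. */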
static PetscErrorCode PetscSFReduceBegin_Alltoall(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
{
  PetscErrorCode       ierr;
  PetscSFLink          link;
  MPI_Comm             comm;
  void                 *rootbuf = NULL,*leafbuf = NULL; /* buffer used by MPI */
  MPI_Request          *req;

  PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_REDUCE,&link);
  PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata);
  PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */);
  PetscObjectGetComm((PetscObject)sf,&comm);
  PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_LEAF2ROOT,&rootbuf,&leafbuf,&req,NULL);
  PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_LEAF2ROOT);
  MPIU_Ialltoall(leafbuf,1,unit,rootbuf,1,unit,comm,req);
  return(0);
}

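/* The only edge of an alltoall SF that stays on-process is the self edge (leaf 'rank'
   connected to root 'rank'), so the local SF has exactly one root and one leaf. */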
static PetscErrorCode PetscSFCreateLocalSF_Alltoall(PetscSF sf,PetscSF *out)
{
  PetscInt       nroots = 1,nleaves = 1,*ilocal;
  PetscSFNode    *iremote = NULL;
  PetscSF        lsf;
  PetscMPIInt    rank;

  nroots  = 1;
  nleaves = 1;
  MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
  PetscMalloc1(nleaves,&ilocal);
  PetscMalloc1(nleaves,&iremote);
  ilocal[0]        = rank;
  iremote[0].rank  = 0;    /* rank in PETSC_COMM_SELF */
  iremote[0].index = rank; /* LocalSF is an embedded SF. Indices are not remapped */

  PetscSFCreate(PETSC_COMM_SELF,&lsf);
  PetscSFSetGraph(lsf,nroots,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER); /* pass ilocal since the leaf index is 'rank', not 0 */
  PetscSFSetUp(lsf);
  *out = lsf;
  return(0);
}

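/* Create an embedded SF that keeps only the edges whose roots appear in selected[]. The
   selected roots are deduplicated, the leaf ranks still connected to them are discovered
   with PetscCommBuildTwoSided, and the PETSCSFBASIC internals of the result are filled in
   by hand so that PetscSFSetUp never has to be called on it. */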
static PetscErrorCode PetscSFCreateEmbeddedRootSF_Alltoall(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  PetscInt       i,*tmproots,*ilocal,ndranks,ndiranks;
  PetscSFNode    *iremote;
  PetscMPIInt    nroots,*roots,nleaves,*leaves,rank;
  MPI_Comm       comm;
  PetscSF_Basic  *bas;
  PetscSF        esf;

  PetscObjectGetComm((PetscObject)sf,&comm);
  MPI_Comm_rank(comm,&rank);

  /* Remove duplicates in selected[] and store the result in roots[] */
  PetscMalloc1(nselected,&tmproots);
  PetscArraycpy(tmproots,selected,nselected);
  PetscSortRemoveDupsInt(&nselected,tmproots); /* nselected might be changed */
  if (tmproots[0] < 0 || tmproots[nselected-1] >= sf->nroots) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",tmproots[0],tmproots[nselected-1],sf->nroots);
  nroots = nselected;   /* For Alltoall, we know root indices will not overflow MPI_INT */
  PetscMalloc1(nselected,&roots);
  for (i=0; i<nselected; i++) roots[i] = tmproots[i];
  PetscFree(tmproots);

  /* Find out which leaves are still connected to roots in the embedded sf. We expect PetscCommBuildTwoSided to be more scalable than MPI_Alltoall */
  PetscCommBuildTwoSided(comm,0/*empty msg*/,MPI_INT/*fake*/,nroots,roots,NULL/*todata*/,&nleaves,&leaves,NULL/*fromdata*/);

  /* Move myself ahead if rank is in leaves[], since I am a distinguished rank */
  ndranks = 0;
  for (i=0; i<nleaves; i++) {
    if (leaves[i] == rank) {leaves[i] = -rank; ndranks = 1; break;}
  }
  PetscSortMPIInt(nleaves,leaves);
  if (nleaves && leaves[0] < 0) leaves[0] = rank;

  /* Build esf and fill its fields manually (without calling PetscSFSetUp) */
  PetscMalloc1(nleaves,&ilocal);
  PetscMalloc1(nleaves,&iremote);
  for (i=0; i<nleaves; i++) { /* 1:1 map from roots to leaves */
    ilocal[i]        = leaves[i];
    iremote[i].rank  = leaves[i];
    iremote[i].index = leaves[i];
  }
  PetscSFCreate(comm,&esf);
  PetscSFSetType(esf,PETSCSFBASIC); /* This optimized routine can only create a basic sf */
  PetscSFSetGraph(esf,sf->nleaves,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);

  /* As if we are calling PetscSFSetUpRanks(esf,self's group) */
  PetscMalloc4(nleaves,&esf->ranks,nleaves+1,&esf->roffset,nleaves,&esf->rmine,nleaves,&esf->rremote);
  esf->nranks     = nleaves;
  esf->ndranks    = ndranks;
  esf->roffset[0] = 0;
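  /* Each remote rank owns exactly one of this process's leaves, hence the unit-stride roffset[] */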
  for (i=0; i<nleaves; i++) {
    esf->ranks[i]     = leaves[i];
    esf->roffset[i+1] = i+1;
    esf->rmine[i]     = leaves[i];
    esf->rremote[i]   = leaves[i];
  }

  /* Set up esf->data, the incoming communication (i.e., recv info), which is usually done by PetscSFSetUp_Basic */
  bas  = (PetscSF_Basic*)esf->data;
  PetscMalloc2(nroots,&bas->iranks,nroots+1,&bas->ioffset);
  PetscMalloc1(nroots,&bas->irootloc);
  /* Move myself ahead if rank is in roots[], since I am a distinguished irank */
  ndiranks = 0;
  for (i=0; i<nroots; i++) {
    if (roots[i] == rank) {roots[i] = -rank; ndiranks = 1; break;}
  }
  PetscSortMPIInt(nroots,roots);
  if (nroots && roots[0] < 0) roots[0] = rank;

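  /* Likewise, each connected leaf rank contributes exactly one root, hence the unit-stride ioffset[] */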
  bas->niranks    = nroots;
  bas->ndiranks   = ndiranks;
  bas->ioffset[0] = 0;
  bas->itotal     = nroots;
  for (i=0; i<nroots; i++) {
    bas->iranks[i]    = roots[i];
    bas->ioffset[i+1] = i+1;
    bas->irootloc[i]  = roots[i];
  }

  /* See PetscSFCreateEmbeddedRootSF_Basic */
  esf->nleafreqs  = esf->nranks - esf->ndranks;
  bas->nrootreqs  = bas->niranks - bas->ndiranks;
  esf->persistent = PETSC_TRUE;
  /* Set up packing-related fields */
  PetscSFSetUpPackFields(esf);

  esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
  PetscFree(roots);
  PetscFree(leaves);
  *newsf = esf;
  return(0);
}

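/* Constructor for the PETSCSFALLTOALL type: fill in the ops table, mostly with routines
   inherited from the Allgatherv/Allgather/Gatherv implementations, and allocate the
   (Allgatherv-shaped) type-specific data. */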
PETSC_INTERN PetscErrorCode PetscSFCreate_Alltoall(PetscSF sf)
{
  PetscErrorCode   ierr;
  PetscSF_Alltoall *dat = (PetscSF_Alltoall*)sf->data;

  sf->ops->BcastEnd        = PetscSFBcastEnd_Basic;
  sf->ops->ReduceEnd       = PetscSFReduceEnd_Basic;

  /* Inherit from Allgatherv. It is astonishing Alltoall can inherit so much from Allgather(v) */
  sf->ops->Destroy         = PetscSFDestroy_Allgatherv;
  sf->ops->Reset           = PetscSFReset_Allgatherv;
  sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Allgatherv;
  sf->ops->GetRootRanks    = PetscSFGetRootRanks_Allgatherv;

  /* Inherit from Allgather. Every process gathers equal-sized data from others, which enables this inheritance. */
  sf->ops->GetLeafRanks    = PetscSFGetLeafRanks_Allgatherv;
  sf->ops->SetUp           = PetscSFSetUp_Allgather;

  /* Inherit from Gatherv. Each root has only one leaf connected, which enables this inheritance */
  sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Gatherv;

  /* Alltoall stuff */
  sf->ops->GetGraph             = PetscSFGetGraph_Alltoall;
  sf->ops->BcastBegin           = PetscSFBcastBegin_Alltoall;
  sf->ops->ReduceBegin          = PetscSFReduceBegin_Alltoall;
  sf->ops->CreateLocalSF        = PetscSFCreateLocalSF_Alltoall;
  sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Alltoall;

  PetscNewLog(sf,&dat);
  sf->data = (void*)dat;
  return(0);
}