Actual source code: sfalltoall.c

petsc-master 2020-01-15
  1:  #include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h>
  2:  #include <../src/vec/is/sf/impls/basic/allgather/sfallgather.h>
  3:  #include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h>
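/* The headers above expose the Allgatherv, Allgather and Gatherv implementations; most of the
   Alltoall type below is implemented by borrowing routines from them. */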

  5: #define PetscSFPackGet_Alltoall PetscSFPackGet_Allgatherv

  7: /* Reuse the Allgatherv type. The only difference is that some fields (e.g., displs, recvcounts) are not used, which is not a big deal */
  8: typedef PetscSF_Allgatherv PetscSF_Alltoall;

 10: /*===================================================================================*/
 11: /*              Implementations of SF public APIs                                    */
 12: /*===================================================================================*/
 13: static PetscErrorCode PetscSFGetGraph_Alltoall(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
 14: {
 16:   PetscInt       i;

 19:   if (nroots)  *nroots  = sf->nroots;
 20:   if (nleaves) *nleaves = sf->nleaves;
 21:   if (ilocal)  *ilocal  = NULL; /* Contiguous local indices */
 22:   if (iremote) {
 23:     if (!sf->remote) {
 24:       PetscMalloc1(sf->nleaves,&sf->remote);
 25:       sf->remote_alloc = sf->remote;
 26:       for (i=0; i<sf->nleaves; i++) {
 27:         sf->remote[i].rank  = i;
 28:         sf->remote[i].index = i;
 29:       }
 30:     }
 31:     *iremote = sf->remote;
 32:   }
 33:   return(0);
 34: }
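/* For this SF type every process has nroots = nleaves = communicator size.  The leaf indices are
   contiguous (ilocal = NULL) and the iremote array is built lazily on the first query, then cached
   in sf->remote (and owned through sf->remote_alloc). */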

 36: static PetscErrorCode PetscSFBcastAndOpBegin_Alltoall(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
 37: {
 38:   PetscErrorCode       ierr;
 39:   PetscSFPack          link;
 40:   MPI_Comm             comm;
 41:   const void           *rootbuf_mpi; /* buffer used by MPI */
 42:   void                 *leafbuf_mpi;
 43:   PetscMemType         rootmtype_mpi,leafmtype_mpi;

 46:   PetscSFPackGet_Alltoall(sf,unit,rootmtype,rootdata,leafmtype,leafdata,&link);
 47:   PetscObjectGetComm((PetscObject)sf,&comm);
 48:   PetscSFBcastPrepareMPIBuffers_Allgatherv(sf,link,op,&rootmtype_mpi,&rootbuf_mpi,&leafmtype_mpi,&leafbuf_mpi);
 49: MPIU_Ialltoall(rootbuf_mpi,1,unit,leafbuf_mpi,1,unit,comm,link->rootreqs[PETSCSF_ROOT2LEAF_BCAST][rootmtype_mpi]);
 50:   return(0);
 51: }
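/* A minimal sketch of what the Begin/End pair amounts to for op = MPIU_REPLACE and host memory
   (hypothetical caller-side view, not code from this file):

       MPI_Alltoall(rootdata,1,unit,leafdata,1,unit,comm);

   The actual implementation posts MPIU_Ialltoall on the buffers chosen by
   PetscSFBcastPrepareMPIBuffers_Allgatherv and lets the inherited PetscSFBcastAndOpEnd_Allgatherv
   wait on the request and apply op at the leaves. */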

 53: static PetscErrorCode PetscSFReduceBegin_Alltoall(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
 54: {
 55:   PetscErrorCode       ierr;
 56:   PetscSFPack          link;
 57:   MPI_Comm             comm;
 58:   void                 *recvbuf;

 61:   PetscObjectGetComm((PetscObject)sf,&comm);
 62:   if (!use_gpu_aware_mpi && (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE)) SETERRQ(comm,PETSC_ERR_SUP,"No support for PetscSFReduce"); /* No known uses */
 63:   PetscSFPackGet_Alltoall(sf,unit,rootmtype,rootdata,leafmtype,leafdata,&link);

 65:   if (op != MPIU_REPLACE) {
 66:     if (!link->rootbuf[rootmtype]) {PetscMallocWithMemType(rootmtype,sf->nroots*link->unitbytes,(void**)&link->rootbuf[rootmtype]);}
 67:     recvbuf = link->rootbuf[rootmtype];
 68:   } else {
 69:     recvbuf = (char*)rootdata;
 70:   }
 71: MPIU_Ialltoall(leafdata,1,unit,recvbuf,1,unit,comm,link->rootreqs[PETSCSF_LEAF2ROOT_REDUCE][rootmtype]);
 72:   return(0);
 73: }
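/* The reduce is the same pattern run in the leaf-to-root direction.  For op = MPIU_REPLACE the result
   goes straight into rootdata, roughly

       MPI_Alltoall(leafdata,1,unit,rootdata,1,unit,comm);

   for other ops it goes into the intermediate link->rootbuf, which the inherited
   PetscSFReduceEnd_Allgatherv then combines into rootdata with op. */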

 75: static PetscErrorCode PetscSFCreateLocalSF_Alltoall(PetscSF sf,PetscSF *out)
 76: {
 78:   PetscInt       nroots=1,nleaves=1,*ilocal;
 79:   PetscSFNode    *iremote = NULL;
 80:   PetscSF        lsf;
 81:   PetscMPIInt    rank;

 84:   nroots  = 1;
 85:   nleaves = 1;
 86:   MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
 87:   PetscMalloc1(nleaves,&ilocal);
 88:   PetscMalloc1(nleaves,&iremote);
 89:   ilocal[0]        = rank;
 90:   iremote[0].rank  = 0;    /* rank in PETSC_COMM_SELF */
 91:   iremote[0].index = rank; /* LocalSF is an embedded SF. Indices are not remapped */

 93:   PetscSFCreate(PETSC_COMM_SELF,&lsf);
 94: PetscSFSetGraph(lsf,nroots,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);
 95:   PetscSFSetUp(lsf);
 96:   *out = lsf;
 97:   return(0);
 98: }
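/* The only purely local edge in an alltoall graph is the self-to-self exchange, so the local SF has a
   single root/leaf pair per process, connecting leafdata[rank] to rootdata[rank] on PETSC_COMM_SELF. */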

100: static PetscErrorCode PetscSFCreateEmbeddedSF_Alltoall(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
101: {
103:   PetscInt       i,*tmproots,*ilocal,ndranks,ndiranks;
104:   PetscSFNode    *iremote;
105:   PetscMPIInt    nroots,*roots,nleaves,*leaves,rank;
106:   MPI_Comm       comm;
107:   PetscSF_Basic  *bas;
108:   PetscSF        esf;

111:   PetscObjectGetComm((PetscObject)sf,&comm);
112:   MPI_Comm_rank(comm,&rank);

114: /* Remove duplicate entries in selected[] and store the result in roots[] */
115:   PetscMalloc1(nselected,&tmproots);
116:   PetscArraycpy(tmproots,selected,nselected);
117:   PetscSortRemoveDupsInt(&nselected,tmproots); /* nselected might be changed */
118:   if (tmproots[0] < 0 || tmproots[nselected-1] >= sf->nroots) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",tmproots[0],tmproots[nselected-1],sf->nroots);
119:   nroots = nselected;   /* For Alltoall, we know root indices will not overflow MPI_INT */
120:   PetscMalloc1(nselected,&roots);
121:   for (i=0; i<nselected; i++) roots[i] = tmproots[i];
122:   PetscFree(tmproots);

124: /* Find out which leaves are still connected to roots in the embedded sf. We expect PetscCommBuildTwoSided to be more scalable than MPI_Alltoall */
125:   PetscCommBuildTwoSided(comm,0/*empty msg*/,MPI_INT/*fake*/,nroots,roots,NULL/*todata*/,&nleaves,&leaves,NULL/*fromdata*/);

127:   /* Move myself ahead if rank is in leaves[], since I am a distinguished rank */
128:   ndranks = 0;
129:   for (i=0; i<nleaves; i++) {
130:     if (leaves[i] == rank) {leaves[i] = -rank; ndranks = 1; break;}
131:   }
132:   PetscSortMPIInt(nleaves,leaves);
133:   if (nleaves && leaves[0] < 0) leaves[0] = rank;

135:   /* Build esf and fill its fields manually (without calling PetscSFSetUp) */
136:   PetscMalloc1(nleaves,&ilocal);
137:   PetscMalloc1(nleaves,&iremote);
138:   for (i=0; i<nleaves; i++) { /* 1:1 map from roots to leaves */
139:     ilocal[i]        = leaves[i];
140:     iremote[i].rank  = leaves[i];
141:     iremote[i].index = leaves[i];
142:   }
143:   PetscSFCreate(comm,&esf);
144:   PetscSFSetType(esf,PETSCSFBASIC); /* This optimized routine can only create a basic sf */
145:   PetscSFSetGraph(esf,sf->nleaves,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);

147:   /* As if we are calling PetscSFSetUpRanks(esf,self's group) */
148:   PetscMalloc4(nleaves,&esf->ranks,nleaves+1,&esf->roffset,nleaves,&esf->rmine,nleaves,&esf->rremote);
149:   esf->nranks     = nleaves;
150:   esf->ndranks    = ndranks;
151:   esf->roffset[0] = 0;
152:   for (i=0; i<nleaves; i++) {
153:     esf->ranks[i]     = leaves[i];
154:     esf->roffset[i+1] = i+1;
155:     esf->rmine[i]     = leaves[i];
156:     esf->rremote[i]   = leaves[i];
157:   }

159:   /* Set up esf->data, the incoming communication (i.e., recv info), which is usually done by PetscSFSetUp_Basic */
160:   bas  = (PetscSF_Basic*)esf->data;
161:   PetscMalloc2(nroots,&bas->iranks,nroots+1,&bas->ioffset);
162:   PetscMalloc1(nroots,&bas->irootloc);
163:   /* Move myself ahead if rank is in roots[], since I am a distinguished irank */
164:   ndiranks = 0;
165:   for (i=0; i<nroots; i++) {
166:     if (roots[i] == rank) {roots[i] = -rank; ndiranks = 1; break;}
167:   }
168:   PetscSortMPIInt(nroots,roots);
169:   if (nroots && roots[0] < 0) roots[0] = rank;

171:   bas->niranks    = nroots;
172:   bas->ndiranks   = ndiranks;
173:   bas->ioffset[0] = 0;
174:   bas->itotal     = nroots;
175:   for (i=0; i<nroots; i++) {
176:     bas->iranks[i]    = roots[i];
177:     bas->ioffset[i+1] = i+1;
178:     bas->irootloc[i]  = roots[i];
179:   }

181:   *newsf = esf;
182:   return(0);
183: }
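/* To summarize the construction above: selected[] is de-duplicated into roots[], a sparse two-sided
   exchange (PetscCommBuildTwoSided) discovers which processes still reach this rank, and the embedded
   SF is assembled directly as a PETSCSFBASIC object with at most one edge per neighbor, bypassing
   PetscSFSetUp. */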

185: PETSC_INTERN PetscErrorCode PetscSFCreate_Alltoall(PetscSF sf)
186: {
187:   PetscErrorCode   ierr;
188:   PetscSF_Alltoall *dat = (PetscSF_Alltoall*)sf->data;

191: /* Inherit from Allgatherv. It is astonishing that Alltoall can inherit so much from Allgather(v) */
192:   sf->ops->Destroy         = PetscSFDestroy_Allgatherv;
193:   sf->ops->Reset           = PetscSFReset_Allgatherv;
194:   sf->ops->BcastAndOpEnd   = PetscSFBcastAndOpEnd_Allgatherv;
195:   sf->ops->ReduceEnd       = PetscSFReduceEnd_Allgatherv;
196:   sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Allgatherv;
197:   sf->ops->GetRootRanks    = PetscSFGetRootRanks_Allgatherv;

199:   /* Inherit from Allgather. Every process gathers equal-sized data from others, which enables this inheritance. */
200:   sf->ops->GetLeafRanks    = PetscSFGetLeafRanks_Allgatherv;

202:   /* Inherit from Gatherv. Each root has only one leaf connected, which enables this inheritance */
203:   sf->ops->FetchAndOpBegin  = PetscSFFetchAndOpBegin_Gatherv;

205:   /* Alltoall stuff */
206:   sf->ops->GetGraph         = PetscSFGetGraph_Alltoall;
207:   sf->ops->BcastAndOpBegin  = PetscSFBcastAndOpBegin_Alltoall;
208:   sf->ops->ReduceBegin      = PetscSFReduceBegin_Alltoall;
209:   sf->ops->CreateLocalSF    = PetscSFCreateLocalSF_Alltoall;
210:   sf->ops->CreateEmbeddedSF = PetscSFCreateEmbeddedSF_Alltoall;

212:   PetscNewLog(sf,&dat);
213:   sf->data = (void*)dat;
214:   return(0);
215: }
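/* A minimal usage sketch (hypothetical caller code, assuming the PetscSFSetGraphWithPattern() interface
   available in this PETSc version; names below are illustrative):

       PetscSF     sf;
       PetscLayout map;                   // layout of the root space on each process
       PetscInt    *rootdata,*leafdata;   // each of length commsize per process

       PetscSFCreate(comm,&sf);
       PetscSFSetGraphWithPattern(sf,map,PETSCSF_PATTERN_ALLTOALL); // selects the alltoall SF type
       PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata);            // forward direction ~ MPI_Alltoall
       PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata);
       PetscSFDestroy(&sf);
*/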