Actual source code: sfimpl.h

petsc-master 2019-12-03
  1: #if !defined(PETSCSFIMPL_H)
  2: #define PETSCSFIMPL_H

  4:  #include <petscsf.h>
  5:  #include <petsc/private/petscimpl.h>
  6:  #include <petscviewer.h>

  8: #if defined(PETSC_HAVE_CUDA)
  9:  #include <../src/vec/vec/impls/seq/seqcuda/cudavecimpl.h>
 10: #endif

 12: PETSC_EXTERN PetscLogEvent PETSCSF_SetGraph;
 13: PETSC_EXTERN PetscLogEvent PETSCSF_SetUp;
 14: PETSC_EXTERN PetscLogEvent PETSCSF_BcastBegin;
 15: PETSC_EXTERN PetscLogEvent PETSCSF_BcastEnd;
 16: PETSC_EXTERN PetscLogEvent PETSCSF_BcastAndOpBegin;
 17: PETSC_EXTERN PetscLogEvent PETSCSF_BcastAndOpEnd;
 18: PETSC_EXTERN PetscLogEvent PETSCSF_ReduceBegin;
 19: PETSC_EXTERN PetscLogEvent PETSCSF_ReduceEnd;
 20: PETSC_EXTERN PetscLogEvent PETSCSF_FetchAndOpBegin;
 21: PETSC_EXTERN PetscLogEvent PETSCSF_FetchAndOpEnd;
 22: PETSC_EXTERN PetscLogEvent PETSCSF_EmbedSF;
 23: PETSC_EXTERN PetscLogEvent PETSCSF_DistSect;
 24: PETSC_EXTERN PetscLogEvent PETSCSF_SectSF;
 25: PETSC_EXTERN PetscLogEvent PETSCSF_RemoteOff;
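
/* Usage sketch (illustrative only; ExampleTimedOp is not a PETSc routine): these events are
   typically registered in PetscSFInitializePackage() and are used to bracket the corresponding
   operations with PetscLogEventBegin/End, e.g. */
static PetscErrorCode ExampleTimedOp(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
  /* ... post the sends/receives that broadcast rootdata into leafdata ... */
  ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}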

 27: typedef enum {PETSCSF_LEAF2ROOT_REDUCE=0, PETSCSF_ROOT2LEAF_BCAST=1} PetscSFDirection;
 28: typedef enum {PETSC_MEMTYPE_HOST=0, PETSC_MEMTYPE_DEVICE=1} PetscMemType;

 30: struct _PetscSFOps {
 31:   PetscErrorCode (*Reset)(PetscSF);
 32:   PetscErrorCode (*Destroy)(PetscSF);
 33:   PetscErrorCode (*SetUp)(PetscSF);
 34:   PetscErrorCode (*SetFromOptions)(PetscOptionItems*,PetscSF);
 35:   PetscErrorCode (*View)(PetscSF,PetscViewer);
 36:   PetscErrorCode (*Duplicate)(PetscSF,PetscSFDuplicateOption,PetscSF);
 37:   PetscErrorCode (*BcastAndOpBegin)(PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType,      void*,      MPI_Op);
 38:   PetscErrorCode (*BcastAndOpEnd)  (PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType,      void*,      MPI_Op);
 39:   PetscErrorCode (*ReduceBegin)    (PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType,      void*,      MPI_Op);
 40:   PetscErrorCode (*ReduceEnd)      (PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType,      void*,      MPI_Op);
 41:   PetscErrorCode (*FetchAndOpBegin)(PetscSF,MPI_Datatype,PetscMemType,      void*,PetscMemType,const void*,void*,MPI_Op);
 42:   PetscErrorCode (*FetchAndOpEnd)  (PetscSF,MPI_Datatype,PetscMemType,      void*,PetscMemType,const void*,void*,MPI_Op);
 43:   PetscErrorCode (*BcastToZero)    (PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType,      void*); /* For internal use only */
 44:   PetscErrorCode (*GetRootRanks)(PetscSF,PetscInt*,const PetscMPIInt**,const PetscInt**,const PetscInt**,const PetscInt**);
 45:   PetscErrorCode (*GetLeafRanks)(PetscSF,PetscInt*,const PetscMPIInt**,const PetscInt**,const PetscInt**);
 46:   PetscErrorCode (*CreateLocalSF)(PetscSF,PetscSF*);
 47:   PetscErrorCode (*GetGraph)(PetscSF,PetscInt*,PetscInt*,const PetscInt**,const PetscSFNode**);
 48:   PetscErrorCode (*CreateEmbeddedSF)(PetscSF,PetscInt,const PetscInt*,PetscSF*);
 49:   PetscErrorCode (*CreateEmbeddedLeafSF)(PetscSF,PetscInt,const PetscInt*,PetscSF*);
 50: };
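
/* Sketch (hypothetical names; "Example" is not a real PetscSF implementation): a concrete
   implementation fills this function table in its creation routine and keeps its private
   state behind the generic data pointer of the SF object (see struct _p_PetscSF below). */
typedef struct { PetscInt placeholder; } PetscSF_Example;

static PetscErrorCode PetscSFSetUp_Example(PetscSF sf) { return 0; }
static PetscErrorCode PetscSFReset_Example(PetscSF sf) { return 0; }

static PetscErrorCode PetscSFCreate_Example(PetscSF sf)
{
  PetscSF_Example *dat;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  sf->ops->SetUp = PetscSFSetUp_Example;
  sf->ops->Reset = PetscSFReset_Example;
  /* ... BcastAndOpBegin/End, ReduceBegin/End, etc. are assigned the same way ... */
  ierr     = PetscNewLog(sf,&dat);CHKERRQ(ierr);
  sf->data = (void*)dat;
  PetscFunctionReturn(0);
}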

 52: typedef struct _n_PetscSFPackOpt *PetscSFPackOpt;

 54: struct _p_PetscSF {
 55:   PETSCHEADER(struct _PetscSFOps);
 56:   PetscInt        nroots;          /* Number of root vertices on current process (candidates for incoming edges) */
 57:   PetscInt        nleaves;         /* Number of leaf vertices on current process (this process specifies a root for each leaf) */
 58:   PetscInt        *mine;           /* Location of leaves in leafdata arrays provided to the communication routines */
 59:   PetscInt        *mine_alloc;
 60:   PetscInt        minleaf,maxleaf;
 61:   PetscSFNode     *remote;         /* Remote references to roots for each local leaf */
 62:   PetscSFNode     *remote_alloc;
 63:   PetscInt        nranks;          /* Number of ranks owning roots connected to my leaves */
 64:   PetscInt        ndranks;         /* Number of ranks in distinguished group holding roots connected to my leaves */
 65:   PetscMPIInt     *ranks;          /* List of ranks referenced by "remote" */
 66:   PetscInt        *roffset;        /* Array of length nranks+1, offset in rmine/rremote for each rank */
 67:   PetscInt        *rmine;          /* Concatenated array holding local indices referencing each remote rank */
 68:   PetscInt        *rremote;        /* Concatenated array holding remote indices referenced for each remote rank */
 69:   PetscBool       degreeknown;     /* The degree is currently known, do not have to recompute */
 70:   PetscInt        *degree;         /* Degree of each of my root vertices */
 71:   PetscInt        *degreetmp;      /* Temporary local array for computing degree */
 72:   PetscBool       rankorder;       /* Sort ranks for gather and scatter operations */
 73:   MPI_Group       ingroup;         /* Group of processes connected to my roots */
 74:   MPI_Group       outgroup;        /* Group of processes connected to my leaves */
 75:   PetscSF         multi;           /* Internal graph used to implement gather and scatter operations */
 76:   PetscBool       graphset;        /* Flag indicating that the graph has been set, required before calling communication routines */
 77:   PetscBool       setupcalled;     /* Type and communication structures have been set up */
 78:   PetscSFPackOpt  leafpackopt;     /* Optimization plans to (un)pack leaves connected to remote roots, based on index patterns in rmine[]. NULL for no optimization */
 79:   PetscSFPackOpt  selfleafpackopt; /* Optimization plans to (un)pack leaves connected to local roots */
 80:   PetscBool       selfleafdups;    /* Indices of leaves in rmine[0,roffset[ndranks]) have duplicates, so threads working ... */
 81:                                    /* ... on these leaves in parallel may have a data race. */
 82:   PetscBool       remoteleafdups;  /* Indices of leaves in rmine[roffset[ndranks],roffset[nranks]) have duplicates */

 84:   PetscSFPattern  pattern;         /* Pattern of the graph */
 85:   PetscLayout     map;             /* Layout of leaves over all processes when building a patterned graph */
 86:   PetscBool       use_pinned_buf;  /* Whether to use pinned (i.e., non-pageable) host memory for send/recv buffers */
 87: #if defined(PETSC_HAVE_CUDA)
 88:   PetscInt        *rmine_d;        /* A copy of rmine in device memory */
 89:   PetscInt        maxResidentThreadsPerGPU;
 90: #endif
 91:   void *data;                      /* Pointer to implementation */
 92: };
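
/* Sketch (hypothetical values): how the graph members above relate to PetscSFSetGraph().
   After the call below, nroots == 2, nleaves == 2, mine holds the leaf locations {0,3},
   and remote holds the (rank,index) pairs from iremote. */
static PetscErrorCode ExampleSetGraph(PetscSF sf)
{
  PetscInt       ilocal[2] = {0,3};          /* leaf locations in the leafdata arrays    */
  PetscSFNode    iremote[2];
  PetscErrorCode ierr;

  PetscFunctionBegin;
  iremote[0].rank = 0; iremote[0].index = 1; /* leaf 0 references root 1 owned by rank 0 */
  iremote[1].rank = 1; iremote[1].index = 0; /* leaf 1 references root 0 owned by rank 1 */
  ierr = PetscSFSetGraph(sf,2,2,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}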

 94: PETSC_EXTERN PetscBool PetscSFRegisterAllCalled;
 95: PETSC_EXTERN PetscErrorCode PetscSFRegisterAll(void);

 97: PETSC_INTERN PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF,PetscSF*);
 98: PETSC_INTERN PetscErrorCode PetscSFBcastToZero_Private(PetscSF,MPI_Datatype,const void*,void*);

100: PETSC_EXTERN PetscErrorCode MPIPetsc_Type_unwrap(MPI_Datatype,MPI_Datatype*,PetscBool*);
101: PETSC_EXTERN PetscErrorCode MPIPetsc_Type_compare(MPI_Datatype,MPI_Datatype,PetscBool*);
102: PETSC_EXTERN PetscErrorCode MPIPetsc_Type_compare_contig(MPI_Datatype,MPI_Datatype,PetscInt*);

104: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
105: #define MPIU_Iscatter(a,b,c,d,e,f,g,h,req)     MPI_Iscatter(a,b,c,d,e,f,g,h,req)
106: #define MPIU_Iscatterv(a,b,c,d,e,f,g,h,i,req)  MPI_Iscatterv(a,b,c,d,e,f,g,h,i,req)
107: #define MPIU_Igather(a,b,c,d,e,f,g,h,req)      MPI_Igather(a,b,c,d,e,f,g,h,req)
108: #define MPIU_Igatherv(a,b,c,d,e,f,g,h,i,req)   MPI_Igatherv(a,b,c,d,e,f,g,h,i,req)
109: #define MPIU_Iallgather(a,b,c,d,e,f,g,req)     MPI_Iallgather(a,b,c,d,e,f,g,req)
110: #define MPIU_Iallgatherv(a,b,c,d,e,f,g,h,req)  MPI_Iallgatherv(a,b,c,d,e,f,g,h,req)
111: #define MPIU_Ialltoall(a,b,c,d,e,f,g,req)      MPI_Ialltoall(a,b,c,d,e,f,g,req)
112: #else
113: /* Ignore req, the MPI_Request argument, and use blocking MPI collectives. One should still initialize req
114:    to MPI_REQUEST_NULL so that MPI_Wait(req,status) can be called whether or not the underlying call was blocking.
115:  */
116: #define MPIU_Iscatter(a,b,c,d,e,f,g,h,req)     MPI_Scatter(a,b,c,d,e,f,g,h)
117: #define MPIU_Iscatterv(a,b,c,d,e,f,g,h,i,req)  MPI_Scatterv(a,b,c,d,e,f,g,h,i)
118: #define MPIU_Igather(a,b,c,d,e,f,g,h,req)      MPI_Gather(a,b,c,d,e,f,g,h)
119: #define MPIU_Igatherv(a,b,c,d,e,f,g,h,i,req)   MPI_Gatherv(a,b,c,d,e,f,g,h,i)
120: #define MPIU_Iallgather(a,b,c,d,e,f,g,req)     MPI_Allgather(a,b,c,d,e,f,g)
121: #define MPIU_Iallgatherv(a,b,c,d,e,f,g,h,req)  MPI_Allgatherv(a,b,c,d,e,f,g,h)
122: #define MPIU_Ialltoall(a,b,c,d,e,f,g,req)      MPI_Alltoall(a,b,c,d,e,f,g)
123: #endif
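
/* Usage sketch (ExampleAllgather is illustrative only): initialize req to MPI_REQUEST_NULL so
   that the MPI_Wait() below is a harmless no-op when the blocking fallbacks above are in use. */
static PetscErrorCode ExampleAllgather(MPI_Comm comm,PetscInt myval,PetscInt *allvals)
{
  MPI_Request    req = MPI_REQUEST_NULL;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPIU_Iallgather(&myval,1,MPIU_INT,allvals,1,MPIU_INT,comm,&req);CHKERRQ(ierr);
  /* ... other work can overlap here when nonblocking collectives are available ... */
  ierr = MPI_Wait(&req,MPI_STATUS_IGNORE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}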

125: PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *mtype)
126: {
129:   *mtype = PETSC_MEMTYPE_HOST;
130: #if defined(PETSC_HAVE_CUDA)
131:   {
132:     struct cudaPointerAttributes attr;
133:     if (data) {
134: #if (CUDART_VERSION < 10000)
135:       attr.memoryType = cudaMemoryTypeHost;
136:       cudaPointerGetAttributes(&attr,data);
137:       cudaGetLastError();
138:       if (attr.memoryType == cudaMemoryTypeDevice) *mtype = PETSC_MEMTYPE_DEVICE;
139: #else
140:       attr.type = cudaMemoryTypeHost;
141:       cudaPointerGetAttributes(&attr,data); /* Do not check the error, since before CUDA 11.0 passing a host pointer returns cudaErrorInvalidValue */
142:       cudaGetLastError(); /* Get and then clear the last error */
143:       if (attr.type == cudaMemoryTypeDevice || attr.type == cudaMemoryTypeManaged) *mtype = PETSC_MEMTYPE_DEVICE;
144: #endif
145:     }
146:   }
147: #endif
148:   return(0);
149: }
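
/* Usage sketch: callers can classify a user-supplied buffer and branch between host and
   device code paths; a NULL pointer is reported as host memory. */
static PetscErrorCode ExampleClassifyBuffer(const void *leafdata)
{
  PetscMemType   mtype;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscGetMemType(leafdata,&mtype);CHKERRQ(ierr);
  if (mtype == PETSC_MEMTYPE_DEVICE) {
    /* ... pack/unpack with CUDA kernels or hand the pointer to a CUDA-aware MPI ... */
  } else {
    /* ... plain host path ... */
  }
  PetscFunctionReturn(0);
}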

151: #if defined(PETSC_HAVE_CUDA)
152: PETSC_STATIC_INLINE PetscErrorCode PetscMallocPinnedMemory(size_t size,void** ptr)
153: {
154:   cudaError_t cerr;
156:   cerr = cudaMallocHost(ptr,size);CHKERRCUDA(cerr);
157:   return(0);
158: }

160: PETSC_STATIC_INLINE PetscErrorCode PetscFreePinnedMemory_Private(void* ptr)
161: {
162:   cudaError_t cerr;
164:   cerr = cudaFreeHost(ptr);CHKERRCUDA(cerr);
165:   return(0);
166: }
167: #define PetscFreePinnedMemory(p) ((p) && (PetscFreePinnedMemory_Private(p) || ((p)=NULL,0)))
168: #endif
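
#if defined(PETSC_HAVE_CUDA)
/* Usage sketch (illustrative only): allocate a pinned (page-locked) host staging buffer, as
   selected by use_pinned_buf above, and release it; PetscFreePinnedMemory() also resets the
   pointer to NULL on success. The buffer size here is arbitrary. */
static PetscErrorCode ExamplePinnedStaging(void)
{
  void           *buf = NULL;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMallocPinnedMemory(1024*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
  /* ... use buf for cudaMemcpyAsync staging or as an MPI send/recv buffer ... */
  ierr = PetscFreePinnedMemory(buf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif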

170: PETSC_STATIC_INLINE PetscErrorCode PetscMallocWithMemType(PetscMemType mtype,size_t size,void** ptr)
171: {
173:   if (mtype == PETSC_MEMTYPE_HOST) {PetscErrorCode ierr = PetscMalloc(size,ptr);CHKERRQ(ierr);}
174: #if defined(PETSC_HAVE_CUDA)
175:   else if (mtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaMalloc(ptr,size);CHKERRCUDA(err);}
176: #endif
177:   else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong PetscMemType %d", (int)mtype);
178:   return(0);
179: }

181: PETSC_STATIC_INLINE PetscErrorCode PetscFreeWithMemType_Private(PetscMemType mtype,void* ptr)
182: {
184:   if (mtype == PETSC_MEMTYPE_HOST) {PetscErrorCode ierr = PetscFree(ptr);CHKERRQ(ierr);}
185: #if defined(PETSC_HAVE_CUDA)
186:   else if (mtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaFree(ptr);CHKERRCUDA(err);}
187: #endif
188:   else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong PetscMemType %d",(int)mtype);
189:   return(0);
190: }

192: /* Free memory and set ptr to NULL on success */
193: #define PetscFreeWithMemType(t,p) ((p) && (PetscFreeWithMemType_Private((t),(p)) || ((p)=NULL,0)))
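
/* Usage sketch (ExampleScratch is illustrative only): allocate a scratch buffer in the same
   memory space as some user data and free it with the matching helper; like PetscFree(),
   PetscFreeWithMemType() also sets the pointer to NULL on success. */
static PetscErrorCode ExampleScratch(PetscMemType mtype,size_t bytes)
{
  void           *scratch = NULL;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMallocWithMemType(mtype,bytes,&scratch);CHKERRQ(ierr);
  /* ... pack data into scratch on the host or the device, depending on mtype ... */
  ierr = PetscFreeWithMemType(mtype,scratch);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}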

195: PETSC_STATIC_INLINE PetscErrorCode PetscMemcpyWithMemType(PetscMemType dstmtype,PetscMemType srcmtype,void* dst,const void*src,size_t n)
196: {
198:   if (n) {
199:     if (dstmtype == PETSC_MEMTYPE_HOST && srcmtype == PETSC_MEMTYPE_HOST) {PetscErrorCode ierr = PetscMemcpy(dst,src,n);CHKERRQ(ierr);}
200: #if defined(PETSC_HAVE_CUDA)
201:     else if (dstmtype == PETSC_MEMTYPE_DEVICE && srcmtype == PETSC_MEMTYPE_HOST)   {
202:       cudaError_t    err  = cudaMemcpy(dst,src,n,cudaMemcpyHostToDevice);CHKERRCUDA(err);
203:       PetscErrorCode ierr = PetscLogCpuToGpu(n);CHKERRQ(ierr);
204:     } else if (dstmtype == PETSC_MEMTYPE_HOST && srcmtype == PETSC_MEMTYPE_DEVICE) {
205:       cudaError_t     err = cudaMemcpy(dst,src,n,cudaMemcpyDeviceToHost);CHKERRCUDA(err);
206:       PetscErrorCode ierr = PetscLogGpuToCpu(n);CHKERRQ(ierr);
207:     } else if (dstmtype == PETSC_MEMTYPE_DEVICE && srcmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaMemcpy(dst,src,n,cudaMemcpyDeviceToDevice);CHKERRCUDA(err);}
208: #endif
209:     else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong PetscMemType for dst %d and src %d",(int)dstmtype,(int)srcmtype);
210:   }
211:   return(0);
212: }
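
/* Usage sketch: copy count entries between buffers whose locations are known only at run time;
   note that the last argument is a byte count, and that host<->device transfers are logged via
   PetscLogCpuToGpu/PetscLogGpuToCpu above. */
static PetscErrorCode ExampleCopy(PetscMemType dstmtype,PetscMemType srcmtype,PetscScalar *dst,const PetscScalar *src,PetscInt count)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMemcpyWithMemType(dstmtype,srcmtype,dst,src,count*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}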

214: #endif