/*
    The memory scalable AO application ordering routines. These store the
    orderings on each processor for that processor's range of values.
*/

#include <../src/vec/is/ao/aoimpl.h>          /*I  "petscao.h"   I*/

typedef struct {
  PetscInt    *app_loc;    /* app_loc[i] is the partner for the ith local PETSc slot */
  PetscInt    *petsc_loc;  /* petsc_loc[j] is the partner for the jth local app slot */
  PetscLayout map;         /* determines the local sizes of ao */
} AO_MemoryScalable;
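/*
   Illustration (a hypothetical example, not data used anywhere in this file): suppose the
   ordering pairs PETSc indices 0..5 with application indices {3,5,1,0,4,2}, and the
   PetscLayout gives each of two processes three entries. Then

     rank 0 owns PETSc indices 0-2 and app indices 0-2:
        app_loc   = {3,5,1}   (app partners of PETSc 0,1,2)
        petsc_loc = {3,2,5}   (PETSc partners of app 0,1,2)
     rank 1 owns PETSc indices 3-5 and app indices 3-5:
        app_loc   = {0,4,2}   (app partners of PETSc 3,4,5)
        petsc_loc = {0,4,1}   (PETSc partners of app 3,4,5)

   so each process stores only the slice of each ordering that falls in its own range.
*/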
/*
       All processors ship the data to process 0 to be printed; note that this is not scalable because
       process 0 allocates space for all the ordering entries across all the processes
*/
PetscErrorCode AOView_MemoryScalable(AO ao,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank,size;
  AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
  PetscBool         iascii;
  PetscMPIInt       tag_app,tag_petsc;
  PetscLayout       map = aomems->map;
  PetscInt          *app,*app_loc,*petsc,*petsc_loc,len,i,j;
  MPI_Status        status;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (!iascii) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not supported for AO MemoryScalable",((PetscObject)viewer)->type_name);

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)ao),&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)ao),&size);CHKERRMPI(ierr);

  ierr = PetscObjectGetNewTag((PetscObject)ao,&tag_app);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)ao,&tag_petsc);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscViewerASCIIPrintf(viewer,"Number of elements in ordering %D\n",ao->N);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"PETSc->App  App->PETSc\n");CHKERRQ(ierr);

    ierr = PetscMalloc2(map->N,&app,map->N,&petsc);CHKERRQ(ierr);
    len  = map->n;
    /* print local AO */
    ierr = PetscViewerASCIIPrintf(viewer,"Process [%d]\n",rank);CHKERRQ(ierr);
    for (i=0; i<len; i++) {
      ierr = PetscViewerASCIIPrintf(viewer,"%3D  %3D    %3D  %3D\n",i,aomems->app_loc[i],i,aomems->petsc_loc[i]);CHKERRQ(ierr);
    }

    /* recv and print off-processor's AO */
    for (i=1; i<size; i++) {
      len       = map->range[i+1] - map->range[i];
      app_loc   = app  + map->range[i];
      petsc_loc = petsc+ map->range[i];
      ierr      = MPI_Recv(app_loc,(PetscMPIInt)len,MPIU_INT,i,tag_app,PetscObjectComm((PetscObject)ao),&status);CHKERRMPI(ierr);
      ierr      = MPI_Recv(petsc_loc,(PetscMPIInt)len,MPIU_INT,i,tag_petsc,PetscObjectComm((PetscObject)ao),&status);CHKERRMPI(ierr);
      ierr      = PetscViewerASCIIPrintf(viewer,"Process [%D]\n",i);CHKERRQ(ierr);

      for (j=0; j<len; j++) {
        ierr = PetscViewerASCIIPrintf(viewer,"%3D  %3D    %3D  %3D\n",map->range[i]+j,app_loc[j],map->range[i]+j,petsc_loc[j]);CHKERRQ(ierr);
      }
    }
    ierr = PetscFree2(app,petsc);CHKERRQ(ierr);

  } else {
    /* send values */
    ierr = MPI_Send((void*)aomems->app_loc,map->n,MPIU_INT,0,tag_app,PetscObjectComm((PetscObject)ao));CHKERRMPI(ierr);
    ierr = MPI_Send((void*)aomems->petsc_loc,map->n,MPIU_INT,0,tag_petsc,PetscObjectComm((PetscObject)ao));CHKERRMPI(ierr);
  }
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode AODestroy_MemoryScalable(AO ao)
{
  AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = PetscFree2(aomems->app_loc,aomems->petsc_loc);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&aomems->map);CHKERRQ(ierr);
  ierr = PetscFree(aomems);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Input Parameters:
+   ao - the application ordering context
.   n  - the number of integers in ia[]
.   ia - the integers; these are replaced with their mapped value
-   maploc - app_loc or petsc_loc in struct "AO_MemoryScalable"

   Output Parameter:
.   ia - the mapped integers
*/
PetscErrorCode AOMap_MemoryScalable_private(AO ao,PetscInt n,PetscInt *ia,const PetscInt *maploc)
{
  PetscErrorCode    ierr;
  AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
  MPI_Comm          comm;
  PetscMPIInt       rank,size,tag1,tag2;
  PetscInt          *owner,*start,*sizes,nsends,nreceives;
  PetscInt          nmax,count,*sindices,*rindices,i,j,idx,lastidx,*sindices2,*rindices2;
  const PetscInt    *owners = aomems->map->range;
  MPI_Request       *send_waits,*recv_waits,*send_waits2,*recv_waits2;
  MPI_Status        recv_status;
  PetscMPIInt       nindices,source,widx;
  PetscInt          *rbuf,*sbuf;
  MPI_Status        *send_status,*send_status2;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)ao,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc1(size,&start);CHKERRQ(ierr);
  ierr = PetscCalloc2(2*size,&sizes,n,&owner);CHKERRQ(ierr);

  j       = 0;
  lastidx = -1;
  for (i=0; i<n; i++) {
    if (ia[i] < 0) owner[i] = -1;      /* mark negative entries (which are not to be mapped) with a special negative value */
    if (ia[i] >= ao->N) owner[i] = -2; /* mark out of range entries with special negative value */
    else {
      /* if indices are NOT locally sorted, need to start search at the beginning */
      if (lastidx > (idx = ia[i])) j = 0;
      lastidx = idx;
      for (; j<size; j++) {
        if (idx >= owners[j] && idx < owners[j+1]) {
          sizes[2*j]++;     /* num of indices to be sent */
          sizes[2*j+1] = 1; /* send to proc[j] */
          owner[i]     = j;
          break;
        }
      }
    }
  }
  sizes[2*rank] = sizes[2*rank+1] = 0; /* do not receive from self! */
  nsends        = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,sizes,&nmax,&nreceives);CHKERRQ(ierr);

  /* allocate arrays */
  ierr = PetscObjectGetNewTag((PetscObject)ao,&tag1);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)ao,&tag2);CHKERRQ(ierr);

  ierr = PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits);CHKERRQ(ierr);
  ierr = PetscMalloc2(nsends*nmax,&rindices2,nsends,&recv_waits2);CHKERRQ(ierr);

  ierr = PetscMalloc3(n,&sindices,nsends,&send_waits,nsends,&send_status);CHKERRQ(ierr);
  ierr = PetscMalloc3(n,&sindices2,nreceives,&send_waits2,nreceives,&send_status2);CHKERRQ(ierr);

  /* post 1st receives: receive others' requests
     since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive; potentially
     this is a lot of wasted space
  */
  for (i=0,count=0; i<nreceives; i++) {
    ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);CHKERRMPI(ierr);
  }

  /* do 1st sends:
     start[i] gives the starting index in sindices for the stuff going to
     the ith processor
  */
  start[0] = 0;
  for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
  for (i=0; i<n; i++) {
    j = owner[i];
    if (j == -1) continue;   /* do not remap negative entries in ia[] */
    else if (j == -2) {      /* out of range entries get mapped to -1 */
      ia[i] = -1;
      continue;
    } else if (j != rank) {
      sindices[start[j]++] = ia[i];
    } else {                 /* compute my own map */
      ia[i] = maploc[ia[i]-owners[rank]];
    }
  }

  start[0] = 0;
  for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
  for (i=0,count=0; i<size; i++) {
    if (sizes[2*i+1]) {
      /* send my request to proc[i] */
      ierr = MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag1,comm,send_waits+count);CHKERRMPI(ierr);
      /* post receive for the answer of my request */
      ierr = MPI_Irecv(sindices2+start[i],sizes[2*i],MPIU_INT,i,tag2,comm,recv_waits2+count);CHKERRMPI(ierr);
      count++;
    }
  }
  if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %D != count %D",nsends,count);

  /* wait on 1st sends */
  if (nsends) {
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRMPI(ierr);
  }

  /* 1st recvs: others' requests */
  for (j=0; j<nreceives; j++) {
    ierr   = MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);CHKERRMPI(ierr); /* widx: index of the completed receive */
    ierr   = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRMPI(ierr);
    rbuf   = rindices+nmax*widx; /* global indices requested by the sender */
    source = recv_status.MPI_SOURCE;

    /* compute mapping */
    sbuf = rbuf;
    for (i=0; i<nindices; i++) sbuf[i] = maploc[rbuf[i]-owners[rank]];

    /* send the mapped values back to the sender */
    ierr = MPI_Isend(sbuf,nindices,MPIU_INT,source,tag2,comm,send_waits2+widx);CHKERRMPI(ierr);
  }

  /* wait on 2nd sends */
  if (nreceives) {
    ierr = MPI_Waitall(nreceives,send_waits2,send_status2);CHKERRMPI(ierr);
  }

  /* 2nd recvs: the answers to my requests */
  for (j=0; j<nsends; j++) {
    ierr   = MPI_Waitany(nsends,recv_waits2,&widx,&recv_status);CHKERRMPI(ierr);
    ierr   = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRMPI(ierr);
    source = recv_status.MPI_SOURCE;

    /* pack output ia[] */
    rbuf  = sindices2+start[source];
    count = 0;
    for (i=0; i<n; i++) {
      if (source == owner[i]) ia[i] = rbuf[count++];
    }
  }

  /* free arrays */
  ierr = PetscFree(start);CHKERRQ(ierr);
  ierr = PetscFree2(sizes,owner);CHKERRQ(ierr);
  ierr = PetscFree2(rindices,recv_waits);CHKERRQ(ierr);
  ierr = PetscFree2(rindices2,recv_waits2);CHKERRQ(ierr);
  ierr = PetscFree3(sindices,send_waits,send_status);CHKERRQ(ierr);
  ierr = PetscFree3(sindices2,send_waits2,send_status2);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode AOPetscToApplication_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
{
  PetscErrorCode    ierr;
  AO_MemoryScalable *aomems  = (AO_MemoryScalable*)ao->data;
  PetscInt          *app_loc = aomems->app_loc;

  PetscFunctionBegin;
  ierr = AOMap_MemoryScalable_private(ao,n,ia,app_loc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode AOApplicationToPetsc_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
{
  PetscErrorCode    ierr;
  AO_MemoryScalable *aomems    = (AO_MemoryScalable*)ao->data;
  PetscInt          *petsc_loc = aomems->petsc_loc;

  PetscFunctionBegin;
  ierr = AOMap_MemoryScalable_private(ao,n,ia,petsc_loc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static struct _AOOps AOOps_MemoryScalable = {
  AOView_MemoryScalable,
  AODestroy_MemoryScalable,
  AOPetscToApplication_MemoryScalable,
  AOApplicationToPetsc_MemoryScalable,
  NULL,
  NULL,
  NULL,
  NULL
};
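/*
   Usage sketch (illustrative only; "ao" is assumed to be an AOMEMORYSCALABLE ordering created
   elsewhere, for example with AOCreateMemoryScalable()). The mapping routines replace the
   indices in place; negative entries are passed through unchanged and out-of-range entries
   are mapped to -1:

       PetscInt indices[] = {0,2,5,-1};
       ierr = AOApplicationToPetsc(ao,4,indices);CHKERRQ(ierr);
       ierr = AOPetscToApplication(ao,4,indices);CHKERRQ(ierr);

   After the two calls indices[] holds its original values again, since the two maps are
   inverses of each other for valid entries.
*/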
PetscErrorCode AOCreateMemoryScalable_private(MPI_Comm comm,PetscInt napp,const PetscInt from_array[],const PetscInt to_array[],AO ao,PetscInt *aomap_loc)
{
  PetscErrorCode    ierr;
  AO_MemoryScalable *aomems  = (AO_MemoryScalable*)ao->data;
  PetscLayout       map      = aomems->map;
  PetscInt          n_local  = map->n,i,j;
  PetscMPIInt       rank,size,tag;
  PetscInt          *owner,*start,*sizes,nsends,nreceives;
  PetscInt          nmax,count,*sindices,*rindices,idx,lastidx;
  PetscInt          *owners = aomems->map->range;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status;
  PetscMPIInt       nindices,widx;
  PetscInt          *rbuf;
  PetscInt          n=napp,ip,ia;
  MPI_Status        *send_status;

  PetscFunctionBegin;
  ierr = PetscArrayzero(aomap_loc,n_local);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  /* first count number of contributors (of from_array[]) to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&owner);CHKERRQ(ierr);

  j       = 0;
  lastidx = -1;
  for (i=0; i<n; i++) {
    /* if indices are NOT locally sorted, need to start search at the beginning */
    if (lastidx > (idx = from_array[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]  += 2; /* num of indices to be sent - in pairs (ip,ia) */
        sizes[2*j+1] = 1; /* send to proc[j] */
        owner[i]     = j;
        break;
      }
    }
  }
  sizes[2*rank] = sizes[2*rank+1] = 0; /* do not receive from self! */
  nsends        = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,sizes,&nmax,&nreceives);CHKERRQ(ierr);

  /* allocate arrays */
  ierr = PetscObjectGetNewTag((PetscObject)ao,&tag);CHKERRQ(ierr);
  ierr = PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits);CHKERRQ(ierr);
  ierr = PetscMalloc3(2*n,&sindices,nsends,&send_waits,nsends,&send_status);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&start);CHKERRQ(ierr);

  /* post receives */
  for (i=0; i<nreceives; i++) {
    ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRMPI(ierr);
  }

  /* do sends:
     start[i] gives the starting index in sindices for the stuff going to
     the ith processor
  */
  start[0] = 0;
  for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
  for (i=0; i<n; i++) {
    j = owner[i];
    if (j != rank) {
      ip                   = from_array[i];
      ia                   = to_array[i];
      sindices[start[j]++] = ip;
      sindices[start[j]++] = ia;
    } else { /* compute my own map */
      ip            = from_array[i] - owners[rank];
      ia            = to_array[i];
      aomap_loc[ip] = ia;
    }
  }

  start[0] = 0;
  for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
  for (i=0,count=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count);CHKERRMPI(ierr);
      count++;
    }
  }
  if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %D != count %D",nsends,count);

  /* wait on sends */
  if (nsends) {
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRMPI(ierr);
  }

  /* recvs */
  count = 0;
  for (j=nreceives; j>0; j--) {
    ierr = MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);CHKERRMPI(ierr);
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRMPI(ierr);
    rbuf = rindices+nmax*widx; /* global index */

    /* compute local mapping */
    for (i=0; i<nindices; i+=2) { /* pack aomap_loc */
      ip            = rbuf[i] - owners[rank]; /* local index */
      ia            = rbuf[i+1];
      aomap_loc[ip] = ia;
    }
    count++;
  }

  ierr = PetscFree(start);CHKERRQ(ierr);
  ierr = PetscFree3(sindices,send_waits,send_status);CHKERRQ(ierr);
  ierr = PetscFree2(rindices,recv_waits);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode AOCreate_MemoryScalable(AO ao)
{
  PetscErrorCode    ierr;
  IS                isapp=ao->isapp,ispetsc=ao->ispetsc;
  const PetscInt    *mypetsc,*myapp;
  PetscInt          napp,n_local,N,i,start,*petsc=NULL,*lens,*disp;
  MPI_Comm          comm;
  AO_MemoryScalable *aomems;
  PetscLayout       map;
  PetscMPIInt       size,rank;

  PetscFunctionBegin;
  if (!isapp) SETERRQ(PetscObjectComm((PetscObject)ao),PETSC_ERR_ARG_WRONGSTATE,"AOSetIS() must be called before AOSetType()");
  /* create special struct aomems */
  ierr     = PetscNewLog(ao,&aomems);CHKERRQ(ierr);
  ao->data = (void*) aomems;
  ierr     = PetscMemcpy(ao->ops,&AOOps_MemoryScalable,sizeof(struct _AOOps));CHKERRQ(ierr);
  ierr     = PetscObjectChangeTypeName((PetscObject)ao,AOMEMORYSCALABLE);CHKERRQ(ierr);

  /* transmit all local lengths of isapp to all processors */
  ierr = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = PetscMalloc2(size,&lens,size,&disp);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isapp,&napp);CHKERRQ(ierr);
  ierr = MPI_Allgather(&napp,1,MPIU_INT,lens,1,MPIU_INT,comm);CHKERRMPI(ierr);

  N = 0;
  for (i = 0; i < size; i++) {
    disp[i] = N;
    N      += lens[i];
  }

  /* If ispetsc is 0 then use "natural" numbering */
  if (napp) {
    if (!ispetsc) {
      start = disp[rank];
      ierr  = PetscMalloc1(napp+1,&petsc);CHKERRQ(ierr);
      for (i=0; i<napp; i++) petsc[i] = start + i;
    } else {
      ierr  = ISGetIndices(ispetsc,&mypetsc);CHKERRQ(ierr);
      petsc = (PetscInt*)mypetsc;
    }
  }

  /* create the layout that determines the local sizes of the AO */
  ierr    = PetscLayoutCreate(comm,&map);CHKERRQ(ierr);
  map->bs = 1;
  map->N  = N;
  ierr    = PetscLayoutSetUp(map);CHKERRQ(ierr);

  ao->N       = N;
  ao->n       = map->n;
  aomems->map = map;

  /* create distributed indices app_loc: petsc->app and petsc_loc: app->petsc */
  n_local = map->n;
  ierr = PetscCalloc2(n_local,&aomems->app_loc,n_local,&aomems->petsc_loc);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)ao,2*n_local*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISGetIndices(isapp,&myapp);CHKERRQ(ierr);

  ierr = AOCreateMemoryScalable_private(comm,napp,petsc,myapp,ao,aomems->app_loc);CHKERRQ(ierr);
  ierr = AOCreateMemoryScalable_private(comm,napp,myapp,petsc,ao,aomems->petsc_loc);CHKERRQ(ierr);

  ierr = ISRestoreIndices(isapp,&myapp);CHKERRQ(ierr);
  if (napp) {
    if (ispetsc) {
      ierr = ISRestoreIndices(ispetsc,&mypetsc);CHKERRQ(ierr);
    } else {
      ierr = PetscFree(petsc);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree2(lens,disp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   AOCreateMemoryScalable - Creates a memory scalable application ordering using two integer arrays.

   Collective

   Input Parameters:
+  comm - MPI communicator that is to share AO
.  napp - size of integer arrays
.  myapp - integer array that defines an ordering
-  mypetsc - integer array that defines another ordering (may be NULL to
             indicate the natural ordering, that is 0,1,2,3,...)

   Output Parameter:
.  aoout - the new application ordering

   Level: beginner

   Notes:
   The arrays myapp and mypetsc must contain all the integers 0 to napp-1 with no duplicates; that is, there cannot be any "holes"
   in the indices. Use AOCreateMapping() or AOCreateMappingIS() if you wish to have "holes" in the indices.
   Compared with AOCreateBasic(), this routine trades extra message communication for lower memory use.

.seealso: AOCreateMemoryScalableIS(), AODestroy(), AOPetscToApplication(), AOApplicationToPetsc()
@*/
PetscErrorCode AOCreateMemoryScalable(MPI_Comm comm,PetscInt napp,const PetscInt myapp[],const PetscInt mypetsc[],AO *aoout)
{
  PetscErrorCode ierr;
  IS             isapp,ispetsc;
  const PetscInt *app=myapp,*petsc=mypetsc;

  PetscFunctionBegin;
  ierr = ISCreateGeneral(comm,napp,app,PETSC_USE_POINTER,&isapp);CHKERRQ(ierr);
  if (mypetsc) {
    ierr = ISCreateGeneral(comm,napp,petsc,PETSC_USE_POINTER,&ispetsc);CHKERRQ(ierr);
  } else {
    ispetsc = NULL;
  }
  ierr = AOCreateMemoryScalableIS(isapp,ispetsc,aoout);CHKERRQ(ierr);
  ierr = ISDestroy(&isapp);CHKERRQ(ierr);
  if (mypetsc) {
    ierr = ISDestroy(&ispetsc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
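/*
   Usage sketch (illustrative only; the values below are a hypothetical example for a run on
   exactly two MPI processes, each contributing three entries):

       PetscMPIInt rank;
       PetscInt    myapp0[] = {3,5,1},myapp1[] = {0,4,2};
       AO          ao;

       ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRMPI(ierr);
       ierr = AOCreateMemoryScalable(PETSC_COMM_WORLD,3,rank ? myapp1 : myapp0,NULL,&ao);CHKERRQ(ierr);
       ierr = AOView(ao,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
       ierr = AODestroy(&ao);CHKERRQ(ierr);

   Passing NULL for mypetsc selects the natural PETSc ordering 0,1,...,5, so this pairs
   application index 3 with PETSc index 0, 5 with 1, and so on, matching the illustration
   near the top of this file.
*/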
/*@C
   AOCreateMemoryScalableIS - Creates a memory scalable application ordering using two index sets.

   Collective on IS

   Input Parameters:
+  isapp - index set that defines an ordering
-  ispetsc - index set that defines another ordering (may be NULL to use the natural ordering)

   Output Parameter:
.  aoout - the new application ordering

   Level: beginner

   Notes:
   The index sets isapp and ispetsc must contain all the integers 0 to napp-1 (where napp is the length of the index sets) with no duplicates; that is, there cannot be any "holes".
   Compared with AOCreateBasicIS(), this routine trades extra message communication for lower memory use.

.seealso: AOCreateMemoryScalable(), AODestroy()
@*/
PetscErrorCode AOCreateMemoryScalableIS(IS isapp,IS ispetsc,AO *aoout)
{
  PetscErrorCode ierr;
  MPI_Comm       comm;
  AO             ao;

  PetscFunctionBegin;
  ierr   = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr);
  ierr   = AOCreate(comm,&ao);CHKERRQ(ierr);
  ierr   = AOSetIS(ao,isapp,ispetsc);CHKERRQ(ierr);
  ierr   = AOSetType(ao,AOMEMORYSCALABLE);CHKERRQ(ierr);
  ierr   = AOViewFromOptions(ao,NULL,"-ao_view");CHKERRQ(ierr);
  *aoout = ao;
  PetscFunctionReturn(0);
}
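/*
   Usage sketch (illustrative only; "napp" local application indices are assumed to already be
   stored in an array "app" on each process):

       IS isapp;
       AO ao;

       ierr = ISCreateGeneral(PETSC_COMM_WORLD,napp,app,PETSC_COPY_VALUES,&isapp);CHKERRQ(ierr);
       ierr = AOCreateMemoryScalableIS(isapp,NULL,&ao);CHKERRQ(ierr);
       ierr = ISDestroy(&isapp);CHKERRQ(ierr);
       ierr = AODestroy(&ao);CHKERRQ(ierr);

   This is roughly equivalent to calling AOCreateMemoryScalable() with mypetsc = NULL, except
   that the caller manages the lifetime of the index set.
*/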