Actual source code: ex47cu.cu

  1: static char help[] = "Solves -Laplacian u - exp(u) = 0,  0 < x < 1 using GPU\n\n";
  2: /*
  3:    Same as ex47.c except it also uses the GPU to evaluate the function
  4: */

  6: #include <petscdmda.h>
  7: #include <petscsnes.h>
  8: #include <petsccusp.h>

 11: PetscBool  useCUSP = PETSC_FALSE;

 13: int main(int argc,char **argv)
 14: {
 15:   SNES           snes;
 16:   Vec            x,f;
 17:   Mat            J;
 18:   DM             da;
 20:   char           *tmp,typeName[256];
 21:   PetscBool      flg;

 23:   PetscInitialize(&argc,&argv,(char *)0,help);
 24:   PetscOptionsGetString(PETSC_NULL,"-da_vec_type",typeName,256,&flg);
 25:   if (flg) {
 26:     PetscStrstr(typeName,"cusp",&tmp);
 27:     if (tmp) useCUSP = PETSC_TRUE;
 28:   }

 30:   DMDACreate1d(PETSC_COMM_WORLD,DMDA_BOUNDARY_NONE,-8,1,1,PETSC_NULL,&da);
 31:   DMCreateGlobalVector(da,&x); VecDuplicate(x,&f);
 32:   DMGetMatrix(da,MATAIJ,&J);

 34:   SNESCreate(PETSC_COMM_WORLD,&snes);
 35:   SNESSetFunction(snes,f,ComputeFunction,da);
 36:   SNESSetJacobian(snes,J,J,ComputeJacobian,da);
 37:   SNESSetFromOptions(snes);
 38:   SNESSolve(snes,PETSC_NULL,x);

 40:   MatDestroy(&J);
 41:   VecDestroy(&x);
 42:   VecDestroy(&f);
 43:   SNESDestroy(&snes);
 44:   DMDestroy(&da);

 46:   PetscFinalize();
 47:   return 0;
 48: }

 50: struct ApplyStencil
 51: {
 52:         template <typename Tuple>
 53:         __host__ __device__
 54:         void operator()(Tuple t)
 55:         {
 56:                 /* f = (2*x_i - x_(i+1) - x_(i-1))/h - h*exp(x_i) */
 57:              thrust::get<0>(t) = 1;
 58:                 if ((thrust::get<4>(t) > 0) && (thrust::get<4>(t) < thrust::get<5>(t)-1)) {
 59:                   thrust::get<0>(t) = (2.0*thrust::get<1>(t) - thrust::get<2>(t) - thrust::get<3>(t)) / (thrust::get<6>(t)) - (thrust::get<6>(t))*exp(thrust::get<1>(t));
 60:                 } else if (thrust::get<4>(t) == 0) {
 61:                   thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
 62:                 } else if (thrust::get<4>(t) == thrust::get<5>(t)-1) {
 63:                   thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
 64:                 }
 65: 
 66:         }
 67: };

 69: PetscErrorCode ComputeFunction(SNES snes,Vec x,Vec f,void *ctx)
 70: {
 71:   PetscInt       i,Mx,xs,xm,xstartshift,xendshift,fstart;
 72:   PetscScalar    *xx,*ff,hx;
 73:   DM             da = (DM) ctx;
 74:   Vec            xlocal;
 76:   PetscMPIInt    rank,size;
 77:   MPI_Comm       comm;
 78:   CUSPARRAY      *xarray,*farray;

 80:   DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE);
 81:   hx     = 1.0/(PetscReal)(Mx-1);
 82:   DMGetLocalVector(da,&xlocal);
 83:   DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal);
 84:   DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal);

 86:   if (useCUSP) {
 87:     VecCUSPGetArrayRead(xlocal,&xarray);
 88:     VecCUSPGetArrayWrite(f,&farray);
 89:     PetscObjectGetComm((PetscObject)da,&comm);
 90:     MPI_Comm_size(comm,&size);
 91:     MPI_Comm_rank(comm,&rank);
 92:     if (rank) xstartshift = 1; else xstartshift = 0;
 93:     if (rank != size-1) xendshift = 1; else xendshift = 0;
 94:     VecGetOwnershipRange(f,&fstart,PETSC_NULL);
 95:     try {
 96:       thrust::for_each(
 97:                        thrust::make_zip_iterator(
 98:                                                  thrust::make_tuple(
 99:                                                                     farray->begin(),
100:                                                                     xarray->begin()+xstartshift,
101:                                                                     xarray->begin()+xstartshift + 1,
102:                                                                     xarray->begin()+xstartshift - 1,
103:                                                                     thrust::counting_iterator<int>(fstart),
104:                                                                     thrust::constant_iterator<int>(Mx),
105:                                                                     thrust::constant_iterator<PetscScalar>(hx))),
106:                        thrust::make_zip_iterator(
107:                                                  thrust::make_tuple(
108:                                                                     farray->end(),
109:                                                                     xarray->end()-xendshift,
110:                                                                     xarray->end()-xendshift + 1,
111:                                                                     xarray->end()-xendshift - 1,
112:                                                                     thrust::counting_iterator<int>(fstart) + x->map->n,
113:                                                                     thrust::constant_iterator<int>(Mx),
114:                                                                     thrust::constant_iterator<PetscScalar>(hx))),
115:                        ApplyStencil());
116:     }
117:     catch(char* all){
118:       PetscPrintf(PETSC_COMM_WORLD, "Thrust is not working\n");
119:     }
120:     VecCUSPRestoreArrayRead(xlocal,&xarray);
121:     VecCUSPRestoreArrayWrite(f,&farray);
122:   } else {
123:     DMDAVecGetArray(da,xlocal,&xx);
124:     DMDAVecGetArray(da,f,&ff);
125:     DMDAGetCorners(da,&xs,PETSC_NULL,PETSC_NULL,&xm,PETSC_NULL,PETSC_NULL);
126: 
127:     for (i=xs; i<xs+xm; i++) {
128:       if (i == 0 || i == Mx-1) ff[i] = xx[i]/hx;
129:       else  ff[i] =  (2.0*xx[i] - xx[i-1] - xx[i+1])/hx - hx*PetscExpScalar(xx[i]);
130:     }
131:     DMDAVecRestoreArray(da,xlocal,&xx);
132:     DMDAVecRestoreArray(da,f,&ff);
133:   }
134:     DMRestoreLocalVector(da,&xlocal);
135:   //  VecView(x,0);printf("f\n");
136:   //  VecView(f,0);
137:   return 0;

139: }
140: PetscErrorCode ComputeJacobian(SNES snes,Vec x,Mat *J,Mat *B,MatStructure *flag,void *ctx)
141: {
142:   DM             da = (DM) ctx;
143:   PetscInt       i,Mx,xm,xs;
144:   PetscScalar    hx,*xx;
145:   Vec            xlocal;

148:   DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE);
149:   hx = 1.0/(PetscReal)(Mx-1);
150:   DMGetLocalVector(da,&xlocal);DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal);
151:   DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal);
152:   DMDAVecGetArray(da,xlocal,&xx);
153:   DMDAGetCorners(da,&xs,PETSC_NULL,PETSC_NULL,&xm,PETSC_NULL,PETSC_NULL);

155:   for (i=xs; i<xs+xm; i++) {
156:     if (i == 0 || i == Mx-1) {
157:       MatSetValue(*J,i,i,1.0/hx,INSERT_VALUES);
158:     } else {
159:       MatSetValue(*J,i,i-1,-1.0/hx,INSERT_VALUES);
160:       MatSetValue(*J,i,i,2.0/hx - hx*PetscExpScalar(xx[i]),INSERT_VALUES);
161:       MatSetValue(*J,i,i+1,-1.0/hx,INSERT_VALUES);
162:     }
163:   }
164:   MatAssemblyBegin(*J,MAT_FINAL_ASSEMBLY);
165:   MatAssemblyEnd(*J,MAT_FINAL_ASSEMBLY);
166:   *flag = SAME_NONZERO_PATTERN;
167:   DMDAVecRestoreArray(da,xlocal,&xx);
168:   DMRestoreLocalVector(da,&xlocal);
169:   return 0;}