Actual source code: ex47cu.cu
1: static char help[] = "Solves -Laplacian u - exp(u) = 0, 0 < x < 1 using GPU\n\n";
2: /*
3: Same as ex47.c except it also uses the GPU to evaluate the function
4: */
6: #include <petscdmda.h>
7: #include <petscsnes.h>
8: #include <petsccusp.h>
11: PetscBool useCUSP = PETSC_FALSE;
13: int main(int argc,char **argv)
14: {
15: SNES snes;
16: Vec x,f;
17: Mat J;
18: DM da;
20: char *tmp,typeName[256];
21: PetscBool flg;
23: PetscInitialize(&argc,&argv,(char *)0,help);
24: PetscOptionsGetString(PETSC_NULL,"-da_vec_type",typeName,256,&flg);
25: if (flg) {
26: PetscStrstr(typeName,"cusp",&tmp);
27: if (tmp) useCUSP = PETSC_TRUE;
28: }
30: DMDACreate1d(PETSC_COMM_WORLD,DMDA_BOUNDARY_NONE,-8,1,1,PETSC_NULL,&da);
31: DMCreateGlobalVector(da,&x); VecDuplicate(x,&f);
32: DMGetMatrix(da,MATAIJ,&J);
34: SNESCreate(PETSC_COMM_WORLD,&snes);
35: SNESSetFunction(snes,f,ComputeFunction,da);
36: SNESSetJacobian(snes,J,J,ComputeJacobian,da);
37: SNESSetFromOptions(snes);
38: SNESSolve(snes,PETSC_NULL,x);
40: MatDestroy(&J);
41: VecDestroy(&x);
42: VecDestroy(&f);
43: SNESDestroy(&snes);
44: DMDestroy(&da);
46: PetscFinalize();
47: return 0;
48: }
50: struct ApplyStencil
51: {
52: template <typename Tuple>
53: __host__ __device__
54: void operator()(Tuple t)
55: {
56: /* f = (2*x_i - x_(i+1) - x_(i-1))/h - h*exp(x_i) */
57: thrust::get<0>(t) = 1;
58: if ((thrust::get<4>(t) > 0) && (thrust::get<4>(t) < thrust::get<5>(t)-1)) {
59: thrust::get<0>(t) = (2.0*thrust::get<1>(t) - thrust::get<2>(t) - thrust::get<3>(t)) / (thrust::get<6>(t)) - (thrust::get<6>(t))*exp(thrust::get<1>(t));
60: } else if (thrust::get<4>(t) == 0) {
61: thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
62: } else if (thrust::get<4>(t) == thrust::get<5>(t)-1) {
63: thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
64: }
65:
66: }
67: };
69: PetscErrorCode ComputeFunction(SNES snes,Vec x,Vec f,void *ctx)
70: {
71: PetscInt i,Mx,xs,xm,xstartshift,xendshift,fstart;
72: PetscScalar *xx,*ff,hx;
73: DM da = (DM) ctx;
74: Vec xlocal;
76: PetscMPIInt rank,size;
77: MPI_Comm comm;
78: CUSPARRAY *xarray,*farray;
80: DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE);
81: hx = 1.0/(PetscReal)(Mx-1);
82: DMGetLocalVector(da,&xlocal);
83: DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal);
84: DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal);
86: if (useCUSP) {
87: VecCUSPGetArrayRead(xlocal,&xarray);
88: VecCUSPGetArrayWrite(f,&farray);
89: PetscObjectGetComm((PetscObject)da,&comm);
90: MPI_Comm_size(comm,&size);
91: MPI_Comm_rank(comm,&rank);
92: if (rank) xstartshift = 1; else xstartshift = 0;
93: if (rank != size-1) xendshift = 1; else xendshift = 0;
94: VecGetOwnershipRange(f,&fstart,PETSC_NULL);
95: try {
96: thrust::for_each(
97: thrust::make_zip_iterator(
98: thrust::make_tuple(
99: farray->begin(),
100: xarray->begin()+xstartshift,
101: xarray->begin()+xstartshift + 1,
102: xarray->begin()+xstartshift - 1,
103: thrust::counting_iterator<int>(fstart),
104: thrust::constant_iterator<int>(Mx),
105: thrust::constant_iterator<PetscScalar>(hx))),
106: thrust::make_zip_iterator(
107: thrust::make_tuple(
108: farray->end(),
109: xarray->end()-xendshift,
110: xarray->end()-xendshift + 1,
111: xarray->end()-xendshift - 1,
112: thrust::counting_iterator<int>(fstart) + x->map->n,
113: thrust::constant_iterator<int>(Mx),
114: thrust::constant_iterator<PetscScalar>(hx))),
115: ApplyStencil());
116: }
117: catch(char* all){
118: PetscPrintf(PETSC_COMM_WORLD, "Thrust is not working\n");
119: }
120: VecCUSPRestoreArrayRead(xlocal,&xarray);
121: VecCUSPRestoreArrayWrite(f,&farray);
122: } else {
123: DMDAVecGetArray(da,xlocal,&xx);
124: DMDAVecGetArray(da,f,&ff);
125: DMDAGetCorners(da,&xs,PETSC_NULL,PETSC_NULL,&xm,PETSC_NULL,PETSC_NULL);
126:
127: for (i=xs; i<xs+xm; i++) {
128: if (i == 0 || i == Mx-1) ff[i] = xx[i]/hx;
129: else ff[i] = (2.0*xx[i] - xx[i-1] - xx[i+1])/hx - hx*PetscExpScalar(xx[i]);
130: }
131: DMDAVecRestoreArray(da,xlocal,&xx);
132: DMDAVecRestoreArray(da,f,&ff);
133: }
134: DMRestoreLocalVector(da,&xlocal);
135: // VecView(x,0);printf("f\n");
136: // VecView(f,0);
137: return 0;
139: }
140: PetscErrorCode ComputeJacobian(SNES snes,Vec x,Mat *J,Mat *B,MatStructure *flag,void *ctx)
141: {
142: DM da = (DM) ctx;
143: PetscInt i,Mx,xm,xs;
144: PetscScalar hx,*xx;
145: Vec xlocal;
148: DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE);
149: hx = 1.0/(PetscReal)(Mx-1);
150: DMGetLocalVector(da,&xlocal);DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal);
151: DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal);
152: DMDAVecGetArray(da,xlocal,&xx);
153: DMDAGetCorners(da,&xs,PETSC_NULL,PETSC_NULL,&xm,PETSC_NULL,PETSC_NULL);
155: for (i=xs; i<xs+xm; i++) {
156: if (i == 0 || i == Mx-1) {
157: MatSetValue(*J,i,i,1.0/hx,INSERT_VALUES);
158: } else {
159: MatSetValue(*J,i,i-1,-1.0/hx,INSERT_VALUES);
160: MatSetValue(*J,i,i,2.0/hx - hx*PetscExpScalar(xx[i]),INSERT_VALUES);
161: MatSetValue(*J,i,i+1,-1.0/hx,INSERT_VALUES);
162: }
163: }
164: MatAssemblyBegin(*J,MAT_FINAL_ASSEMBLY);
165: MatAssemblyEnd(*J,MAT_FINAL_ASSEMBLY);
166: *flag = SAME_NONZERO_PATTERN;
167: DMDAVecRestoreArray(da,xlocal,&xx);
168: DMRestoreLocalVector(da,&xlocal);
169: return 0;}