Actual source code: ex51.c
petsc-3.3-p7 2013-05-11
  2: static char help[] = "This example solves a linear system in parallel with KSP.  The matrix\n\
  3: uses arbitrary order polynomials for finite elements on the unit square.  To test the parallel\n\
  4: matrix assembly, the matrix is intentionally laid out across processors\n\
  5: differently from the way it is assembled.  Input arguments are:\n\
  6:   -m <size> -p <order> : mesh size and polynomial order\n\n";

  8: /* Contributed by Travis Austin <austin@txcorp.com>, 2010.
  9:    based on src/ksp/ksp/examples/tutorials/ex3.c
 10:  */

 12: #include <petscksp.h>

 14: /* Declare user-defined routines */
 15: static PetscReal      src(PetscReal,PetscReal);
 16: static PetscReal      ubdy(PetscReal,PetscReal);
 17: static PetscReal      polyBasisFunc(PetscInt,PetscInt,PetscReal*,PetscReal);
 18: static PetscReal      derivPolyBasisFunc(PetscInt,PetscInt,PetscReal*,PetscReal);
 19: static PetscErrorCode Form1DElementMass(PetscReal,PetscInt,double*,double*,PetscScalar*);
 20: static PetscErrorCode Form1DElementStiffness(PetscReal,PetscInt,double*,double*,PetscScalar*);
 21: static PetscErrorCode Form2DElementMass(PetscInt,PetscScalar*,PetscScalar*);
 22: static PetscErrorCode Form2DElementStiffness(PetscInt,PetscScalar*,PetscScalar*,PetscScalar*);
 23: static PetscErrorCode FormNodalRhs(PetscInt,PetscReal,PetscReal,PetscReal,double*,PetscScalar*);
 24: static PetscErrorCode FormNodalSoln(PetscInt,PetscReal,PetscReal,PetscReal,double*,PetscScalar*);
 25: static void leggaulob(double, double, double [], double [], int);
 26: static void qAndLEvaluation(int, double, double*, double*, double*);

 30: int main(int argc,char **args)
 31: {
 32:   PetscInt       p = 2, m = 5;
 33:   PetscInt       num1Dnodes, num2Dnodes;
 34:   PetscScalar    *Ke1D,*Ke2D,*Me1D,*Me2D;
 35:   PetscScalar    *r,*ue,val;
 36:   Vec            u,ustar,b,q;
 37:   Mat            A,Mass;
 38:   KSP            ksp;
 39:   PetscInt       M,N;
 40:   PetscMPIInt    rank,size;
 41:   PetscReal      x,y,h,norm;
 42:   PetscInt       *idx,indx,count,*rows,i,j,k,start,end,its;
 43:   PetscReal      *rowsx,*rowsy;
 44:   PetscReal      *gllNode, *gllWgts;

 47:   PetscInitialize(&argc,&args,(char *)0,help);
 48:   PetscOptionsBegin(PETSC_COMM_WORLD,PETSC_NULL,"Options for p-FEM","");
 49:   PetscOptionsInt("-m","Number of elements in each direction","None",m,&m,PETSC_NULL);
 50:   PetscOptionsInt("-p","Order of each element (tensor product basis)","None",p,&p,PETSC_NULL);
 51:   PetscOptionsEnd();
 52:   N = (p*m+1)*(p*m+1);  /* dimension of matrix */
 53:   M = m*m; /* number of elements */
 54:   h = 1.0/m; /* mesh width */
 55:   MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
 56:   MPI_Comm_size(PETSC_COMM_WORLD,&size);

 58:   /* Create stiffness matrix */
 59:   MatCreate(PETSC_COMM_WORLD,&A);
 60:   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);
 61:   MatSetFromOptions(A);
 62:   start = rank*(M/size) + ((M%size) < rank ? (M%size) : rank);
 63:   end   = start + M/size + ((M%size) > rank);

 65:   /* Create matrix  */
 66:   MatCreate(PETSC_COMM_WORLD,&Mass);
 67:   MatSetSizes(Mass,PETSC_DECIDE,PETSC_DECIDE,N,N);
 68:   MatSetFromOptions(Mass);
 69:   start = rank*(M/size) + ((M%size) < rank ? (M%size) : rank);
 70:   end   = start + M/size + ((M%size) > rank);

 72:   /* Allocate element stiffness matrices */
 73:   num1Dnodes = (p+1);
 74:   num2Dnodes = num1Dnodes*num1Dnodes;
 75:   PetscMalloc((num1Dnodes*num1Dnodes)*sizeof(PetscScalar),&Me1D);
 76:   PetscMalloc((num1Dnodes*num1Dnodes)*sizeof(PetscScalar),&Ke1D);
 77:   PetscMalloc((num2Dnodes*num2Dnodes)*sizeof(PetscScalar),&Me2D);
 78:   PetscMalloc((num2Dnodes*num2Dnodes)*sizeof(PetscScalar),&Ke2D);
 79:   PetscMalloc(num2Dnodes*sizeof(PetscInt),&idx);
 80:   PetscMalloc(num2Dnodes*sizeof(PetscScalar),&r);
 81:   PetscMalloc(num2Dnodes*sizeof(PetscScalar),&ue);

 83:   /* Allocate quadrature and create stiffness matrices */
 84:   PetscMalloc((p+1)*sizeof(PetscReal),&gllNode);
 85:   PetscMalloc((p+1)*sizeof(PetscReal),&gllWgts);
 86:   leggaulob(0.0,1.0,gllNode,gllWgts,p); // Get GLL nodes and weights
 87:   Form1DElementMass(h,p,gllNode,gllWgts,Me1D);
 88:   Form1DElementStiffness(h,p,gllNode,gllWgts,Ke1D);
 89:   Form2DElementMass(p,Me1D,Me2D);
 90:   Form2DElementStiffness(p,Ke1D,Me1D,Ke2D);

 92:   /* Assemble matrix */
 93:   for (i=start; i<end; i++) {
 94:      indx = 0;
 95:      for(k=0;k<(p+1);++k) {
 96:        for(j=0;j<(p+1);++j) {
 97:          idx[indx++] = p*(p*m+1)*(i/m) + p*(i % m) + k*(p*m+1) + j;
 98:        }
 99:      }
100:      MatSetValues(A,num2Dnodes,idx,num2Dnodes,idx,Ke2D,ADD_VALUES);
101:      MatSetValues(Mass,num2Dnodes,idx,num2Dnodes,idx,Me2D,ADD_VALUES);
102:   }
103:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
104:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
105:   MatAssemblyBegin(Mass,MAT_FINAL_ASSEMBLY);
106:   MatAssemblyEnd(Mass,MAT_FINAL_ASSEMBLY);

108:   PetscFree(Me1D);
109:   PetscFree(Ke1D);
110:   PetscFree(Me2D);
111:   PetscFree(Ke2D);

113:   /* Create right-hand-side and solution vectors */
114:   VecCreate(PETSC_COMM_WORLD,&u);
115:   VecSetSizes(u,PETSC_DECIDE,N);
116:   VecSetFromOptions(u);
117:   PetscObjectSetName((PetscObject)u,"Approx. Solution");
118:   VecDuplicate(u,&b);
119:   PetscObjectSetName((PetscObject)b,"Right hand side");
120:   VecDuplicate(u,&q);
121:   PetscObjectSetName((PetscObject)q,"Right hand side 2");
122:   VecDuplicate(b,&ustar);
123:   VecSet(u,0.0);
124:   VecSet(b,0.0);
125:   VecSet(q,0.0);

127:   /* Assemble nodal right-hand-side and soln vector  */
128:   for (i=start; i<end; i++) {
129:      x = h*(i % m);
130:      y = h*(i/m);
131:      indx = 0;
132:      for(k=0;k<(p+1);++k) {
133:        for(j=0;j<(p+1);++j) {
134:          idx[indx++] = p*(p*m+1)*(i/m) + p*(i % m) + k*(p*m+1) + j;
135:        }
136:      }
137:      FormNodalRhs(p,x,y,h,gllNode,r);
138:      FormNodalSoln(p,x,y,h,gllNode,ue);
139:      VecSetValues(q,num2Dnodes,idx,r,INSERT_VALUES);
140:      VecSetValues(ustar,num2Dnodes,idx,ue,INSERT_VALUES);
141:   }
142:   VecAssemblyBegin(q);
143:   VecAssemblyEnd(q);
144:   VecAssemblyBegin(ustar);
145:   VecAssemblyEnd(ustar);

147:   PetscFree(idx);
148:   PetscFree(r);
149:   PetscFree(ue);

151:   /* Get FE right-hand side vector */
152:   MatMult(Mass,q,b);

154:   /* Modify matrix and right-hand-side for Dirichlet boundary conditions */
155:   PetscMalloc(4*p*m*sizeof(PetscInt),&rows);
156:   PetscMalloc(4*p*m*sizeof(PetscReal),&rowsx);
157:   PetscMalloc(4*p*m*sizeof(PetscReal),&rowsy);
158:   for (i=0; i<p*m+1; i++) {
159:     rows[i] = i;   /* bottom */
160:     rowsx[i] = (i/p)*h+gllNode[i%p]*h;
161:     rowsy[i] = 0.0;
162:     rows[3*p*m-1+i] = (p*m)*(p*m+1) + i; /* top */
163:     rowsx[3*p*m-1+i] = (i/p)*h+gllNode[i%p]*h;
164:     rowsy[3*p*m-1+i] = 1.0;
165:   }
166:   count = p*m+1;/* left side */
167:   indx = 1;
168:   for (i=p*m+1; i<(p*m)*(p*m+1); i+= (p*m+1)) {
169:     rows[count] = i;
170:     rowsx[count] = 0.0;
171:     rowsy[count++] = (indx/p)*h+gllNode[indx%p]*h;
172:     indx++;
173:   }
174:   count = 2*p*m; /* right side */
175:   indx = 1;
176:   for (i=2*p*m+1; i<(p*m)*(p*m+1); i+= (p*m+1)) {
177:     rows[count] = i;
178:     rowsx[count] = 1.0;
179:     rowsy[count++] = (indx/p)*h+gllNode[indx%p]*h;
180:     indx++;
181:   }
182:   for (i=0; i<4*p*m; i++) {
183:     x = rowsx[i];
184:     y = rowsy[i];
185:     val = ubdy(x,y);
186:     VecSetValues(b,1,&rows[i],&val,INSERT_VALUES);
187:     VecSetValues(u,1,&rows[i],&val,INSERT_VALUES);
188:   }
189:   MatZeroRows(A,4*p*m,rows,1.0,0,0);
190:   PetscFree(rows);
191:   PetscFree(rowsx);
192:   PetscFree(rowsy);

194:   VecAssemblyBegin(u);
195:   VecAssemblyEnd(u);
196:   VecAssemblyBegin(b);
197:   VecAssemblyEnd(b);

199:   /* Solve linear system */
200:   KSPCreate(PETSC_COMM_WORLD,&ksp);
201:   KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);
202:   KSPSetInitialGuessNonzero(ksp,PETSC_TRUE);
203:   KSPSetFromOptions(ksp);
204:   KSPSolve(ksp,b,u);

206:   /* Check error */
207:   VecAXPY(u,-1.0,ustar);
208:   VecNorm(u,NORM_2,&norm);
209:   KSPGetIterationNumber(ksp,&its);
210:   PetscPrintf(PETSC_COMM_WORLD,"Norm of error %G Iterations %D\n",norm*h,its);

212:   PetscFree(gllNode);
213:   PetscFree(gllWgts);

215:   KSPDestroy(&ksp);
216:   VecDestroy(&u);
217:   VecDestroy(&b);
218:   VecDestroy(&q);
219:   VecDestroy(&ustar);
220:   MatDestroy(&A);
221:   MatDestroy(&Mass);

223:   PetscFinalize();
224:   return 0;
225: }

227: /* --------------------------------------------------------------------- */

231:    /* 1d element stiffness mass matrix  */
232: static PetscErrorCode Form1DElementMass(PetscReal H,PetscInt P,double* gqn,
233:                                  double* gqw,PetscScalar *Me1D)
234: {
235:   int i,j,k;
236:   int indx;

239:   for(j=0;j<(P+1);++j) {
240:     for(i=0;i<(P+1);++i) {
241:       indx = j*(P+1)+i;
242:       Me1D[indx] = 0.0;
243:       for(k=0; k<(P+1);++k) {
244:         Me1D[indx] += H*gqw[k]*polyBasisFunc(P,i,gqn,gqn[k])*polyBasisFunc(P,j,gqn,gqn[k]);
245:       }
246:     }
247:   }
248:   return(0);
249: }

251: /* --------------------------------------------------------------------- */

255:    /* 1d element stiffness matrix for derivative */
256: static PetscErrorCode Form1DElementStiffness(PetscReal H,PetscInt P,double* gqn,
257:                                       double* gqw,PetscScalar *Ke1D)
258: {
259:   int i,j,k;
260:   int indx;

263:   for(j=0;j<(P+1);++j) {
264:     for(i=0;i<(P+1);++i) {
265:       indx = j*(P+1)+i;
266:       Ke1D[indx] = 0.0;
267:       for(k=0; k<(P+1);++k) {
268:         Ke1D[indx] += (1./H)*gqw[k]*derivPolyBasisFunc(P,i,gqn,gqn[k])*derivPolyBasisFunc(P,j,gqn,gqn[k]);
269:       }
270:     }
271:   }

273:   return(0);
274: }

276: /* --------------------------------------------------------------------- */

280:    /* element mass matrix */
281: static PetscErrorCode Form2DElementMass(PetscInt P,PetscScalar *Me1D,PetscScalar *Me2D)
282: {
283:   int i1,j1,i2,j2;
284:   int indx1,indx2,indx3;

287:   for(j2=0;j2<(P+1);++j2) {
288:     for(i2=0; i2<(P+1);++i2) {
289:       for(j1=0;j1<(P+1);++j1) {
290:         for(i1=0;i1<(P+1);++i1) {
291:           indx1 = j1*(P+1)+i1;
292:           indx2 = j2*(P+1)+i2;
293:           indx3 = (j2*(P+1)+j1)*(P+1)*(P+1)+(i2*(P+1)+i1);
294:           Me2D[indx3] = Me1D[indx1]*Me1D[indx2];
295:         }
296:       }
297:     }
298:   }
299:   return(0);
300: }

302: /* --------------------------------------------------------------------- */

306:    /* element stiffness for Laplacian */
307: static PetscErrorCode Form2DElementStiffness(PetscInt P,PetscScalar *Ke1D,PetscScalar *Me1D,
308:                                       PetscScalar *Ke2D)
309: {
310:   int i1,j1,i2,j2;
311:   int indx1,indx2,indx3;

314:   for(j2=0;j2<(P+1);++j2) {
315:     for(i2=0; i2<(P+1);++i2) {
316:       for(j1=0;j1<(P+1);++j1) {
317:         for(i1=0;i1<(P+1);++i1) {
318:           indx1 = j1*(P+1)+i1;
319:           indx2 = j2*(P+1)+i2;
320:           indx3 = (j2*(P+1)+j1)*(P+1)*(P+1)+(i2*(P+1)+i1);
321:           Ke2D[indx3] = Ke1D[indx1]*Me1D[indx2] + Me1D[indx1]*Ke1D[indx2];
322:         }
323:       }
324:     }
325:   }
326:   return(0);
327: }

329: /* --------------------------------------------------------------------- */

333: static PetscErrorCode FormNodalRhs(PetscInt P,PetscReal x,PetscReal y,PetscReal H,double* nds,PetscScalar *r)
334: {
335:   int i,j,indx;

338:   indx=0;
339:   for(j=0;j<(P+1);++j) {
340:     for(i=0;i<(P+1);++i) {
341:       r[indx] = src(x+H*nds[i],y+H*nds[j]);
342:       indx++;
343:     }
344:   }
345:   return(0);
346: }

348: /* --------------------------------------------------------------------- */

352: static PetscErrorCode FormNodalSoln(PetscInt P,PetscReal x,PetscReal y,PetscReal H,double* nds,PetscScalar *u)
353: {
354:   int i,j,indx;

357:   indx=0;
358:   for(j=0;j<(P+1);++j) {
359:     for(i=0;i<(P+1);++i) {
360:       u[indx] = ubdy(x+H*nds[i],y+H*nds[j]);
361:       indx++;
362:     }
363:   }
364:   return(0);
365: }

367: /* --------------------------------------------------------------------- */

371: static PetscReal polyBasisFunc(PetscInt order, PetscInt basis, PetscReal* xLocVal, PetscReal xval) {
372:   PetscReal denominator = 1.;
373:   PetscReal numerator = 1.;
374:   PetscInt i=0;

376:   for (i=0; i<(order+1); i++) {
377:     if (i!=basis) {
378:       numerator *= (xval-xLocVal[i]);
379:       denominator *= (xLocVal[basis]-xLocVal[i]);
380:     }
381:   }
382:   return (numerator/denominator);
383: }

385: /* --------------------------------------------------------------------- */

389: static PetscReal derivPolyBasisFunc(PetscInt order, PetscInt basis, PetscReal* xLocVal, PetscReal xval) {
390:   PetscReal denominator;
391:   PetscReal numerator;
392:   PetscReal numtmp;
393:   PetscInt i=0,j=0;

395:   denominator=1.;
396:   for (i=0; i<(order+1); i++) {
397:     if (i!=basis) {
398:       denominator *= (xLocVal[basis]-xLocVal[i]);
399:     }
400:   }
401:   numerator = 0.;
402:   for(j=0;j<(order+1);++j) {
403:     if(j != basis) {
404:       numtmp = 1.;
405:       for(i=0;i<(order+1);++i) {
406:         if(i!=basis && j!=i) {
407:           numtmp *= (xval-xLocVal[i]);
408:         }
409:       }
410:       numerator += numtmp;
411:     }
412:   }

414:   return (numerator/denominator);
415: }

417: /* --------------------------------------------------------------------- */

419: static PetscReal ubdy(PetscReal x,PetscReal y)
420: {
421:   return x*x*y*y;
422: }

424: static PetscReal src(PetscReal x,PetscReal y)
425: {
426:   return -2.*y*y - 2.*x*x;
427: }
428: /* --------------------------------------------------------------------- */

430: static void leggaulob(double x1, double x2, double x[], double w[], int n)
431: /*******************************************************************************
432: Given the lower and upper limits of integration x1 and x2, and given n, this
433: routine returns arrays x[0..n-1] and w[0..n-1] of length n, containing the abscissas
434: and weights of the Gauss-Lobatto-Legendre n-point quadrature formula.
435: *******************************************************************************/
436: {
437:   int j,m;
438:   double z1,z,xm,xl,q,qp,Ln,scale;
439:   double pi=3.1415926535897932385;
440:   if(n==1) {
441:     x[0]=x1;   /* Scale the root to the desired interval, */
442:     x[1]=x2;   /* and put in its symmetric counterpart.   */
443:     w[0]=1.;   /* Compute the weight */
444:     w[1]=1.;   /* and its symmetric counterpart. */
445:   } else {
446:     x[0]=x1;   /* Scale the root to the desired interval, */
447:     x[n]=x2;   /* and put in its symmetric counterpart.   */
448:     w[0]=2./(n*(n+1));;   /* Compute the weight */
449:     w[n]=2./(n*(n+1));   /* and its symmetric counterpart. */
450:     m=(n+1)/2; /* The roots are symmetric, so we only find half of them. */
451:     xm=0.5*(x2+x1);
452:     xl=0.5*(x2-x1);
453:     for (j=1;j<=(m-1);j++) { /* Loop over the desired roots. */
454:       z=-1.0*cos((pi*(j+0.25)/(n))-(3.0/(8.0*n*pi))*(1.0/(j+0.25)));
455:       /* Starting with the above approximation to the ith root, we enter */
456:       /* the main loop of refinement by Newton's method.                 */
457:       do {
458:         qAndLEvaluation(n,z,&q,&qp,&Ln);
459:         z1=z;
460:         z=z1-q/qp; /* Newton's method. */
461:       } while (fabs(z-z1) > 3.0e-11);
462:       qAndLEvaluation(n,z,&q,&qp,&Ln);
463:       x[j]=xm+xl*z;      /* Scale the root to the desired interval, */
464:       x[n-j]=xm-xl*z;  /* and put in its symmetric counterpart.   */
465:       w[j]=2.0/(n*(n+1)*Ln*Ln); /* Compute the weight */
466:       w[n-j]=w[j];                 /* and its symmetric counterpart. */
467:     }
468:   }
469:   if(n%2==0) {
470:     qAndLEvaluation(n,0.0,&q,&qp,&Ln);
471:     x[n/2]=(x2-x1)/2.0;
472:     w[n/2]=2.0/(n*(n+1)*Ln*Ln);
473:   }
474:   /* scale the weights according to mapping from [-1,1] to [0,1] */
475:   scale = (x2-x1)/2.0;
476:   for(j=0;j<=n;++j) w[j] = w[j]*scale;
477: }


480: /******************************************************************************/
481: static void qAndLEvaluation(int n, double x, double* q, double* qp, double* Ln)
482: /*******************************************************************************
483: Compute the polynomial qn(x) = L_{N+1}(x) - L_{n-1}(x) and its derivative in
484: addition to L_N(x) as these are needed for the GLL points.  See the book titled
485: "Implementing Spectral Methods for Partial Differential Equations: Algorithms
486: for Scientists and Engineers" by David A. Kopriva.
487: *******************************************************************************/
488: {
489:   int k;

491:   double Lnp;
492:   double Lnp1, Lnp1p;
493:   double Lnm1, Lnm1p;
494:   double Lnm2, Lnm2p;

496:   Lnm1 = 1.0;
497:   *Ln = x;
498:   Lnm1p = 0.0;
499:   Lnp = 1.0;

501:   for(k=2; k<=n; ++k) {
502:     Lnm2 = Lnm1;
503:     Lnm1 = *Ln;
504:     Lnm2p = Lnm1p;
505:     Lnm1p = Lnp;
506:     *Ln = (2.*k-1.)/(1.0*k)*x*Lnm1 - (k-1.)/(1.0*k)*Lnm2;
507:     Lnp = Lnm2p + (2.0*k-1)*Lnm1;
508:   }
509:   k=n+1;
510:   Lnp1 = (2.*k-1.)/(1.0*k)*x*(*Ln) - (k-1.)/(1.0*k)*Lnm1;
511:   Lnp1p = Lnm1p + (2.0*k-1)*(*Ln);
512:   *q = Lnp1 - Lnm1;
513:   *qp = Lnp1p - Lnm1p;
514: }