Actual source code: ex51.c
petsc-3.3-p7 2013-05-11
2: static char help[] = "This example solves a linear system in parallel with KSP. The matrix\n\
3: uses arbitrary order polynomials for finite elements on the unit square. To test the parallel\n\
4: matrix assembly, the matrix is intentionally laid out across processors\n\
5: differently from the way it is assembled. Input arguments are:\n\
6: -m <size> -p <order> : mesh size and polynomial order\n\n";
8: /* Contributed by Travis Austin <austin@txcorp.com>, 2010.
9: based on src/ksp/ksp/examples/tutorials/ex3.c
10: */
12: #include <petscksp.h>
14: /* Declare user-defined routines */
15: static PetscReal src(PetscReal,PetscReal);
16: static PetscReal ubdy(PetscReal,PetscReal);
17: static PetscReal polyBasisFunc(PetscInt,PetscInt,PetscReal*,PetscReal);
18: static PetscReal derivPolyBasisFunc(PetscInt,PetscInt,PetscReal*,PetscReal);
19: static PetscErrorCode Form1DElementMass(PetscReal,PetscInt,double*,double*,PetscScalar*);
20: static PetscErrorCode Form1DElementStiffness(PetscReal,PetscInt,double*,double*,PetscScalar*);
21: static PetscErrorCode Form2DElementMass(PetscInt,PetscScalar*,PetscScalar*);
22: static PetscErrorCode Form2DElementStiffness(PetscInt,PetscScalar*,PetscScalar*,PetscScalar*);
23: static PetscErrorCode FormNodalRhs(PetscInt,PetscReal,PetscReal,PetscReal,double*,PetscScalar*);
24: static PetscErrorCode FormNodalSoln(PetscInt,PetscReal,PetscReal,PetscReal,double*,PetscScalar*);
25: static void leggaulob(double, double, double [], double [], int);
26: static void qAndLEvaluation(int, double, double*, double*, double*);
30: int main(int argc,char **args)
31: {
32: PetscInt p = 2, m = 5;
33: PetscInt num1Dnodes, num2Dnodes;
34: PetscScalar *Ke1D,*Ke2D,*Me1D,*Me2D;
35: PetscScalar *r,*ue,val;
36: Vec u,ustar,b,q;
37: Mat A,Mass;
38: KSP ksp;
39: PetscInt M,N;
40: PetscMPIInt rank,size;
41: PetscReal x,y,h,norm;
42: PetscInt *idx,indx,count,*rows,i,j,k,start,end,its;
43: PetscReal *rowsx,*rowsy;
44: PetscReal *gllNode, *gllWgts;
47: PetscInitialize(&argc,&args,(char *)0,help);
48: PetscOptionsBegin(PETSC_COMM_WORLD,PETSC_NULL,"Options for p-FEM","");
49: PetscOptionsInt("-m","Number of elements in each direction","None",m,&m,PETSC_NULL);
50: PetscOptionsInt("-p","Order of each element (tensor product basis)","None",p,&p,PETSC_NULL);
51: PetscOptionsEnd();
52: N = (p*m+1)*(p*m+1); /* dimension of matrix */
53: M = m*m; /* number of elements */
54: h = 1.0/m; /* mesh width */
55: MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
56: MPI_Comm_size(PETSC_COMM_WORLD,&size);
58: /* Create stiffness matrix */
59: MatCreate(PETSC_COMM_WORLD,&A);
60: MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);
61: MatSetFromOptions(A);
62: start = rank*(M/size) + ((M%size) < rank ? (M%size) : rank);
63: end = start + M/size + ((M%size) > rank);
65: /* Create matrix */
66: MatCreate(PETSC_COMM_WORLD,&Mass);
67: MatSetSizes(Mass,PETSC_DECIDE,PETSC_DECIDE,N,N);
68: MatSetFromOptions(Mass);
69: start = rank*(M/size) + ((M%size) < rank ? (M%size) : rank);
70: end = start + M/size + ((M%size) > rank);
72: /* Allocate element stiffness matrices */
73: num1Dnodes = (p+1);
74: num2Dnodes = num1Dnodes*num1Dnodes;
75: PetscMalloc((num1Dnodes*num1Dnodes)*sizeof(PetscScalar),&Me1D);
76: PetscMalloc((num1Dnodes*num1Dnodes)*sizeof(PetscScalar),&Ke1D);
77: PetscMalloc((num2Dnodes*num2Dnodes)*sizeof(PetscScalar),&Me2D);
78: PetscMalloc((num2Dnodes*num2Dnodes)*sizeof(PetscScalar),&Ke2D);
79: PetscMalloc(num2Dnodes*sizeof(PetscInt),&idx);
80: PetscMalloc(num2Dnodes*sizeof(PetscScalar),&r);
81: PetscMalloc(num2Dnodes*sizeof(PetscScalar),&ue);
83: /* Allocate quadrature and create stiffness matrices */
84: PetscMalloc((p+1)*sizeof(PetscReal),&gllNode);
85: PetscMalloc((p+1)*sizeof(PetscReal),&gllWgts);
86: leggaulob(0.0,1.0,gllNode,gllWgts,p); // Get GLL nodes and weights
87: Form1DElementMass(h,p,gllNode,gllWgts,Me1D);
88: Form1DElementStiffness(h,p,gllNode,gllWgts,Ke1D);
89: Form2DElementMass(p,Me1D,Me2D);
90: Form2DElementStiffness(p,Ke1D,Me1D,Ke2D);
92: /* Assemble matrix */
93: for (i=start; i<end; i++) {
94: indx = 0;
95: for(k=0;k<(p+1);++k) {
96: for(j=0;j<(p+1);++j) {
97: idx[indx++] = p*(p*m+1)*(i/m) + p*(i % m) + k*(p*m+1) + j;
98: }
99: }
100: MatSetValues(A,num2Dnodes,idx,num2Dnodes,idx,Ke2D,ADD_VALUES);
101: MatSetValues(Mass,num2Dnodes,idx,num2Dnodes,idx,Me2D,ADD_VALUES);
102: }
103: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
104: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
105: MatAssemblyBegin(Mass,MAT_FINAL_ASSEMBLY);
106: MatAssemblyEnd(Mass,MAT_FINAL_ASSEMBLY);
108: PetscFree(Me1D);
109: PetscFree(Ke1D);
110: PetscFree(Me2D);
111: PetscFree(Ke2D);
113: /* Create right-hand-side and solution vectors */
114: VecCreate(PETSC_COMM_WORLD,&u);
115: VecSetSizes(u,PETSC_DECIDE,N);
116: VecSetFromOptions(u);
117: PetscObjectSetName((PetscObject)u,"Approx. Solution");
118: VecDuplicate(u,&b);
119: PetscObjectSetName((PetscObject)b,"Right hand side");
120: VecDuplicate(u,&q);
121: PetscObjectSetName((PetscObject)q,"Right hand side 2");
122: VecDuplicate(b,&ustar);
123: VecSet(u,0.0);
124: VecSet(b,0.0);
125: VecSet(q,0.0);
127: /* Assemble nodal right-hand-side and soln vector */
128: for (i=start; i<end; i++) {
129: x = h*(i % m);
130: y = h*(i/m);
131: indx = 0;
132: for(k=0;k<(p+1);++k) {
133: for(j=0;j<(p+1);++j) {
134: idx[indx++] = p*(p*m+1)*(i/m) + p*(i % m) + k*(p*m+1) + j;
135: }
136: }
137: FormNodalRhs(p,x,y,h,gllNode,r);
138: FormNodalSoln(p,x,y,h,gllNode,ue);
139: VecSetValues(q,num2Dnodes,idx,r,INSERT_VALUES);
140: VecSetValues(ustar,num2Dnodes,idx,ue,INSERT_VALUES);
141: }
142: VecAssemblyBegin(q);
143: VecAssemblyEnd(q);
144: VecAssemblyBegin(ustar);
145: VecAssemblyEnd(ustar);
147: PetscFree(idx);
148: PetscFree(r);
149: PetscFree(ue);
151: /* Get FE right-hand side vector */
152: MatMult(Mass,q,b);
154: /* Modify matrix and right-hand-side for Dirichlet boundary conditions */
155: PetscMalloc(4*p*m*sizeof(PetscInt),&rows);
156: PetscMalloc(4*p*m*sizeof(PetscReal),&rowsx);
157: PetscMalloc(4*p*m*sizeof(PetscReal),&rowsy);
158: for (i=0; i<p*m+1; i++) {
159: rows[i] = i; /* bottom */
160: rowsx[i] = (i/p)*h+gllNode[i%p]*h;
161: rowsy[i] = 0.0;
162: rows[3*p*m-1+i] = (p*m)*(p*m+1) + i; /* top */
163: rowsx[3*p*m-1+i] = (i/p)*h+gllNode[i%p]*h;
164: rowsy[3*p*m-1+i] = 1.0;
165: }
166: count = p*m+1;/* left side */
167: indx = 1;
168: for (i=p*m+1; i<(p*m)*(p*m+1); i+= (p*m+1)) {
169: rows[count] = i;
170: rowsx[count] = 0.0;
171: rowsy[count++] = (indx/p)*h+gllNode[indx%p]*h;
172: indx++;
173: }
174: count = 2*p*m; /* right side */
175: indx = 1;
176: for (i=2*p*m+1; i<(p*m)*(p*m+1); i+= (p*m+1)) {
177: rows[count] = i;
178: rowsx[count] = 1.0;
179: rowsy[count++] = (indx/p)*h+gllNode[indx%p]*h;
180: indx++;
181: }
182: for (i=0; i<4*p*m; i++) {
183: x = rowsx[i];
184: y = rowsy[i];
185: val = ubdy(x,y);
186: VecSetValues(b,1,&rows[i],&val,INSERT_VALUES);
187: VecSetValues(u,1,&rows[i],&val,INSERT_VALUES);
188: }
189: MatZeroRows(A,4*p*m,rows,1.0,0,0);
190: PetscFree(rows);
191: PetscFree(rowsx);
192: PetscFree(rowsy);
194: VecAssemblyBegin(u);
195: VecAssemblyEnd(u);
196: VecAssemblyBegin(b);
197: VecAssemblyEnd(b);
199: /* Solve linear system */
200: KSPCreate(PETSC_COMM_WORLD,&ksp);
201: KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);
202: KSPSetInitialGuessNonzero(ksp,PETSC_TRUE);
203: KSPSetFromOptions(ksp);
204: KSPSolve(ksp,b,u);
206: /* Check error */
207: VecAXPY(u,-1.0,ustar);
208: VecNorm(u,NORM_2,&norm);
209: KSPGetIterationNumber(ksp,&its);
210: PetscPrintf(PETSC_COMM_WORLD,"Norm of error %G Iterations %D\n",norm*h,its);
212: PetscFree(gllNode);
213: PetscFree(gllWgts);
215: KSPDestroy(&ksp);
216: VecDestroy(&u);
217: VecDestroy(&b);
218: VecDestroy(&q);
219: VecDestroy(&ustar);
220: MatDestroy(&A);
221: MatDestroy(&Mass);
223: PetscFinalize();
224: return 0;
225: }
227: /* --------------------------------------------------------------------- */
231: /* 1d element stiffness mass matrix */
232: static PetscErrorCode Form1DElementMass(PetscReal H,PetscInt P,double* gqn,
233: double* gqw,PetscScalar *Me1D)
234: {
235: int i,j,k;
236: int indx;
239: for(j=0;j<(P+1);++j) {
240: for(i=0;i<(P+1);++i) {
241: indx = j*(P+1)+i;
242: Me1D[indx] = 0.0;
243: for(k=0; k<(P+1);++k) {
244: Me1D[indx] += H*gqw[k]*polyBasisFunc(P,i,gqn,gqn[k])*polyBasisFunc(P,j,gqn,gqn[k]);
245: }
246: }
247: }
248: return(0);
249: }
251: /* --------------------------------------------------------------------- */
255: /* 1d element stiffness matrix for derivative */
256: static PetscErrorCode Form1DElementStiffness(PetscReal H,PetscInt P,double* gqn,
257: double* gqw,PetscScalar *Ke1D)
258: {
259: int i,j,k;
260: int indx;
263: for(j=0;j<(P+1);++j) {
264: for(i=0;i<(P+1);++i) {
265: indx = j*(P+1)+i;
266: Ke1D[indx] = 0.0;
267: for(k=0; k<(P+1);++k) {
268: Ke1D[indx] += (1./H)*gqw[k]*derivPolyBasisFunc(P,i,gqn,gqn[k])*derivPolyBasisFunc(P,j,gqn,gqn[k]);
269: }
270: }
271: }
273: return(0);
274: }
276: /* --------------------------------------------------------------------- */
280: /* element mass matrix */
281: static PetscErrorCode Form2DElementMass(PetscInt P,PetscScalar *Me1D,PetscScalar *Me2D)
282: {
283: int i1,j1,i2,j2;
284: int indx1,indx2,indx3;
287: for(j2=0;j2<(P+1);++j2) {
288: for(i2=0; i2<(P+1);++i2) {
289: for(j1=0;j1<(P+1);++j1) {
290: for(i1=0;i1<(P+1);++i1) {
291: indx1 = j1*(P+1)+i1;
292: indx2 = j2*(P+1)+i2;
293: indx3 = (j2*(P+1)+j1)*(P+1)*(P+1)+(i2*(P+1)+i1);
294: Me2D[indx3] = Me1D[indx1]*Me1D[indx2];
295: }
296: }
297: }
298: }
299: return(0);
300: }
302: /* --------------------------------------------------------------------- */
306: /* element stiffness for Laplacian */
307: static PetscErrorCode Form2DElementStiffness(PetscInt P,PetscScalar *Ke1D,PetscScalar *Me1D,
308: PetscScalar *Ke2D)
309: {
310: int i1,j1,i2,j2;
311: int indx1,indx2,indx3;
314: for(j2=0;j2<(P+1);++j2) {
315: for(i2=0; i2<(P+1);++i2) {
316: for(j1=0;j1<(P+1);++j1) {
317: for(i1=0;i1<(P+1);++i1) {
318: indx1 = j1*(P+1)+i1;
319: indx2 = j2*(P+1)+i2;
320: indx3 = (j2*(P+1)+j1)*(P+1)*(P+1)+(i2*(P+1)+i1);
321: Ke2D[indx3] = Ke1D[indx1]*Me1D[indx2] + Me1D[indx1]*Ke1D[indx2];
322: }
323: }
324: }
325: }
326: return(0);
327: }
329: /* --------------------------------------------------------------------- */
333: static PetscErrorCode FormNodalRhs(PetscInt P,PetscReal x,PetscReal y,PetscReal H,double* nds,PetscScalar *r)
334: {
335: int i,j,indx;
338: indx=0;
339: for(j=0;j<(P+1);++j) {
340: for(i=0;i<(P+1);++i) {
341: r[indx] = src(x+H*nds[i],y+H*nds[j]);
342: indx++;
343: }
344: }
345: return(0);
346: }
348: /* --------------------------------------------------------------------- */
352: static PetscErrorCode FormNodalSoln(PetscInt P,PetscReal x,PetscReal y,PetscReal H,double* nds,PetscScalar *u)
353: {
354: int i,j,indx;
357: indx=0;
358: for(j=0;j<(P+1);++j) {
359: for(i=0;i<(P+1);++i) {
360: u[indx] = ubdy(x+H*nds[i],y+H*nds[j]);
361: indx++;
362: }
363: }
364: return(0);
365: }
367: /* --------------------------------------------------------------------- */
371: static PetscReal polyBasisFunc(PetscInt order, PetscInt basis, PetscReal* xLocVal, PetscReal xval) {
372: PetscReal denominator = 1.;
373: PetscReal numerator = 1.;
374: PetscInt i=0;
376: for (i=0; i<(order+1); i++) {
377: if (i!=basis) {
378: numerator *= (xval-xLocVal[i]);
379: denominator *= (xLocVal[basis]-xLocVal[i]);
380: }
381: }
382: return (numerator/denominator);
383: }
385: /* --------------------------------------------------------------------- */
389: static PetscReal derivPolyBasisFunc(PetscInt order, PetscInt basis, PetscReal* xLocVal, PetscReal xval) {
390: PetscReal denominator;
391: PetscReal numerator;
392: PetscReal numtmp;
393: PetscInt i=0,j=0;
395: denominator=1.;
396: for (i=0; i<(order+1); i++) {
397: if (i!=basis) {
398: denominator *= (xLocVal[basis]-xLocVal[i]);
399: }
400: }
401: numerator = 0.;
402: for(j=0;j<(order+1);++j) {
403: if(j != basis) {
404: numtmp = 1.;
405: for(i=0;i<(order+1);++i) {
406: if(i!=basis && j!=i) {
407: numtmp *= (xval-xLocVal[i]);
408: }
409: }
410: numerator += numtmp;
411: }
412: }
414: return (numerator/denominator);
415: }
417: /* --------------------------------------------------------------------- */
419: static PetscReal ubdy(PetscReal x,PetscReal y)
420: {
421: return x*x*y*y;
422: }
424: static PetscReal src(PetscReal x,PetscReal y)
425: {
426: return -2.*y*y - 2.*x*x;
427: }
428: /* --------------------------------------------------------------------- */
430: static void leggaulob(double x1, double x2, double x[], double w[], int n)
431: /*******************************************************************************
432: Given the lower and upper limits of integration x1 and x2, and given n, this
433: routine returns arrays x[0..n-1] and w[0..n-1] of length n, containing the abscissas
434: and weights of the Gauss-Lobatto-Legendre n-point quadrature formula.
435: *******************************************************************************/
436: {
437: int j,m;
438: double z1,z,xm,xl,q,qp,Ln,scale;
439: double pi=3.1415926535897932385;
440: if(n==1) {
441: x[0]=x1; /* Scale the root to the desired interval, */
442: x[1]=x2; /* and put in its symmetric counterpart. */
443: w[0]=1.; /* Compute the weight */
444: w[1]=1.; /* and its symmetric counterpart. */
445: } else {
446: x[0]=x1; /* Scale the root to the desired interval, */
447: x[n]=x2; /* and put in its symmetric counterpart. */
448: w[0]=2./(n*(n+1));; /* Compute the weight */
449: w[n]=2./(n*(n+1)); /* and its symmetric counterpart. */
450: m=(n+1)/2; /* The roots are symmetric, so we only find half of them. */
451: xm=0.5*(x2+x1);
452: xl=0.5*(x2-x1);
453: for (j=1;j<=(m-1);j++) { /* Loop over the desired roots. */
454: z=-1.0*cos((pi*(j+0.25)/(n))-(3.0/(8.0*n*pi))*(1.0/(j+0.25)));
455: /* Starting with the above approximation to the ith root, we enter */
456: /* the main loop of refinement by Newton's method. */
457: do {
458: qAndLEvaluation(n,z,&q,&qp,&Ln);
459: z1=z;
460: z=z1-q/qp; /* Newton's method. */
461: } while (fabs(z-z1) > 3.0e-11);
462: qAndLEvaluation(n,z,&q,&qp,&Ln);
463: x[j]=xm+xl*z; /* Scale the root to the desired interval, */
464: x[n-j]=xm-xl*z; /* and put in its symmetric counterpart. */
465: w[j]=2.0/(n*(n+1)*Ln*Ln); /* Compute the weight */
466: w[n-j]=w[j]; /* and its symmetric counterpart. */
467: }
468: }
469: if(n%2==0) {
470: qAndLEvaluation(n,0.0,&q,&qp,&Ln);
471: x[n/2]=(x2-x1)/2.0;
472: w[n/2]=2.0/(n*(n+1)*Ln*Ln);
473: }
474: /* scale the weights according to mapping from [-1,1] to [0,1] */
475: scale = (x2-x1)/2.0;
476: for(j=0;j<=n;++j) w[j] = w[j]*scale;
477: }
480: /******************************************************************************/
481: static void qAndLEvaluation(int n, double x, double* q, double* qp, double* Ln)
482: /*******************************************************************************
483: Compute the polynomial qn(x) = L_{N+1}(x) - L_{n-1}(x) and its derivative in
484: addition to L_N(x) as these are needed for the GLL points. See the book titled
485: "Implementing Spectral Methods for Partial Differential Equations: Algorithms
486: for Scientists and Engineers" by David A. Kopriva.
487: *******************************************************************************/
488: {
489: int k;
491: double Lnp;
492: double Lnp1, Lnp1p;
493: double Lnm1, Lnm1p;
494: double Lnm2, Lnm2p;
496: Lnm1 = 1.0;
497: *Ln = x;
498: Lnm1p = 0.0;
499: Lnp = 1.0;
501: for(k=2; k<=n; ++k) {
502: Lnm2 = Lnm1;
503: Lnm1 = *Ln;
504: Lnm2p = Lnm1p;
505: Lnm1p = Lnp;
506: *Ln = (2.*k-1.)/(1.0*k)*x*Lnm1 - (k-1.)/(1.0*k)*Lnm2;
507: Lnp = Lnm2p + (2.0*k-1)*Lnm1;
508: }
509: k=n+1;
510: Lnp1 = (2.*k-1.)/(1.0*k)*x*(*Ln) - (k-1.)/(1.0*k)*Lnm1;
511: Lnp1p = Lnm1p + (2.0*k-1)*(*Ln);
512: *q = Lnp1 - Lnm1;
513: *qp = Lnp1p - Lnm1p;
514: }