Actual source code: ex59.c

petsc-3.12.5 2020-03-29
  1: static char help[] =  "This example illustrates the use of PCBDDC/FETI-DP and its customization.\n\n\
  2: Discrete system: 1D, 2D or 3D laplacian, discretized with spectral elements.\n\
  3: Spectral degree can be specified by passing values to -p option.\n\
  4: Global problem has either Dirichlet boundary conditions on one side or is a pure Neumann problem (depending on runtime parameters).\n\
  5: Domain is [-nex,nex]x[-ney,ney]x[-nez,nez]: ne_ number of elements in _ direction.\n\
  6: Example usage: \n\
  7: 1D: mpiexec -n 4 ex59 -nex 7\n\
  8: 2D: mpiexec -n 4 ex59 -npx 2 -npy 2 -nex 2 -ney 2\n\
  9: 3D: mpiexec -n 4 ex59 -npx 2 -npy 2 -npz 1 -nex 2 -ney 2 -nez 1\n\
 10: Subdomain decomposition can be specified with -np_ parameters.\n\
 11: Dirichlet boundary conditions are imposed on one side by default:\n\
 12: the solver does not iterate on Dirichlet nodes by default; if -usezerorows is passed in, it also iterates on Dirichlet nodes.\n\
 13: Pure Neumann case can be requested by passing in -pureneumann.\n\
 14: In the latter case, in order to avoid runtime errors during factorization, please also specify -coarse_redundant_pc_factor_zeropivot 0\n\n";

 16: #include <petscksp.h>
 17: #include <petscpc.h>
 18: #include <petscdm.h>
 19: #include <petscdmda.h>
 20: #include <petscblaslapack.h>
 21: #define DEBUG 0

 23: /* structure holding domain data */
 24: typedef struct {
 25:   /* communicator */
 26:   MPI_Comm gcomm;
 27:   /* space dimension */
 28:   PetscInt dim;
 29:   /* spectral degree */
 30:   PetscInt p;
 31:   /* subdomains per dimension */
 32:   PetscInt npx,npy,npz;
 33:   /* subdomain index in cartesian dimensions */
 34:   PetscInt ipx,ipy,ipz;
 35:   /* elements per dimension */
 36:   PetscInt nex,ney,nez;
 37:   /* local elements per dimension */
 38:   PetscInt nex_l,ney_l,nez_l;
 39:   /* global number of dofs per dimension */
 40:   PetscInt xm,ym,zm;
 41:   /* local number of dofs per dimension */
 42:   PetscInt xm_l,ym_l,zm_l;
 43:   /* starting global indexes for subdomain in lexicographic ordering */
 44:   PetscInt startx,starty,startz;
 45:   /* pure Neumann problem */
 46:   PetscBool pure_neumann;
 47:   /* Dirichlet BC implementation */
 48:   PetscBool DBC_zerorows;
 49:   /* Scaling factor for subdomain */
 50:   PetscScalar scalingfactor;
 51:   PetscBool   testkspfetidp;
 52: } DomainData;

 54: /* structure holding GLL data */
 55: typedef struct {
 56:   /* GLL nodes */
 57:   PetscReal   *zGL;
 58:   /* GLL weights */
 59:   PetscScalar *rhoGL;
 60:   /* auxiliary matrix for the 1D Laplacian */
 61:   PetscScalar **A;
 62:   /* Element matrix */
 63:   Mat elem_mat;
 64: } GLLData;


 67: static PetscErrorCode BuildCSRGraph(DomainData dd, PetscInt **xadj, PetscInt **adjncy)
 68: {
 70:   PetscInt       *xadj_temp,*adjncy_temp;
 71:   PetscInt       i,j,k,ii,jj,kk,iindex,count_adj;
 72:   PetscInt       istart_csr,iend_csr,jstart_csr,jend_csr,kstart_csr,kend_csr;
 73:   PetscBool      internal_node;

 75:   /* first count dimension of adjncy */
 76:   count_adj=0;
 77:   for (k=0; k<dd.zm_l; k++) {
 78:     internal_node = PETSC_TRUE;
 79:     kstart_csr    = k>0 ? k-1 : k;
 80:     kend_csr      = k<dd.zm_l-1 ? k+2 : k+1;
 81:     if (k == 0 || k == dd.zm_l-1) {
 82:       internal_node = PETSC_FALSE;
 83:       kstart_csr    = k;
 84:       kend_csr      = k+1;
 85:     }
 86:     for (j=0; j<dd.ym_l; j++) {
 87:       jstart_csr = j>0 ? j-1 : j;
 88:       jend_csr   = j<dd.ym_l-1 ? j+2 : j+1;
 89:       if (j == 0 || j == dd.ym_l-1) {
 90:         internal_node = PETSC_FALSE;
 91:         jstart_csr    = j;
 92:         jend_csr      = j+1;
 93:       }
 94:       for (i=0; i<dd.xm_l; i++) {
 95:         istart_csr = i>0 ? i-1 : i;
 96:         iend_csr   = i<dd.xm_l-1 ? i+2 : i+1;
 97:         if (i == 0 || i == dd.xm_l-1) {
 98:           internal_node = PETSC_FALSE;
 99:           istart_csr    = i;
100:           iend_csr      = i+1;
101:         }
102:         if (internal_node) {
103:           istart_csr = i;
104:           iend_csr   = i+1;
105:           jstart_csr = j;
106:           jend_csr   = j+1;
107:           kstart_csr = k;
108:           kend_csr   = k+1;
109:         }
110:         for (kk=kstart_csr;kk<kend_csr;kk++) {
111:           for (jj=jstart_csr;jj<jend_csr;jj++) {
112:             for (ii=istart_csr;ii<iend_csr;ii++) {
113:               count_adj=count_adj+1;
114:             }
115:           }
116:         }
117:       }
118:     }
119:   }
120:   PetscMalloc1(dd.xm_l*dd.ym_l*dd.zm_l+1,&xadj_temp);
121:   PetscMalloc1(count_adj,&adjncy_temp);

123:   /* now fill CSR data structure */
124:   count_adj=0;
125:   for (k=0; k<dd.zm_l; k++) {
126:     internal_node = PETSC_TRUE;
127:     kstart_csr    = k>0 ? k-1 : k;
128:     kend_csr      = k<dd.zm_l-1 ? k+2 : k+1;
129:     if (k == 0 || k == dd.zm_l-1) {
130:       internal_node = PETSC_FALSE;
131:       kstart_csr    = k;
132:       kend_csr      = k+1;
133:     }
134:     for (j=0; j<dd.ym_l; j++) {
135:       jstart_csr = j>0 ? j-1 : j;
136:       jend_csr   = j<dd.ym_l-1 ? j+2 : j+1;
137:       if (j == 0 || j == dd.ym_l-1) {
138:         internal_node = PETSC_FALSE;
139:         jstart_csr    = j;
140:         jend_csr      = j+1;
141:       }
142:       for (i=0; i<dd.xm_l; i++) {
143:         istart_csr = i>0 ? i-1 : i;
144:         iend_csr   = i<dd.xm_l-1 ? i+2 : i+1;
145:         if (i == 0 || i == dd.xm_l-1) {
146:           internal_node = PETSC_FALSE;
147:           istart_csr    = i;
148:           iend_csr      = i+1;
149:         }
150:         iindex            = k*dd.xm_l*dd.ym_l+j*dd.xm_l+i;
151:         xadj_temp[iindex] = count_adj;
152:         if (internal_node) {
153:           istart_csr = i;
154:           iend_csr   = i+1;
155:           jstart_csr = j;
156:           jend_csr   = j+1;
157:           kstart_csr = k;
158:           kend_csr   = k+1;
159:         }
160:         for (kk=kstart_csr; kk<kend_csr; kk++) {
161:           for (jj=jstart_csr; jj<jend_csr; jj++) {
162:             for (ii=istart_csr; ii<iend_csr; ii++) {
163:               iindex = kk*dd.xm_l*dd.ym_l+jj*dd.xm_l+ii;

165:               adjncy_temp[count_adj] = iindex;
166:               count_adj              = count_adj+1;
167:             }
168:           }
169:         }
170:       }
171:     }
172:   }
173:   xadj_temp[dd.xm_l*dd.ym_l*dd.zm_l] = count_adj;

175:   *xadj   = xadj_temp;
176:   *adjncy = adjncy_temp;
177:   return(0);
178: }
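
/* Editorial note (not part of the original source): a small worked example of the
   adjacency graph built above. Assuming a 2D subdomain with xm_l = ym_l = 3 (so zm_l = 1)
   and lexicographic node index j*xm_l+i, the loops produce
     node 0 (corner i=0,j=0): adjncy entries {0}         (ranges collapsed in x and y)
     node 1 (edge   i=1,j=0): adjncy entries {0,1,2}     (full 3-wide range only in x)
     node 4 (center i=1,j=1): adjncy entries {0,...,8}   (full 3x3 stencil)
   so that xadj = {0,1,4,5,8,17,20,21,24,25}. PCBDDC uses this local graph (passed in
   ComputeKSPBDDC below) to analyze the connectivity of the subdomain interface. */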

180: static PetscErrorCode ComputeSpecialBoundaryIndices(DomainData dd,IS *dirichlet,IS *neumann)
181: {
183:   IS             temp_dirichlet=0,temp_neumann=0;
184:   PetscInt       localsize,i,j,k,*indices;
185:   PetscBool      *touched;

188:   localsize = dd.xm_l*dd.ym_l*dd.zm_l;

190:   PetscMalloc1(localsize,&indices);
191:   PetscMalloc1(localsize,&touched);
192:   for (i=0; i<localsize; i++) touched[i] = PETSC_FALSE;

194:   if (dirichlet) {
195:     i = 0;
196:     /* west boundary */
197:     if (dd.ipx == 0) {
198:       for (k=0;k<dd.zm_l;k++) {
199:         for (j=0;j<dd.ym_l;j++) {
200:           indices[i]=k*dd.ym_l*dd.xm_l+j*dd.xm_l;
201:           touched[indices[i]]=PETSC_TRUE;
202:           i++;
203:         }
204:       }
205:     }
206:     ISCreateGeneral(dd.gcomm,i,indices,PETSC_COPY_VALUES,&temp_dirichlet);
207:   }
208:   if (neumann) {
209:     i = 0;
210:     /* west boundary */
211:     if (dd.ipx == 0) {
212:       for (k=0;k<dd.zm_l;k++) {
213:         for (j=0;j<dd.ym_l;j++) {
214:           indices[i]=k*dd.ym_l*dd.xm_l+j*dd.xm_l;
215:           if (!touched[indices[i]]) {
216:             touched[indices[i]]=PETSC_TRUE;
217:             i++;
218:           }
219:         }
220:       }
221:     }
222:     /* east boundary */
223:     if (dd.ipx == dd.npx-1) {
224:       for (k=0;k<dd.zm_l;k++) {
225:         for (j=0;j<dd.ym_l;j++) {
226:           indices[i]=k*dd.ym_l*dd.xm_l+j*dd.xm_l+dd.xm_l-1;
227:           if (!touched[indices[i]]) {
228:             touched[indices[i]]=PETSC_TRUE;
229:             i++;
230:           }
231:         }
232:       }
233:     }
234:     /* south boundary */
235:     if (dd.ipy == 0 && dd.dim > 1) {
236:       for (k=0;k<dd.zm_l;k++) {
237:         for (j=0;j<dd.xm_l;j++) {
238:           indices[i]=k*dd.ym_l*dd.xm_l+j;
239:           if (!touched[indices[i]]) {
240:             touched[indices[i]]=PETSC_TRUE;
241:             i++;
242:           }
243:         }
244:       }
245:     }
246:     /* north boundary */
247:     if (dd.ipy == dd.npy-1 && dd.dim > 1) {
248:       for (k=0;k<dd.zm_l;k++) {
249:         for (j=0;j<dd.xm_l;j++) {
250:           indices[i]=k*dd.ym_l*dd.xm_l+(dd.ym_l-1)*dd.xm_l+j;
251:           if (!touched[indices[i]]) {
252:             touched[indices[i]]=PETSC_TRUE;
253:             i++;
254:           }
255:         }
256:       }
257:     }
258:     /* bottom boundary */
259:     if (dd.ipz == 0 && dd.dim > 2) {
260:       for (k=0;k<dd.ym_l;k++) {
261:         for (j=0;j<dd.xm_l;j++) {
262:           indices[i]=k*dd.xm_l+j;
263:           if (!touched[indices[i]]) {
264:             touched[indices[i]]=PETSC_TRUE;
265:             i++;
266:           }
267:         }
268:       }
269:     }
270:     /* top boundary */
271:     if (dd.ipz == dd.npz-1 && dd.dim > 2) {
272:       for (k=0;k<dd.ym_l;k++) {
273:         for (j=0;j<dd.xm_l;j++) {
274:           indices[i]=(dd.zm_l-1)*dd.ym_l*dd.xm_l+k*dd.xm_l+j;
275:           if (!touched[indices[i]]) {
276:             touched[indices[i]]=PETSC_TRUE;
277:             i++;
278:           }
279:         }
280:       }
281:     }
282:     ISCreateGeneral(dd.gcomm,i,indices,PETSC_COPY_VALUES,&temp_neumann);
283:   }
284:   if (dirichlet) *dirichlet = temp_dirichlet;
285:   if (neumann) *neumann = temp_neumann;
286:   PetscFree(indices);
287:   PetscFree(touched);
288:   return(0);
289: }
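
/* Editorial note (not part of the original source): the index sets returned above use
   subdomain-local lexicographic numbering (index = k*ym_l*xm_l + j*xm_l + i). The Dirichlet
   set contains only the west face (i = 0) of subdomains with ipx == 0, while the Neumann set
   collects every face of the subdomain lying on the global boundary. They are consumed below
   by MatZeroRowsColumnsLocalIS and by PCBDDCSetDirichletBoundariesLocal /
   PCBDDCSetNeumannBoundariesLocal, all of which expect local indices. */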

291: static PetscErrorCode ComputeMapping(DomainData dd,ISLocalToGlobalMapping *isg2lmap)
292: {
293:   PetscErrorCode         ierr;
294:   DM                     da;
295:   AO                     ao;
296:   DMBoundaryType         bx = DM_BOUNDARY_NONE,by = DM_BOUNDARY_NONE, bz = DM_BOUNDARY_NONE;
297:   DMDAStencilType        stype = DMDA_STENCIL_BOX;
298:   ISLocalToGlobalMapping temp_isg2lmap;
299:   PetscInt               i,j,k,ig,jg,kg,lindex,gindex,localsize;
300:   PetscInt               *global_indices;

303:   /* Not an efficient mapping: this function computes a very simple lexicographic mapping
304:      just to illustrate the creation of a MATIS object */
305:   localsize = dd.xm_l*dd.ym_l*dd.zm_l;
306:   PetscMalloc1(localsize,&global_indices);
307:   for (k=0; k<dd.zm_l; k++) {
308:     kg=dd.startz+k;
309:     for (j=0; j<dd.ym_l; j++) {
310:       jg=dd.starty+j;
311:       for (i=0; i<dd.xm_l; i++) {
312:         ig                    =dd.startx+i;
313:         lindex                =k*dd.xm_l*dd.ym_l+j*dd.xm_l+i;
314:         gindex                =kg*dd.xm*dd.ym+jg*dd.xm+ig;
315:         global_indices[lindex]=gindex;
316:       }
317:     }
318:   }
319:   if (dd.dim==3) {
320:     DMDACreate3d(dd.gcomm,bx,by,bz,stype,dd.xm,dd.ym,dd.zm,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,1,1,NULL,NULL,NULL,&da);
321:   } else if (dd.dim==2) {
322:     DMDACreate2d(dd.gcomm,bx,by,stype,dd.xm,dd.ym,PETSC_DECIDE,PETSC_DECIDE,1,1,NULL,NULL,&da);
323:   } else {
324:     DMDACreate1d(dd.gcomm,bx,dd.xm,1,1,NULL,&da);
325:   }
326:   DMSetFromOptions(da);
327:   DMSetUp(da);
328:   DMDASetAOType(da,AOMEMORYSCALABLE);
329:   DMDAGetAO(da,&ao);
330:   AOApplicationToPetsc(ao,dd.xm_l*dd.ym_l*dd.zm_l,global_indices);
331:   ISLocalToGlobalMappingCreate(dd.gcomm,1,localsize,global_indices,PETSC_OWN_POINTER,&temp_isg2lmap);
332:   DMDestroy(&da);
333:   *isg2lmap = temp_isg2lmap;
334:   return(0);
335: }
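
/* Editorial note (not part of the original source): the mapping created above sends
   subdomain-local lexicographic indices to the global PETSc ordering of the DMDA, using
   AOApplicationToPetsc to convert from the natural (lexicographic) ordering. As a quick
   sanity check one could, for example, view it with
     ISLocalToGlobalMappingView(*isg2lmap,PETSC_VIEWER_STDOUT_WORLD);
   (this call is only an editorial suggestion and is not part of the example). */
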
336: static PetscErrorCode ComputeSubdomainMatrix(DomainData dd, GLLData glldata, Mat *local_mat)
337: {
339:   PetscInt       localsize,zloc,yloc,xloc,auxnex,auxney,auxnez;
340:   PetscInt       ie,je,ke,i,j,k,ig,jg,kg,ii,ming;
341:   PetscInt       *indexg,*cols,*colsg;
342:   PetscScalar    *vals;
343:   Mat            temp_local_mat,elem_mat_DBC=0,*usedmat;
344:   IS             submatIS;

347:   MatGetSize(glldata.elem_mat,&i,&j);
348:   PetscMalloc1(i,&indexg);
349:   PetscMalloc1(i,&colsg);
 350:   /* get submatrix of elem_mat without Dirichlet nodes */
351:   if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) {
352:     xloc = dd.p+1;
353:     yloc = 1;
354:     zloc = 1;
355:     if (dd.dim>1) yloc = dd.p+1;
356:     if (dd.dim>2) zloc = dd.p+1;
357:     ii = 0;
358:     for (k=0;k<zloc;k++) {
359:       for (j=0;j<yloc;j++) {
360:         for (i=1;i<xloc;i++) {
361:           indexg[ii]=k*xloc*yloc+j*xloc+i;
362:           ii++;
363:         }
364:       }
365:     }
366:     ISCreateGeneral(PETSC_COMM_SELF,ii,indexg,PETSC_COPY_VALUES,&submatIS);
367:     MatCreateSubMatrix(glldata.elem_mat,submatIS,submatIS,MAT_INITIAL_MATRIX,&elem_mat_DBC);
368:     ISDestroy(&submatIS);
369:   }

371:   /* Assemble subdomain matrix */
372:   localsize = dd.xm_l*dd.ym_l*dd.zm_l;
373:   MatCreate(PETSC_COMM_SELF,&temp_local_mat);
374:   MatSetSizes(temp_local_mat,localsize,localsize,localsize,localsize);
375:   MatSetOptionsPrefix(temp_local_mat,"subdomain_");
 376:   /* set local matrices type: here we use SEQSBAIJ primarily for testing purposes */
 377:   /* in order to avoid conversions inside the BDDC code, use SEQAIJ if possible */
378:   if (dd.DBC_zerorows && !dd.ipx) { /* in this case, we need to zero out some of the rows, so use seqaij */
379:     MatSetType(temp_local_mat,MATSEQAIJ);
380:   } else {
381:     MatSetType(temp_local_mat,MATSEQSBAIJ);
382:   }
383:   MatSetFromOptions(temp_local_mat);

385:   i = PetscPowRealInt(3.0*(dd.p+1.0),dd.dim);

387:   MatSeqAIJSetPreallocation(temp_local_mat,i,NULL);      /* very overestimated */
388:   MatSeqSBAIJSetPreallocation(temp_local_mat,1,i,NULL);      /* very overestimated */
389:   MatSeqBAIJSetPreallocation(temp_local_mat,1,i,NULL);      /* very overestimated */
390:   MatSetOption(temp_local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);

392:   yloc = dd.p+1;
393:   zloc = dd.p+1;
394:   if (dd.dim < 3) zloc = 1;
395:   if (dd.dim < 2) yloc = 1;

397:   auxnez = dd.nez_l;
398:   auxney = dd.ney_l;
399:   auxnex = dd.nex_l;
400:   if (dd.dim < 3) auxnez = 1;
401:   if (dd.dim < 2) auxney = 1;

403:   for (ke=0; ke<auxnez; ke++) {
404:     for (je=0; je<auxney; je++) {
405:       for (ie=0; ie<auxnex; ie++) {
406:         /* customize element accounting for BC */
407:         xloc    = dd.p+1;
408:         ming    = 0;
409:         usedmat = &glldata.elem_mat;
410:         if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) {
411:           if (ie == 0) {
412:             xloc    = dd.p;
413:             usedmat = &elem_mat_DBC;
414:           } else {
415:             ming    = -1;
416:             usedmat = &glldata.elem_mat;
417:           }
418:         }
 419:         /* map element-local indices to subdomain-local indices */
420:         for (k=0; k<zloc; k++) {
421:           kg = ke*dd.p+k;
422:           for (j=0; j<yloc; j++) {
423:             jg = je*dd.p+j;
424:             for (i=0; i<xloc; i++) {
425:               ig         = ie*dd.p+i+ming;
426:               ii         = k*xloc*yloc+j*xloc+i;
427:               indexg[ii] = kg*dd.xm_l*dd.ym_l+jg*dd.xm_l+ig;
428:             }
429:           }
430:         }
431:         /* Set values */
432:         for (i=0; i<xloc*yloc*zloc; i++) {
433:           MatGetRow(*usedmat,i,&j,(const PetscInt**)&cols,(const PetscScalar**)&vals);
434:           for (k=0; k<j; k++) colsg[k] = indexg[cols[k]];
435:           MatSetValues(temp_local_mat,1,&indexg[i],j,colsg,vals,ADD_VALUES);
436:           MatRestoreRow(*usedmat,i,&j,(const PetscInt**)&cols,(const PetscScalar**)&vals);
437:         }
438:       }
439:     }
440:   }
441:   PetscFree(indexg);
442:   PetscFree(colsg);
443:   MatAssemblyBegin(temp_local_mat,MAT_FINAL_ASSEMBLY);
444:   MatAssemblyEnd  (temp_local_mat,MAT_FINAL_ASSEMBLY);
445: #if DEBUG
446:   {
447:     Vec       lvec,rvec;
448:     PetscReal norm;
449:     MatCreateVecs(temp_local_mat,&lvec,&rvec);
450:     VecSet(lvec,1.0);
451:     MatMult(temp_local_mat,lvec,rvec);
452:     VecNorm(rvec,NORM_INFINITY,&norm);
453:     VecDestroy(&lvec);
454:     VecDestroy(&rvec);
455:   }
456: #endif
457:   *local_mat = temp_local_mat;
458:   MatDestroy(&elem_mat_DBC);
459:   return(0);
460: }

462: static PetscErrorCode GLLStuffs(DomainData dd, GLLData *glldata)
463: {
465:   PetscReal      *M,si;
466:   PetscScalar    x,z0,z1,z2,Lpj,Lpr,rhoGLj,rhoGLk;
467:   PetscBLASInt   pm1,lierr;
468:   PetscInt       i,j,n,k,s,r,q,ii,jj,p=dd.p;
469:   PetscInt       xloc,yloc,zloc,xyloc,xyzloc;

472:   /* Gauss-Lobatto-Legendre nodes zGL on [-1,1] */
473:   PetscCalloc1(p+1,&glldata->zGL);

475:   glldata->zGL[0]=-1.0;
476:   glldata->zGL[p]= 1.0;
477:   if (p > 1) {
478:     if (p == 2) glldata->zGL[1]=0.0;
479:     else {
480:       PetscMalloc1(p-1,&M);
481:       for (i=0; i<p-1; i++) {
482:         si  = (PetscReal)(i+1.0);
483:         M[i]=0.5*PetscSqrtReal(si*(si+2.0)/((si+0.5)*(si+1.5)));
484:       }
485:       pm1  = p-1;
486:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
487: #if defined(PETSC_MISSING_LAPACK_STEQR)
488:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STEQR - Lapack routine is unavailable.");
489: #else
490:       PetscStackCallBLAS("LAPACKsteqr",LAPACKsteqr_("N",&pm1,&glldata->zGL[1],M,&x,&pm1,M,&lierr));
 491:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in STEQR Lapack routine %d",(int)lierr);
492: #endif
493:       PetscFPTrapPop();
494:       PetscFree(M);
495:     }
496:   }

498:   /* Weights for 1D quadrature */
499:   PetscMalloc1(p+1,&glldata->rhoGL);

501:   glldata->rhoGL[0]=2.0/(PetscScalar)(p*(p+1.0));
502:   glldata->rhoGL[p]=glldata->rhoGL[0];
503:   z2 = -1;                      /* Dummy value to avoid -Wmaybe-initialized */
504:   for (i=1; i<p; i++) {
505:     x  = glldata->zGL[i];
506:     z0 = 1.0;
507:     z1 = x;
508:     for (n=1; n<p; n++) {
509:       z2 = x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
510:       z0 = z1;
511:       z1 = z2;
512:     }
513:     glldata->rhoGL[i]=2.0/(p*(p+1.0)*z2*z2);
514:   }
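
/* Editorial note (not part of the original source): the inner loop above evaluates the
   Legendre polynomial P_p at x by the three-term recurrence
     (n+1) P_{n+1}(x) = (2n+1) x P_n(x) - n P_{n-1}(x),   P_0 = 1, P_1 = x,
   so that z2 ends up holding P_p(x); the weights computed here are the standard GLL
   quadrature weights rho_j = 2 / (p (p+1) [P_p(x_j)]^2), with rho_0 = rho_p = 2/(p(p+1)).
   The interior nodes computed further above are the roots of P_p', obtained as the
   eigenvalues of a symmetric tridiagonal Jacobi matrix via LAPACK steqr. */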

516:   /* Auxiliary mat for laplacian */
517:   PetscMalloc1(p+1,&glldata->A);
518:   PetscMalloc1((p+1)*(p+1),&glldata->A[0]);
519:   for (i=1; i<p+1; i++) glldata->A[i]=glldata->A[i-1]+p+1;

521:   for (j=1; j<p; j++) {
522:     x =glldata->zGL[j];
523:     z0=1.0;
524:     z1=x;
525:     for (n=1; n<p; n++) {
526:       z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
527:       z0=z1;
528:       z1=z2;
529:     }
530:     Lpj=z2;
531:     for (r=1; r<p; r++) {
532:       if (r == j) {
533:         glldata->A[j][j]=2.0/(3.0*(1.0-glldata->zGL[j]*glldata->zGL[j])*Lpj*Lpj);
534:       } else {
535:         x  = glldata->zGL[r];
536:         z0 = 1.0;
537:         z1 = x;
538:         for (n=1; n<p; n++) {
539:           z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
540:           z0=z1;
541:           z1=z2;
542:         }
543:         Lpr             = z2;
544:         glldata->A[r][j]=4.0/(p*(p+1.0)*Lpj*Lpr*(glldata->zGL[j]-glldata->zGL[r])*(glldata->zGL[j]-glldata->zGL[r]));
545:       }
546:     }
547:   }
548:   for (j=1; j<p+1; j++) {
549:     x  = glldata->zGL[j];
550:     z0 = 1.0;
551:     z1 = x;
552:     for (n=1; n<p; n++) {
553:       z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
554:       z0=z1;
555:       z1=z2;
556:     }
557:     Lpj             = z2;
558:     glldata->A[j][0]=4.0*PetscPowRealInt(-1.0,p)/(p*(p+1.0)*Lpj*(1.0+glldata->zGL[j])*(1.0+glldata->zGL[j]));
559:     glldata->A[0][j]=glldata->A[j][0];
560:   }
561:   for (j=0; j<p; j++) {
562:     x  = glldata->zGL[j];
563:     z0 = 1.0;
564:     z1 = x;
565:     for (n=1; n<p; n++) {
566:       z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
567:       z0=z1;
568:       z1=z2;
569:     }
570:     Lpj=z2;

572:     glldata->A[p][j]=4.0/(p*(p+1.0)*Lpj*(1.0-glldata->zGL[j])*(1.0-glldata->zGL[j]));
573:     glldata->A[j][p]=glldata->A[p][j];
574:   }
575:   glldata->A[0][0]=0.5+(p*(p+1.0)-2.0)/6.0;
576:   glldata->A[p][p]=glldata->A[0][0];

578:   /* compute element matrix */
579:   xloc = p+1;
580:   yloc = p+1;
581:   zloc = p+1;
582:   if (dd.dim<2) yloc=1;
583:   if (dd.dim<3) zloc=1;
584:   xyloc  = xloc*yloc;
585:   xyzloc = xloc*yloc*zloc;

587:   MatCreate(PETSC_COMM_SELF,&glldata->elem_mat);
588:   MatSetSizes(glldata->elem_mat,xyzloc,xyzloc,xyzloc,xyzloc);
589:   MatSetType(glldata->elem_mat,MATSEQAIJ);
590:   MatSeqAIJSetPreallocation(glldata->elem_mat,xyzloc,NULL); /* overestimated */
591:   MatZeroEntries(glldata->elem_mat);
592:   MatSetOption(glldata->elem_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);
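
/* Editorial note (not part of the original source): with the lexicographic index
   ii = k*xyloc + j*xloc + i, the loops below assemble the spectral-element Laplacian as a
   sum of tensor (Kronecker) products
     K = kron(W_z, W_y, A_x) + kron(W_z, A_y, W_x) + kron(A_z, W_y, W_x),
   where A is the 1D stiffness matrix glldata->A and W = diag(rhoGL) holds the 1D GLL
   weights; the y and z terms are skipped when dd.dim < 2 or dd.dim < 3. */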

594:   for (k=0; k<zloc; k++) {
595:     if (dd.dim>2) rhoGLk=glldata->rhoGL[k];
596:     else rhoGLk=1.0;

598:     for (j=0; j<yloc; j++) {
599:       if (dd.dim>1) rhoGLj=glldata->rhoGL[j];
600:       else rhoGLj=1.0;

602:       for (i=0; i<xloc; i++) {
603:         ii = k*xyloc+j*xloc+i;
604:         s  = k;
605:         r  = j;
606:         for (q=0; q<xloc; q++) {
607:           jj   = s*xyloc+r*xloc+q;
608:           MatSetValue(glldata->elem_mat,jj,ii,glldata->A[i][q]*rhoGLj*rhoGLk,ADD_VALUES);
609:         }
610:         if (dd.dim>1) {
611:           s=k;
612:           q=i;
613:           for (r=0; r<yloc; r++) {
614:             jj   = s*xyloc+r*xloc+q;
615:             MatSetValue(glldata->elem_mat,jj,ii,glldata->A[j][r]*glldata->rhoGL[i]*rhoGLk,ADD_VALUES);
616:           }
617:         }
618:         if (dd.dim>2) {
619:           r=j;
620:           q=i;
621:           for (s=0; s<zloc; s++) {
622:             jj   = s*xyloc+r*xloc+q;
623:             MatSetValue(glldata->elem_mat,jj,ii,glldata->A[k][s]*rhoGLj*glldata->rhoGL[i],ADD_VALUES);
624:           }
625:         }
626:       }
627:     }
628:   }
629:   MatAssemblyBegin(glldata->elem_mat,MAT_FINAL_ASSEMBLY);
630:   MatAssemblyEnd  (glldata->elem_mat,MAT_FINAL_ASSEMBLY);
631: #if DEBUG
632:   {
633:     Vec       lvec,rvec;
634:     PetscReal norm;
635:     MatCreateVecs(glldata->elem_mat,&lvec,&rvec);
636:     VecSet(lvec,1.0);
637:     MatMult(glldata->elem_mat,lvec,rvec);
638:     VecNorm(rvec,NORM_INFINITY,&norm);
639:     VecDestroy(&lvec);
640:     VecDestroy(&rvec);
641:   }
642: #endif
643:   return(0);
644: }

646: static PetscErrorCode DomainDecomposition(DomainData *dd)
647: {
648:   PetscMPIInt rank;
649:   PetscInt    i,j,k;

652:   /* Subdomain index in cartesian coordinates */
653:   MPI_Comm_rank(dd->gcomm,&rank);
654:   dd->ipx = rank%dd->npx;
655:   if (dd->dim>1) dd->ipz = rank/(dd->npx*dd->npy);
656:   else dd->ipz = 0;

658:   dd->ipy = rank/dd->npx-dd->ipz*dd->npy;
659:   /* number of local elements */
660:   dd->nex_l = dd->nex/dd->npx;
661:   if (dd->ipx < dd->nex%dd->npx) dd->nex_l++;
662:   if (dd->dim>1) {
663:     dd->ney_l = dd->ney/dd->npy;
664:     if (dd->ipy < dd->ney%dd->npy) dd->ney_l++;
665:   } else {
666:     dd->ney_l = 0;
667:   }
668:   if (dd->dim>2) {
669:     dd->nez_l = dd->nez/dd->npz;
670:     if (dd->ipz < dd->nez%dd->npz) dd->nez_l++;
671:   } else {
672:     dd->nez_l = 0;
673:   }
674:   /* local and global number of dofs */
675:   dd->xm_l = dd->nex_l*dd->p+1;
676:   dd->xm   = dd->nex*dd->p+1;
677:   dd->ym_l = dd->ney_l*dd->p+1;
678:   dd->ym   = dd->ney*dd->p+1;
679:   dd->zm_l = dd->nez_l*dd->p+1;
680:   dd->zm   = dd->nez*dd->p+1;
681:   if (!dd->pure_neumann) {
682:     if (!dd->DBC_zerorows && !dd->ipx) dd->xm_l--;
683:     if (!dd->DBC_zerorows) dd->xm--;
684:   }

686:   /* starting global index for local dofs (simple lexicographic order) */
687:   dd->startx = 0;
688:   j          = dd->nex/dd->npx;
689:   for (i=0; i<dd->ipx; i++) {
690:     k = j;
691:     if (i<dd->nex%dd->npx) k++;
692:     dd->startx = dd->startx+k*dd->p;
693:   }
694:   if (!dd->pure_neumann && !dd->DBC_zerorows && dd->ipx) dd->startx--;

696:   dd->starty = 0;
697:   if (dd->dim > 1) {
698:     j = dd->ney/dd->npy;
699:     for (i=0; i<dd->ipy; i++) {
700:       k = j;
701:       if (i<dd->ney%dd->npy) k++;
702:       dd->starty = dd->starty+k*dd->p;
703:     }
704:   }
705:   dd->startz = 0;
706:   if (dd->dim > 2) {
707:     j = dd->nez/dd->npz;
708:     for (i=0; i<dd->ipz; i++) {
709:       k = j;
710:       if (i<dd->nez%dd->npz) k++;
711:       dd->startz = dd->startz+k*dd->p;
712:     }
713:   }
714:   return(0);
715: }
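
/* Editorial example (not part of the original source), assuming the 1D run from the help
   string, mpiexec -n 4 ex59 -nex 7, with the default p = 3 and default Dirichlet handling
   (no -usezerorows): nex_l is 2,2,2,1 on ranks 0..3; xm = 7*3+1 = 22 is reduced to 21
   because the Dirichlet dof is eliminated; xm_l becomes 6,7,7,4 (rank 0 loses the Dirichlet
   dof) and startx becomes 0,5,11,17, so neighbouring subdomains share exactly one dof. */
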
716: static PetscErrorCode ComputeMatrix(DomainData dd, Mat *A)
717: {
718:   PetscErrorCode         ierr;
719:   GLLData                gll;
720:   Mat                    local_mat  =0,temp_A=0;
721:   ISLocalToGlobalMapping matis_map  =0;
722:   IS                     dirichletIS=0;

 725:   /* Compute Gauss-Lobatto-Legendre quadrature data (nodes, weights, element matrix) */
726:   GLLStuffs(dd,&gll);
727:   /* Compute matrix of subdomain Neumann problem */
728:   ComputeSubdomainMatrix(dd,gll,&local_mat);
729:   /* Compute global mapping of local dofs */
730:   ComputeMapping(dd,&matis_map);
731:   /* Create MATIS object needed by BDDC */
732:   MatCreateIS(dd.gcomm,1,PETSC_DECIDE,PETSC_DECIDE,dd.xm*dd.ym*dd.zm,dd.xm*dd.ym*dd.zm,matis_map,NULL,&temp_A);
733:   /* Set local subdomain matrices into MATIS object */
734:   MatScale(local_mat,dd.scalingfactor);
735:   MatISSetLocalMat(temp_A,local_mat);
736:   /* Call assembly functions */
737:   MatAssemblyBegin(temp_A,MAT_FINAL_ASSEMBLY);
738:   MatAssemblyEnd(temp_A,MAT_FINAL_ASSEMBLY);

740:   if (dd.DBC_zerorows) {
741:     PetscInt dirsize;

743:     ComputeSpecialBoundaryIndices(dd,&dirichletIS,NULL);
744:     MatSetOption(local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);
745:     MatZeroRowsColumnsLocalIS(temp_A,dirichletIS,1.0,NULL,NULL);
746:     ISGetLocalSize(dirichletIS,&dirsize);
747:     ISDestroy(&dirichletIS);
748:   }

750:   /* giving hints to local and global matrices could be useful for the BDDC */
751:   MatSetOption(local_mat,MAT_SYMMETRIC,PETSC_TRUE);
752:   MatSetOption(local_mat,MAT_SPD,PETSC_TRUE);
753: #if DEBUG
754:   {
755:     Vec       lvec,rvec;
756:     PetscReal norm;
757:     MatCreateVecs(temp_A,&lvec,&rvec);
758:     VecSet(lvec,1.0);
759:     MatMult(temp_A,lvec,rvec);
760:     VecNorm(rvec,NORM_INFINITY,&norm);
761:     VecDestroy(&lvec);
762:     VecDestroy(&rvec);
763:   }
764: #endif
765:   /* free allocated workspace */
766:   PetscFree(gll.zGL);
767:   PetscFree(gll.rhoGL);
768:   PetscFree(gll.A[0]);
769:   PetscFree(gll.A);
770:   MatDestroy(&gll.elem_mat);
771:   MatDestroy(&local_mat);
772:   ISLocalToGlobalMappingDestroy(&matis_map);
 773:   /* give back the pointer to the MATIS object */
774:   *A = temp_A;
775:   return(0);
776: }
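
/* Editorial note (not part of the original source): the matrix returned above is a MATIS,
   i.e. the global operator is stored in unassembled form as the subdomain Neumann matrices
   plus the local-to-global mapping; this is exactly the format PCBDDC (and the FETI-DP
   operators below) work with. */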

778: static PetscErrorCode ComputeKSPFETIDP(DomainData dd, KSP ksp_bddc, KSP *ksp_fetidp)
779: {
781:   KSP            temp_ksp;
782:   PC             pc;

785:   KSPGetPC(ksp_bddc,&pc);
786:   if (!dd.testkspfetidp) {
787:     PC  D;
788:     Mat F;

790:     PCBDDCCreateFETIDPOperators(pc,PETSC_TRUE,NULL,&F,&D);
791:     KSPCreate(PetscObjectComm((PetscObject)F),&temp_ksp);
792:     KSPSetOperators(temp_ksp,F,F);
793:     KSPSetType(temp_ksp,KSPCG);
794:     KSPSetPC(temp_ksp,D);
795:     KSPSetComputeSingularValues(temp_ksp,PETSC_TRUE);
796:     KSPSetOptionsPrefix(temp_ksp,"fluxes_");
797:     KSPSetFromOptions(temp_ksp);
798:     KSPSetUp(temp_ksp);
799:     MatDestroy(&F);
800:     PCDestroy(&D);
801:   } else {
802:     Mat A,Ap;

804:     KSPCreate(PetscObjectComm((PetscObject)ksp_bddc),&temp_ksp);
805:     KSPGetOperators(ksp_bddc,&A,&Ap);
806:     KSPSetOperators(temp_ksp,A,Ap);
807:     KSPSetOptionsPrefix(temp_ksp,"fluxes_");
808:     KSPSetType(temp_ksp,KSPFETIDP);
809:     KSPFETIDPSetInnerBDDC(temp_ksp,pc);
810:     KSPSetComputeSingularValues(temp_ksp,PETSC_TRUE);
811:     KSPSetFromOptions(temp_ksp);
812:     KSPSetUp(temp_ksp);
813:   }
814:   *ksp_fetidp = temp_ksp;
815:   return(0);
816: }
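
/* Editorial note (not part of the original source): the routine above provides two ways of
   testing FETI-DP. With -testkspfetidp false, the FETI-DP flux operator F and the
   corresponding preconditioner D are created explicitly from the BDDC preconditioner with
   PCBDDCCreateFETIDPOperators and solved by CG under the "fluxes_" prefix; otherwise a
   KSPFETIDP object is created and the BDDC preconditioner is passed to it with
   KSPFETIDPSetInnerBDDC. */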


819: static PetscErrorCode ComputeKSPBDDC(DomainData dd,Mat A,KSP *ksp)
820: {
822:   KSP            temp_ksp;
823:   PC             pc;
824:   IS             primals,dirichletIS=0,neumannIS=0,*bddc_dofs_splitting;
825:   PetscInt       vidx[8],localsize,*xadj=NULL,*adjncy=NULL;
826:   MatNullSpace   near_null_space;

829:   KSPCreate(dd.gcomm,&temp_ksp);
830:   KSPSetOperators(temp_ksp,A,A);
831:   KSPSetType(temp_ksp,KSPCG);
832:   KSPGetPC(temp_ksp,&pc);
833:   PCSetType(pc,PCBDDC);

835:   localsize = dd.xm_l*dd.ym_l*dd.zm_l;

837:   /* BDDC customization */

839:   /* jumping coefficients case */
840:   PCISSetSubdomainScalingFactor(pc,dd.scalingfactor);

842:   /* Dofs splitting
843:      Simple stride-1 IS
844:      It is not needed since, by default, PCBDDC assumes a stride-1 split */
845:   PetscMalloc1(1,&bddc_dofs_splitting);
846: #if 1
847:   ISCreateStride(PETSC_COMM_WORLD,localsize,0,1,&bddc_dofs_splitting[0]);
848:   PCBDDCSetDofsSplittingLocal(pc,1,bddc_dofs_splitting);
849: #else
850:   /* examples for global ordering */

852:   /* each process lists the nodes it owns */
853:   PetscInt sr,er;
854:   MatGetOwnershipRange(A,&sr,&er);
855:   ISCreateStride(PETSC_COMM_WORLD,er-sr,sr,1,&bddc_dofs_splitting[0]);
856:   PCBDDCSetDofsSplitting(pc,1,bddc_dofs_splitting);
857:   /* Split can be passed in a more general way since any process can list any node */
858: #endif
859:   ISDestroy(&bddc_dofs_splitting[0]);
860:   PetscFree(bddc_dofs_splitting);

 862:   /* Primal constraints implemented by using a near null space attached to A -> here it passes in only the constants
 863:     (which in practice is not needed, since by default PCBDDC builds the primal space using the constants) */
864: #if 0
865:   Vec vecs[2];
866:   PetscRandom rctx;
867:   MatCreateVecs(A,&vecs[0],&vecs[1]);
868:   PetscRandomCreate(dd.gcomm,&rctx);
869:   VecSetRandom(vecs[0],rctx);
870:   VecSetRandom(vecs[1],rctx);
871:   MatNullSpaceCreate(dd.gcomm,PETSC_TRUE,2,vecs,&near_null_space);
872:   VecDestroy(&vecs[0]);
873:   VecDestroy(&vecs[1]);
874:   PetscRandomDestroy(&rctx);
875: #else
876:   MatNullSpaceCreate(dd.gcomm,PETSC_TRUE,0,NULL,&near_null_space);
877: #endif
878:   MatSetNearNullSpace(A,near_null_space);
879:   MatNullSpaceDestroy(&near_null_space);

881:   /* CSR graph of subdomain dofs */
882:   BuildCSRGraph(dd,&xadj,&adjncy);
883:   PCBDDCSetLocalAdjacencyGraph(pc,localsize,xadj,adjncy,PETSC_OWN_POINTER);

885:   /* Prescribe user-defined primal vertices: in this case we use the 8 corners in 3D (for 2D and 1D, the indices are duplicated) */
886:   vidx[0] = 0*dd.xm_l+0;
887:   vidx[1] = 0*dd.xm_l+dd.xm_l-1;
888:   vidx[2] = (dd.ym_l-1)*dd.xm_l+0;
889:   vidx[3] = (dd.ym_l-1)*dd.xm_l+dd.xm_l-1;
890:   vidx[4] = (dd.zm_l-1)*dd.xm_l*dd.ym_l+0*dd.xm_l+0;
891:   vidx[5] = (dd.zm_l-1)*dd.xm_l*dd.ym_l+0*dd.xm_l+dd.xm_l-1;
892:   vidx[6] = (dd.zm_l-1)*dd.xm_l*dd.ym_l+(dd.ym_l-1)*dd.xm_l+0;
893:   vidx[7] = (dd.zm_l-1)*dd.xm_l*dd.ym_l+(dd.ym_l-1)*dd.xm_l+dd.xm_l-1;
894:   ISCreateGeneral(dd.gcomm,8,vidx,PETSC_COPY_VALUES,&primals);
895:   PCBDDCSetPrimalVerticesLocalIS(pc,primals);
896:   ISDestroy(&primals);

898:   /* Neumann/Dirichlet indices on the global boundary */
899:   if (dd.DBC_zerorows) {
 900:     /* Only if you eliminate some rows of the matrix with the zerorows function do you need to set the Dirichlet indices in the PCBDDC data */
901:     ComputeSpecialBoundaryIndices(dd,&dirichletIS,&neumannIS);
902:     PCBDDCSetNeumannBoundariesLocal(pc,neumannIS);
903:     PCBDDCSetDirichletBoundariesLocal(pc,dirichletIS);
904:   } else {
905:     if (dd.pure_neumann) {
 906:       /* In such a case, all interface nodes lying on the global boundary are Neumann nodes */
907:       ComputeSpecialBoundaryIndices(dd,NULL,&neumannIS);
908:       PCBDDCSetNeumannBoundariesLocal(pc,neumannIS);
909:     } else {
 910:       /* It is wrong to set Dirichlet indices without having zeroed the corresponding rows in the global matrix */
 911:       /* But we can still compute them, since nodes near the Dirichlet boundaries do not need to be defined as Neumann nodes */
912:       ComputeSpecialBoundaryIndices(dd,&dirichletIS,&neumannIS);
913:       PCBDDCSetNeumannBoundariesLocal(pc,neumannIS);
914:     }
915:   }

917:   /* Pass local null space information to local matrices (needed when using approximate local solvers) */
918:   if (dd.ipx || dd.pure_neumann) {
919:     MatNullSpace nsp;
920:     Mat          local_mat;

922:     MatISGetLocalMat(A,&local_mat);
923:     MatNullSpaceCreate(PETSC_COMM_SELF,PETSC_TRUE,0,NULL,&nsp);
924:     MatSetNullSpace(local_mat,nsp);
925:     MatNullSpaceDestroy(&nsp);
926:   }
927:   KSPSetComputeSingularValues(temp_ksp,PETSC_TRUE);
928:   KSPSetOptionsPrefix(temp_ksp,"physical_");
929:   KSPSetFromOptions(temp_ksp);
930:   KSPSetUp(temp_ksp);
931:   *ksp = temp_ksp;
932:   ISDestroy(&dirichletIS);
933:   ISDestroy(&neumannIS);
934:   return(0);
935: }
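
/* Editorial note (not part of the original source): because of the "physical_" options
   prefix set above, the BDDC solver can be further customized at runtime, e.g. with
   -physical_pc_bddc_levels, -physical_pc_bddc_coarsening_ratio or
   -physical_pc_bddc_use_deluxe_scaling, as exercised by the TEST section at the end of
   this file. */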

937: static PetscErrorCode InitializeDomainData(DomainData *dd)
938: {
940:   PetscMPIInt    sizes,rank;
941:   PetscInt       factor;

944:   dd->gcomm = PETSC_COMM_WORLD;
945:   MPI_Comm_size(dd->gcomm,&sizes);
946:   MPI_Comm_rank(dd->gcomm,&rank);
 947:   /* Get information from the command line */
948:   /* Processors/subdomains per dimension */
949:   /* Default is 1d problem */
950:   dd->npx = sizes;
951:   dd->npy = 0;
952:   dd->npz = 0;
953:   dd->dim = 1;
954:   PetscOptionsGetInt(NULL,NULL,"-npx",&dd->npx,NULL);
955:   PetscOptionsGetInt(NULL,NULL,"-npy",&dd->npy,NULL);
956:   PetscOptionsGetInt(NULL,NULL,"-npz",&dd->npz,NULL);
957:   if (dd->npy) dd->dim++;
958:   if (dd->npz) dd->dim++;
959:   /* Number of elements per dimension */
960:   /* Default is one element per subdomain */
961:   dd->nex = dd->npx;
962:   dd->ney = dd->npy;
963:   dd->nez = dd->npz;
964:   PetscOptionsGetInt(NULL,NULL,"-nex",&dd->nex,NULL);
965:   PetscOptionsGetInt(NULL,NULL,"-ney",&dd->ney,NULL);
966:   PetscOptionsGetInt(NULL,NULL,"-nez",&dd->nez,NULL);
967:   if (!dd->npy) {
968:     dd->ney=0;
969:     dd->nez=0;
970:   }
971:   if (!dd->npz) dd->nez=0;
972:   /* Spectral degree */
973:   dd->p = 3;
974:   PetscOptionsGetInt(NULL,NULL,"-p",&dd->p,NULL);
 975:   /* pure Neumann problem? */
976:   dd->pure_neumann = PETSC_FALSE;
977:   PetscOptionsGetBool(NULL,NULL,"-pureneumann",&dd->pure_neumann,NULL);

 979:   /* How to enforce Dirichlet boundary conditions (in case pureneumann has not been requested explicitly) */
980:   dd->DBC_zerorows = PETSC_FALSE;

982:   PetscOptionsGetBool(NULL,NULL,"-usezerorows",&dd->DBC_zerorows,NULL);
983:   if (dd->pure_neumann) dd->DBC_zerorows = PETSC_FALSE;
984:   dd->scalingfactor = 1.0;

 986:   factor = 0;
987:   PetscOptionsGetInt(NULL,NULL,"-jump",&factor,NULL);
988:   /* checkerboard pattern */
989:   dd->scalingfactor = PetscPowScalar(10.0,(PetscScalar)factor*PetscPowScalar(-1.0,(PetscScalar)rank));
990:   /* test data passed in */
991:   if (dd->dim==1) {
992:     if (sizes!=dd->npx) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of mpi procs in 1D must be equal to npx");
 993:     if (dd->nex<dd->npx) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of elements per dim must be greater than or equal to the number of procs per dim");
994:   } else if (dd->dim==2) {
995:     if (sizes!=dd->npx*dd->npy) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of mpi procs in 2D must be equal to npx*npy");
 996:     if (dd->nex<dd->npx || dd->ney<dd->npy) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of elements per dim must be greater than or equal to the number of procs per dim");
997:   } else {
998:     if (sizes!=dd->npx*dd->npy*dd->npz) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of mpi procs in 3D must be equal to npx*npy*npz");
 999:     if (dd->nex<dd->npx || dd->ney<dd->npy || dd->nez<dd->npz) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of elements per dim must be greater than or equal to the number of procs per dim");
1000:   }
1001:   return(0);
1002: }

1004: int main(int argc,char **args)
1005: {
1006:   PetscErrorCode     ierr;
1007:   DomainData         dd;
1008:   PetscReal          norm,maxeig,mineig;
1009:   PetscScalar        scalar_value;
1010:   PetscInt           ndofs,its;
1011:   Mat                A = NULL,F = NULL;
1012:   KSP                KSPwithBDDC = NULL,KSPwithFETIDP = NULL;
1013:   KSPConvergedReason reason;
1014:   Vec                exact_solution = NULL,bddc_solution = NULL,bddc_rhs = NULL;
1015:   PetscBool          testfetidp = PETSC_TRUE;

1017:   /* Init PETSc */
1018:   PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr;
1019:   /* Initialize DomainData */
1020:   InitializeDomainData(&dd);
1021:   /* Decompose domain */
1022:   DomainDecomposition(&dd);
1023: #if DEBUG
1024:   printf("Subdomain data\n");
1025:   printf("IPS   : %d %d %d\n",dd.ipx,dd.ipy,dd.ipz);
1026:   printf("NEG   : %d %d %d\n",dd.nex,dd.ney,dd.nez);
1027:   printf("NEL   : %d %d %d\n",dd.nex_l,dd.ney_l,dd.nez_l);
1028:   printf("LDO   : %d %d %d\n",dd.xm_l,dd.ym_l,dd.zm_l);
1029:   printf("SIZES : %d %d %d\n",dd.xm,dd.ym,dd.zm);
1030:   printf("STARTS: %d %d %d\n",dd.startx,dd.starty,dd.startz);
1031: #endif
1032:   dd.testkspfetidp = PETSC_TRUE;
1033:   PetscOptionsGetBool(NULL,NULL,"-testfetidp",&testfetidp,NULL);
1034:   PetscOptionsGetBool(NULL,NULL,"-testkspfetidp",&dd.testkspfetidp,NULL);
1035:   /* assemble global matrix */
1036:   ComputeMatrix(dd,&A);
1037:   /* get work vectors */
1038:   MatCreateVecs(A,&bddc_solution,NULL);
1039:   VecDuplicate(bddc_solution,&bddc_rhs);
1040:   VecDuplicate(bddc_solution,&exact_solution);
1041:   /* create and customize KSP/PC for BDDC */
1042:   ComputeKSPBDDC(dd,A,&KSPwithBDDC);
1043:   /* create KSP/PC for FETIDP */
1044:   if (testfetidp) {
1045:     ComputeKSPFETIDP(dd,KSPwithBDDC,&KSPwithFETIDP);
1046:   }
1047:   /* create random exact solution */
1048: #if defined(PETSC_USE_COMPLEX)
1049:   VecSet(exact_solution,1.0 + PETSC_i);
1050: #else
1051:   VecSetRandom(exact_solution,NULL);
1052: #endif
1053:   VecShift(exact_solution,-0.5);
1054:   VecScale(exact_solution,100.0);
1055:   VecGetSize(exact_solution,&ndofs);
1056:   if (dd.pure_neumann) {
1057:     VecSum(exact_solution,&scalar_value);
1058:     scalar_value = -scalar_value/(PetscScalar)ndofs;
1059:     VecShift(exact_solution,scalar_value);
1060:   }
1061:   /* assemble BDDC rhs */
1062:   MatMult(A,exact_solution,bddc_rhs);
1063:   /* test ksp with BDDC */
1064:   KSPSolve(KSPwithBDDC,bddc_rhs,bddc_solution);
1065:   KSPGetIterationNumber(KSPwithBDDC,&its);
1066:   KSPGetConvergedReason(KSPwithBDDC,&reason);
1067:   KSPComputeExtremeSingularValues(KSPwithBDDC,&maxeig,&mineig);
1068:   if (dd.pure_neumann) {
1069:     VecSum(bddc_solution,&scalar_value);
1070:     scalar_value = -scalar_value/(PetscScalar)ndofs;
1071:     VecShift(bddc_solution,scalar_value);
1072:   }
1073:   /* check exact_solution against the BDDC solution */
1074:   VecAXPY(bddc_solution,-1.0,exact_solution);
1075:   VecNorm(bddc_solution,NORM_INFINITY,&norm);
1076:   PetscPrintf(dd.gcomm,"---------------------BDDC stats-------------------------------\n");
1077:   PetscPrintf(dd.gcomm,"Number of degrees of freedom               : %8D\n",ndofs);
1078:   if (reason < 0) {
1079:     PetscPrintf(dd.gcomm,"Number of iterations                       : %8D\n",its);
1080:     PetscPrintf(dd.gcomm,"Converged reason                           : %s\n",KSPConvergedReasons[reason]);
1081:   }
1082:   if (0.95 <= mineig && mineig <= 1.05) mineig = 1.0;
1083:   PetscPrintf(dd.gcomm,"Eigenvalues preconditioned operator        : %1.1e %1.1e\n",(double)PetscFloorReal(100.*mineig)/100.,(double)PetscCeilReal(100.*maxeig)/100.);
1084:   if (norm > 1.e-1 || reason < 0) {
1085:     PetscPrintf(dd.gcomm,"Error between exact and computed solution  : %1.2e\n",(double)norm);
1086:   }
1087:   PetscPrintf(dd.gcomm,"--------------------------------------------------------------\n");
1088:   if (testfetidp) {
1089:     Vec fetidp_solution_all = NULL,fetidp_solution = NULL,fetidp_rhs = NULL;

1091:     VecDuplicate(bddc_solution,&fetidp_solution_all);
1092:     if (!dd.testkspfetidp) {
1093:       /* assemble fetidp rhs on the space of Lagrange multipliers */
1094:       KSPGetOperators(KSPwithFETIDP,&F,NULL);
1095:       MatCreateVecs(F,&fetidp_solution,&fetidp_rhs);
1096:       PCBDDCMatFETIDPGetRHS(F,bddc_rhs,fetidp_rhs);
1097:       VecSet(fetidp_solution,0.0);
1098:       /* test ksp with FETIDP */
1099:       KSPSolve(KSPwithFETIDP,fetidp_rhs,fetidp_solution);
1100:       /* assemble fetidp solution on physical domain */
1101:       PCBDDCMatFETIDPGetSolution(F,fetidp_solution,fetidp_solution_all);
1102:     } else {
1103:       KSP kspF;
1104:       KSPSolve(KSPwithFETIDP,bddc_rhs,fetidp_solution_all);
1105:       KSPFETIDPGetInnerKSP(KSPwithFETIDP,&kspF);
1106:       KSPGetOperators(kspF,&F,NULL);
1107:     }
1108:     MatGetSize(F,&ndofs,NULL);
1109:     KSPGetIterationNumber(KSPwithFETIDP,&its);
1110:     KSPGetConvergedReason(KSPwithFETIDP,&reason);
1111:     KSPComputeExtremeSingularValues(KSPwithFETIDP,&maxeig,&mineig);
1112:     /* check FETIDP sol */
1113:     if (dd.pure_neumann) {
1114:       VecSum(fetidp_solution_all,&scalar_value);
1115:       scalar_value = -scalar_value/(PetscScalar)ndofs;
1116:       VecShift(fetidp_solution_all,scalar_value);
1117:     }
1118:     VecAXPY(fetidp_solution_all,-1.0,exact_solution);
1119:     VecNorm(fetidp_solution_all,NORM_INFINITY,&norm);
1120:     PetscPrintf(dd.gcomm,"------------------FETI-DP stats-------------------------------\n");
1121:     PetscPrintf(dd.gcomm,"Number of degrees of freedom               : %8D\n",ndofs);
1122:     if (reason < 0) {
1123:       PetscPrintf(dd.gcomm,"Number of iterations                       : %8D\n",its);
1124:       PetscPrintf(dd.gcomm,"Converged reason                           : %D\n",reason);
1125:     }
1126:     if (0.95 <= mineig && mineig <= 1.05) mineig = 1.0;
1127:     PetscPrintf(dd.gcomm,"Eigenvalues preconditioned operator        : %1.1e %1.1e\n",(double)PetscFloorReal(100.*mineig)/100.,(double)PetscCeilReal(100.*maxeig)/100.);
1128:     if (norm > 1.e-1 || reason < 0) {
1129:       PetscPrintf(dd.gcomm,"Error between exact and computed solution  : %1.2e\n",(double)norm);
1130:     }
1131:     PetscPrintf(dd.gcomm,"--------------------------------------------------------------\n");
1132:     VecDestroy(&fetidp_solution);
1133:     VecDestroy(&fetidp_solution_all);
1134:     VecDestroy(&fetidp_rhs);
1135:   }
1136:   KSPDestroy(&KSPwithFETIDP);
1137:   VecDestroy(&exact_solution);
1138:   VecDestroy(&bddc_solution);
1139:   VecDestroy(&bddc_rhs);
1140:   MatDestroy(&A);
1141:   KSPDestroy(&KSPwithBDDC);
1142:   /* Quit PETSc */
1143:   PetscFinalize();
1144:   return ierr;
1145: }

1147: /*TEST

1149:  testset:
1150:    nsize: 4
1151:    args: -nex 7 -physical_pc_bddc_coarse_eqs_per_proc 3 -physical_pc_bddc_switch_static
1152:    output_file: output/ex59_bddc_fetidp_1.out
1153:    test:
1154:      suffix: bddc_fetidp_1
1155:    test:
1156:      requires: viennacl
1157:      suffix: bddc_fetidp_1_viennacl
1158:      args: -subdomain_mat_type aijviennacl
1159:    test:
1160:      requires: cuda
1161:      suffix: bddc_fetidp_1_cuda
1162:      args: -subdomain_mat_type aijcusparse -physical_pc_bddc_dirichlet_pc_factor_mat_solver_type cusparse

1164:  testset:
1165:    nsize: 4
1166:    args: -npx 2 -npy 2 -nex 6 -ney 6 -fluxes_ksp_max_it 10 -physical_ksp_max_it 10
1167:    requires: !single
1168:    test:
1169:      suffix: bddc_fetidp_2
1170:    test:
1171:      suffix: bddc_fetidp_3
1172:      args: -npz 1 -nez 1
1173:    test:
1174:      suffix: bddc_fetidp_4
1175:      args: -npz 1 -nez 1 -physical_pc_bddc_use_change_of_basis -physical_sub_schurs_mat_solver_type petsc -physical_pc_bddc_use_deluxe_scaling -physical_pc_bddc_deluxe_singlemat -fluxes_fetidp_ksp_type cg

1177:  testset:
1178:    nsize: 8
1179:    suffix: bddc_fetidp_approximate
1180:    args: -npx 2 -npy 2 -npz 2 -p 2 -nex 8 -ney 7 -nez 9 -physical_ksp_max_it 20 -subdomain_mat_type aij -physical_pc_bddc_switch_static -physical_pc_bddc_dirichlet_approximate -physical_pc_bddc_neumann_approximate -physical_pc_bddc_dirichlet_pc_type gamg -physical_pc_bddc_dirichlet_mg_levels_ksp_max_it 3 -physical_pc_bddc_neumann_pc_type sor -physical_pc_bddc_neumann_approximate_scale -testfetidp 0

1182:  testset:
1183:    nsize: 4
1184:    args: -npx 2 -npy 2 -nex 6 -ney 6 -fluxes_ksp_max_it 10 -physical_ksp_max_it 10 -physical_ksp_view -physical_pc_bddc_levels 1
1185:    filter: grep -v "variant HERMITIAN"
1186:    requires: !single
1187:    test:
1188:      suffix: bddc_fetidp_ml_1
1189:      args: -physical_pc_bddc_coarsening_ratio 1
1190:    test:
1191:      suffix: bddc_fetidp_ml_2
1192:      args: -physical_pc_bddc_coarsening_ratio 2 -mat_partitioning_type average
1193:    test:
1194:      suffix: bddc_fetidp_ml_3
1195:      args: -physical_pc_bddc_coarsening_ratio 4

1197:  testset:
1198:    nsize: 9
1199:    args: -npx 3 -npy 3 -p 2 -nex 6 -ney 6 -physical_pc_bddc_deluxe_singlemat -physical_sub_schurs_mat_solver_type petsc -physical_pc_bddc_use_deluxe_scaling -physical_pc_bddc_graph_maxcount 1 -physical_pc_bddc_levels 3 -physical_pc_bddc_coarsening_ratio 2 -physical_pc_bddc_coarse_ksp_type gmres
1200:    output_file: output/ex59_bddc_fetidp_ml_eqlimit.out
1201:    test:
1202:      suffix: bddc_fetidp_ml_eqlimit_1
1203:      args: -physical_pc_bddc_coarse_eqs_limit 31 -mat_partitioning_type average -physical_pc_bddc_coarse_pc_bddc_graph_maxcount 1
1204:    test:
1205:      suffix: bddc_fetidp_ml_eqlimit_2
1206:      args: -physical_pc_bddc_coarse_eqs_limit 46


1209: TEST*/