Actual source code: asm.c

petsc-3.4.5 2014-06-29
  2: /*
  3:   This file defines an additive Schwarz preconditioner for any Mat implementation.

  5:   Note that each processor may have any number of subdomains. But in order to
  6:   deal easily with the VecScatter(), we treat each processor as if it has the
  7:   same number of subdomains.

  9:        n - total number of true subdomains on all processors
 10:        n_local_true - actual number of subdomains on this processor
 11:        n_local = maximum over all processors of n_local_true
 12: */
 13: #include <petsc-private/pcimpl.h>     /*I "petscpc.h" I*/
 14: #include <petscdm.h>

 16: typedef struct {
 17:   PetscInt   n, n_local, n_local_true;
 18:   PetscInt   overlap;             /* overlap requested by user */
 19:   KSP        *ksp;                /* linear solvers for each block */
 20:   VecScatter *restriction;        /* mapping from global to subregion */
 21:   VecScatter *localization;       /* mapping from overlapping to non-overlapping subregion */
 22:   VecScatter *prolongation;       /* mapping from subregion to global */
 23:   Vec        *x,*y,*y_local;      /* work vectors */
 24:   IS         *is;                 /* index set that defines each overlapping subdomain */
 25:   IS         *is_local;           /* index set that defines each non-overlapping subdomain, may be NULL */
 26:   Mat        *mat,*pmat;          /* mat is not currently used */
 27:   PCASMType  type;                /* use reduced interpolation, restriction or both */
 28:   PetscBool  type_set;            /* if user set this value (so won't change it for symmetric problems) */
 29:   PetscBool  same_local_solves;   /* flag indicating whether all local solvers are same */
 30:   PetscBool  sort_indices;        /* flag to sort subdomain indices */
 31:   PetscBool  dm_subdomains;       /* whether DM is allowed to define subdomains */
 32: } PC_ASM;

 36: static PetscErrorCode PCView_ASM(PC pc,PetscViewer viewer)
 37: {
 38:   PC_ASM         *osm = (PC_ASM*)pc->data;
 40:   PetscMPIInt    rank;
 41:   PetscInt       i,bsz;
 42:   PetscBool      iascii,isstring;
 43:   PetscViewer    sviewer;

 46:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
 47:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);
 48:   if (iascii) {
 49:     char overlaps[256] = "user-defined overlap",blocks[256] = "total subdomain blocks not yet set";
 50:     if (osm->overlap >= 0) {PetscSNPrintf(overlaps,sizeof(overlaps),"amount of overlap = %D",osm->overlap);}
 51:     if (osm->n > 0) {PetscSNPrintf(blocks,sizeof(blocks),"total subdomain blocks = %D",osm->n);}
 52:     PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: %s, %s\n",blocks,overlaps);
 53:     PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: restriction/interpolation type - %s\n",PCASMTypes[osm->type]);
 54:     MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
 55:     if (osm->same_local_solves) {
 56:       if (osm->ksp) {
 57:         PetscViewerASCIIPrintf(viewer,"  Local solve is same for all blocks, in the following KSP and PC objects:\n");
 58:         PetscViewerGetSingleton(viewer,&sviewer);
 59:         if (!rank) {
 60:           PetscViewerASCIIPushTab(viewer);
 61:           KSPView(osm->ksp[0],sviewer);
 62:           PetscViewerASCIIPopTab(viewer);
 63:         }
 64:         PetscViewerRestoreSingleton(viewer,&sviewer);
 65:       }
 66:     } else {
 67:       PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
 68:       PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] number of local blocks = %D\n",(int)rank,osm->n_local_true);
 69:       PetscViewerFlush(viewer);
 70:       PetscViewerASCIIPrintf(viewer,"  Local solve info for each block is in the following KSP and PC objects:\n");
 71:       PetscViewerASCIIPushTab(viewer);
 72:       PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");
 73:       for (i=0; i<osm->n_local; i++) {
 74:         PetscViewerGetSingleton(viewer,&sviewer);
 75:         if (i < osm->n_local_true) {
 76:           ISGetLocalSize(osm->is[i],&bsz);
 77:           PetscViewerASCIISynchronizedPrintf(sviewer,"[%d] local block number %D, size = %D\n",(int)rank,i,bsz);
 78:           KSPView(osm->ksp[i],sviewer);
 79:           PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");
 80:         }
 81:         PetscViewerRestoreSingleton(viewer,&sviewer);
 82:       }
 83:       PetscViewerASCIIPopTab(viewer);
 84:       PetscViewerFlush(viewer);
 85:       PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
 86:     }
 87:   } else if (isstring) {
 88:     PetscViewerStringSPrintf(viewer," blocks=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCASMTypes[osm->type]);
 89:     PetscViewerGetSingleton(viewer,&sviewer);
 90:     if (osm->ksp) {KSPView(osm->ksp[0],sviewer);}
 91:     PetscViewerRestoreSingleton(viewer,&sviewer);
 92:   }
 93:   return(0);
 94: }

 98: static PetscErrorCode PCASMPrintSubdomains(PC pc)
 99: {
100:   PC_ASM         *osm = (PC_ASM*)pc->data;
101:   const char     *prefix;
102:   char           fname[PETSC_MAX_PATH_LEN+1];
103:   PetscViewer    viewer, sviewer;
104:   char           *s;
105:   PetscInt       i,j,nidx;
106:   const PetscInt *idx;
107:   PetscMPIInt    rank, size;

111:   MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);
112:   MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);
113:   PCGetOptionsPrefix(pc,&prefix);
114:   PetscOptionsGetString(prefix,"-pc_asm_print_subdomains",fname,PETSC_MAX_PATH_LEN,NULL);
115:   if (fname[0] == 0) { PetscStrcpy(fname,"stdout"); };
116:   PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc),fname,&viewer);
117:   for (i=0; i<osm->n_local; i++) {
118:     if (i < osm->n_local_true) {
119:       ISGetLocalSize(osm->is[i],&nidx);
120:       ISGetIndices(osm->is[i],&idx);
121:       /* Print to a string viewer; no more than 15 characters per index plus 512 char for the header.*/
122:       PetscMalloc(sizeof(char)*(16*(nidx+1)+512), &s);
123:       PetscViewerStringOpen(PETSC_COMM_SELF, s, 16*(nidx+1)+512, &sviewer);
124:       PetscViewerStringSPrintf(sviewer, "[%D:%D] Subdomain %D with overlap:\n", rank, size, i);
125:       for (j=0; j<nidx; j++) {
126:         PetscViewerStringSPrintf(sviewer,"%D ",idx[j]);
127:       }
128:       ISRestoreIndices(osm->is[i],&idx);
129:       PetscViewerStringSPrintf(sviewer,"\n");
130:       PetscViewerDestroy(&sviewer);
131:       PetscViewerASCIISynchronizedAllow(viewer, PETSC_TRUE);
132:       PetscViewerASCIISynchronizedPrintf(viewer, s);
133:       PetscViewerFlush(viewer);
134:       PetscViewerASCIISynchronizedAllow(viewer, PETSC_FALSE);
135:       PetscFree(s);
136:       if (osm->is_local) {
137:         /* Print to a string viewer; no more than 15 characters per index plus 512 char for the header.*/
138:         PetscMalloc(sizeof(char)*(16*(nidx+1)+512), &s);
139:         PetscViewerStringOpen(PETSC_COMM_SELF, s, 16*(nidx+1)+512, &sviewer);
140:         PetscViewerStringSPrintf(sviewer, "[%D:%D] Subdomain %D without overlap:\n", rank, size, i);
141:         ISGetLocalSize(osm->is_local[i],&nidx);
142:         ISGetIndices(osm->is_local[i],&idx);
143:         for (j=0; j<nidx; j++) {
144:           PetscViewerStringSPrintf(sviewer,"%D ",idx[j]);
145:         }
146:         ISRestoreIndices(osm->is_local[i],&idx);
147:         PetscViewerStringSPrintf(sviewer,"\n");
148:         PetscViewerDestroy(&sviewer);
149:         PetscViewerASCIISynchronizedAllow(viewer, PETSC_TRUE);
150:         PetscViewerASCIISynchronizedPrintf(viewer, s);
151:         PetscViewerFlush(viewer);
152:         PetscViewerASCIISynchronizedAllow(viewer, PETSC_FALSE);
153:         PetscFree(s);
154:       }
155:     } else {
156:       /* Participate in collective viewer calls. */
157:       PetscViewerASCIISynchronizedAllow(viewer, PETSC_TRUE);
158:       PetscViewerFlush(viewer);
159:       PetscViewerASCIISynchronizedAllow(viewer, PETSC_FALSE);
160:       /* Assume either all ranks have is_local or none do. */
161:       if (osm->is_local) {
162:         PetscViewerASCIISynchronizedAllow(viewer, PETSC_TRUE);
163:         PetscViewerFlush(viewer);
164:         PetscViewerASCIISynchronizedAllow(viewer, PETSC_FALSE);
165:       }
166:     }
167:   }
168:   PetscViewerFlush(viewer);
169:   PetscViewerDestroy(&viewer);
170:   return(0);
171: }

175: static PetscErrorCode PCSetUp_ASM(PC pc)
176: {
177:   PC_ASM         *osm = (PC_ASM*)pc->data;
179:   PetscBool      symset,flg;
180:   PetscInt       i,m,m_local,firstRow,lastRow;
181:   MatReuse       scall = MAT_REUSE_MATRIX;
182:   IS             isl;
183:   KSP            ksp;
184:   PC             subpc;
185:   const char     *prefix,*pprefix;
186:   Vec            vec;
187:   DM             *domain_dm = NULL;

190:   if (!pc->setupcalled) {

192:     if (!osm->type_set) {
193:       MatIsSymmetricKnown(pc->pmat,&symset,&flg);
194:       if (symset && flg) osm->type = PC_ASM_BASIC;
195:     }

197:     /* Note: if subdomains have been set either via PCASMSetTotalSubdomains() or via PCASMSetLocalSubdomains(), osm->n_local_true will not be PETSC_DECIDE */
198:     if (osm->n_local_true == PETSC_DECIDE) {
199:       /* no subdomains given */
200:       /* try pc->dm first, if allowed */
201:       if (osm->dm_subdomains && pc->dm) {
202:         PetscInt  num_domains, d;
203:         char      **domain_names;
204:         IS        *inner_domain_is, *outer_domain_is;
205:         DMCreateDomainDecomposition(pc->dm, &num_domains, &domain_names, &inner_domain_is, &outer_domain_is, &domain_dm);
206:         if (num_domains) {
207:           PCASMSetLocalSubdomains(pc, num_domains, outer_domain_is, inner_domain_is);
208:         }
209:         for (d = 0; d < num_domains; ++d) {
210:           if (domain_names)    {PetscFree(domain_names[d]);}
211:           if (inner_domain_is) {ISDestroy(&inner_domain_is[d]);}
212:           if (outer_domain_is) {ISDestroy(&outer_domain_is[d]);}
213:         }
214:         PetscFree(domain_names);
215:         PetscFree(inner_domain_is);
216:         PetscFree(outer_domain_is);
217:       }
218:       if (osm->n_local_true == PETSC_DECIDE) {
219:         /* still no subdomains; use one subdomain per processor */
220:         osm->n_local_true = 1;
221:       }
222:     }
223:     { /* determine the global and max number of subdomains */
224:       struct {PetscInt max,sum;} inwork,outwork;
225:       inwork.max   = osm->n_local_true;
226:       inwork.sum   = osm->n_local_true;
227:       MPI_Allreduce(&inwork,&outwork,1,MPIU_2INT,PetscMaxSum_Op,PetscObjectComm((PetscObject)pc));
228:       osm->n_local = outwork.max;
229:       osm->n       = outwork.sum;
230:     }
231:     if (!osm->is) { /* create the index sets */
232:       PCASMCreateSubdomains(pc->pmat,osm->n_local_true,&osm->is);
233:     }
234:     if (osm->n_local_true > 1 && !osm->is_local) {
235:       PetscMalloc(osm->n_local_true*sizeof(IS),&osm->is_local);
236:       for (i=0; i<osm->n_local_true; i++) {
237:         if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */
238:           ISDuplicate(osm->is[i],&osm->is_local[i]);
239:           ISCopy(osm->is[i],osm->is_local[i]);
240:         } else {
241:           PetscObjectReference((PetscObject)osm->is[i]);
242:           osm->is_local[i] = osm->is[i];
243:         }
244:       }
245:     }
246:     PCGetOptionsPrefix(pc,&prefix);
247:     flg  = PETSC_FALSE;
248:     PetscOptionsGetBool(prefix,"-pc_asm_print_subdomains",&flg,NULL);
249:     if (flg) { PCASMPrintSubdomains(pc); }

251:     if (osm->overlap > 0) {
252:       /* Extend the "overlapping" regions by a number of steps */
253:       MatIncreaseOverlap(pc->pmat,osm->n_local_true,osm->is,osm->overlap);
254:     }
255:     if (osm->sort_indices) {
256:       for (i=0; i<osm->n_local_true; i++) {
257:         ISSort(osm->is[i]);
258:         if (osm->is_local) {
259:           ISSort(osm->is_local[i]);
260:         }
261:       }
262:     }
263:     /* Create the local work vectors and scatter contexts */
264:     MatGetVecs(pc->pmat,&vec,0);
265:     PetscMalloc(osm->n_local*sizeof(VecScatter),&osm->restriction);
266:     if (osm->is_local) {PetscMalloc(osm->n_local*sizeof(VecScatter),&osm->localization);}
267:     PetscMalloc(osm->n_local*sizeof(VecScatter),&osm->prolongation);
268:     PetscMalloc(osm->n_local*sizeof(Vec),&osm->x);
269:     PetscMalloc(osm->n_local*sizeof(Vec),&osm->y);
270:     PetscMalloc(osm->n_local*sizeof(Vec),&osm->y_local);
271:     VecGetOwnershipRange(vec, &firstRow, &lastRow);
272:     for (i=0; i<osm->n_local_true; ++i, firstRow += m_local) {
273:       ISGetLocalSize(osm->is[i],&m);
274:       VecCreateSeq(PETSC_COMM_SELF,m,&osm->x[i]);
275:       ISCreateStride(PETSC_COMM_SELF,m,0,1,&isl);
276:       VecScatterCreate(vec,osm->is[i],osm->x[i],isl,&osm->restriction[i]);
277:       ISDestroy(&isl);
278:       VecDuplicate(osm->x[i],&osm->y[i]);
279:       if (osm->is_local) {
280:         ISLocalToGlobalMapping ltog;
281:         IS                     isll;
282:         const PetscInt         *idx_local;
283:         PetscInt               *idx,nout;

285:         ISLocalToGlobalMappingCreateIS(osm->is[i],&ltog);
286:         ISGetLocalSize(osm->is_local[i],&m_local);
287:         ISGetIndices(osm->is_local[i], &idx_local);
288:         PetscMalloc(m_local*sizeof(PetscInt),&idx);
289:         ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,m_local,idx_local,&nout,idx);
290:         ISLocalToGlobalMappingDestroy(&ltog);
291:         if (nout != m_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"is_local not a subset of is");
292:         ISRestoreIndices(osm->is_local[i], &idx_local);
293:         ISCreateGeneral(PETSC_COMM_SELF,m_local,idx,PETSC_OWN_POINTER,&isll);
294:         ISCreateStride(PETSC_COMM_SELF,m_local,0,1,&isl);
295:         VecCreateSeq(PETSC_COMM_SELF,m_local,&osm->y_local[i]);
296:         VecScatterCreate(osm->y[i],isll,osm->y_local[i],isl,&osm->localization[i]);
297:         ISDestroy(&isll);

299:         VecScatterCreate(vec,osm->is_local[i],osm->y_local[i],isl,&osm->prolongation[i]);
300:         ISDestroy(&isl);
301:       } else {
302:         VecGetLocalSize(vec,&m_local);

304:         osm->y_local[i] = osm->y[i];

306:         PetscObjectReference((PetscObject) osm->y[i]);

308:         osm->prolongation[i] = osm->restriction[i];

310:         PetscObjectReference((PetscObject) osm->restriction[i]);
311:       }
312:     }
313:     if (firstRow != lastRow) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Specified ASM subdomain sizes were invalid: %d != %d", firstRow, lastRow);
314:     for (i=osm->n_local_true; i<osm->n_local; i++) {
315:       VecCreateSeq(PETSC_COMM_SELF,0,&osm->x[i]);
316:       VecDuplicate(osm->x[i],&osm->y[i]);
317:       VecDuplicate(osm->x[i],&osm->y_local[i]);
318:       ISCreateStride(PETSC_COMM_SELF,0,0,1,&isl);
319:       VecScatterCreate(vec,isl,osm->x[i],isl,&osm->restriction[i]);
320:       if (osm->is_local) {
321:         VecScatterCreate(osm->y[i],isl,osm->y_local[i],isl,&osm->localization[i]);
322:         VecScatterCreate(vec,isl,osm->x[i],isl,&osm->prolongation[i]);
323:       } else {
324:         osm->prolongation[i] = osm->restriction[i];
325:         PetscObjectReference((PetscObject) osm->restriction[i]);
326:       }
327:       ISDestroy(&isl);
328:     }
329:     VecDestroy(&vec);

331:     if (!osm->ksp) {
332:       /* Create the local solvers */
333:       PetscMalloc(osm->n_local_true*sizeof(KSP*),&osm->ksp);
334:       if (domain_dm) {
335:         PetscInfo(pc,"Setting up ASM subproblems using the embedded DM\n");
336:       }
337:       for (i=0; i<osm->n_local_true; i++) {
338:         KSPCreate(PETSC_COMM_SELF,&ksp);
339:         PetscLogObjectParent(pc,ksp);
340:         PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);
341:         KSPSetType(ksp,KSPPREONLY);
342:         KSPGetPC(ksp,&subpc);
343:         PCGetOptionsPrefix(pc,&prefix);
344:         KSPSetOptionsPrefix(ksp,prefix);
345:         KSPAppendOptionsPrefix(ksp,"sub_");
346:         if (domain_dm) {
347:           KSPSetDM(ksp, domain_dm[i]);
348:           KSPSetDMActive(ksp, PETSC_FALSE);
349:           DMDestroy(&domain_dm[i]);
350:         }
351:         osm->ksp[i] = ksp;
352:       }
353:       if (domain_dm) {
354:         PetscFree(domain_dm);
355:       }
356:     }
357:     scall = MAT_INITIAL_MATRIX;
358:   } else {
359:     /*
360:        Destroy the blocks from the previous iteration
361:     */
362:     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
363:       MatDestroyMatrices(osm->n_local_true,&osm->pmat);
364:       scall = MAT_INITIAL_MATRIX;
365:     }
366:   }

368:   /*
369:      Extract out the submatrices
370:   */
371:   MatGetSubMatrices(pc->pmat,osm->n_local_true,osm->is,osm->is,scall,&osm->pmat);
372:   if (scall == MAT_INITIAL_MATRIX) {
373:     PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);
374:     for (i=0; i<osm->n_local_true; i++) {
375:       PetscLogObjectParent(pc,osm->pmat[i]);
376:       PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);
377:     }
378:   }

380:   /* Return control to the user so that the submatrices can be modified (e.g., to apply
381:      different boundary conditions for the submatrices than for the global problem) */
382:   PCModifySubMatrices(pc,osm->n_local_true,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);

384:   /*
385:      Loop over subdomains putting them into local ksp
386:   */
387:   for (i=0; i<osm->n_local_true; i++) {
388:     KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);
389:     if (!pc->setupcalled) {
390:       KSPSetFromOptions(osm->ksp[i]);
391:     }
392:   }
393:   return(0);
394: }

398: static PetscErrorCode PCSetUpOnBlocks_ASM(PC pc)
399: {
400:   PC_ASM         *osm = (PC_ASM*)pc->data;
402:   PetscInt       i;

405:   for (i=0; i<osm->n_local_true; i++) {
406:     KSPSetUp(osm->ksp[i]);
407:   }
408:   return(0);
409: }

413: static PetscErrorCode PCApply_ASM(PC pc,Vec x,Vec y)
414: {
415:   PC_ASM         *osm = (PC_ASM*)pc->data;
417:   PetscInt       i,n_local = osm->n_local,n_local_true = osm->n_local_true;
418:   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

421:   /*
422:      Support for limiting the restriction or interpolation to only local
423:      subdomain values (leaving the other values 0).
424:   */
425:   if (!(osm->type & PC_ASM_RESTRICT)) {
426:     forward = SCATTER_FORWARD_LOCAL;
427:     /* have to zero the work RHS since scatter may leave some slots empty */
428:     for (i=0; i<n_local_true; i++) {
429:       VecZeroEntries(osm->x[i]);
430:     }
431:   }
432:   if (!(osm->type & PC_ASM_INTERPOLATE)) reverse = SCATTER_REVERSE_LOCAL;

434:   for (i=0; i<n_local; i++) {
435:     VecScatterBegin(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);
436:   }
437:   VecZeroEntries(y);
438:   /* do the local solves */
439:   for (i=0; i<n_local_true; i++) {
440:     VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);
441:     KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);
442:     if (osm->localization) {
443:       VecScatterBegin(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);
444:       VecScatterEnd(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);
445:     }
446:     VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);
447:   }
448:   /* handle the rest of the scatters that do not have local solves */
449:   for (i=n_local_true; i<n_local; i++) {
450:     VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);
451:     VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);
452:   }
453:   for (i=0; i<n_local; i++) {
454:     VecScatterEnd(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);
455:   }
456:   return(0);
457: }

461: static PetscErrorCode PCApplyTranspose_ASM(PC pc,Vec x,Vec y)
462: {
463:   PC_ASM         *osm = (PC_ASM*)pc->data;
465:   PetscInt       i,n_local = osm->n_local,n_local_true = osm->n_local_true;
466:   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

469:   /*
470:      Support for limiting the restriction or interpolation to only local
471:      subdomain values (leaving the other values 0).

473:      Note: these are reversed from the PCApply_ASM() because we are applying the
474:      transpose of the three terms
475:   */
476:   if (!(osm->type & PC_ASM_INTERPOLATE)) {
477:     forward = SCATTER_FORWARD_LOCAL;
478:     /* have to zero the work RHS since scatter may leave some slots empty */
479:     for (i=0; i<n_local_true; i++) {
480:       VecZeroEntries(osm->x[i]);
481:     }
482:   }
483:   if (!(osm->type & PC_ASM_RESTRICT)) reverse = SCATTER_REVERSE_LOCAL;

485:   for (i=0; i<n_local; i++) {
486:     VecScatterBegin(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);
487:   }
488:   VecZeroEntries(y);
489:   /* do the local solves */
490:   for (i=0; i<n_local_true; i++) {
491:     VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);
492:     KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);
493:     if (osm->localization) {
494:       VecScatterBegin(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);
495:       VecScatterEnd(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);
496:     }
497:     VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);
498:   }
499:   /* handle the rest of the scatters that do not have local solves */
500:   for (i=n_local_true; i<n_local; i++) {
501:     VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);
502:     VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);
503:   }
504:   for (i=0; i<n_local; i++) {
505:     VecScatterEnd(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);
506:   }
507:   return(0);
508: }

512: static PetscErrorCode PCReset_ASM(PC pc)
513: {
514:   PC_ASM         *osm = (PC_ASM*)pc->data;
516:   PetscInt       i;

519:   if (osm->ksp) {
520:     for (i=0; i<osm->n_local_true; i++) {
521:       KSPReset(osm->ksp[i]);
522:     }
523:   }
524:   if (osm->pmat) {
525:     if (osm->n_local_true > 0) {
526:       MatDestroyMatrices(osm->n_local_true,&osm->pmat);
527:     }
528:   }
529:   if (osm->restriction) {
530:     for (i=0; i<osm->n_local; i++) {
531:       VecScatterDestroy(&osm->restriction[i]);
532:       if (osm->localization) {VecScatterDestroy(&osm->localization[i]);}
533:       VecScatterDestroy(&osm->prolongation[i]);
534:       VecDestroy(&osm->x[i]);
535:       VecDestroy(&osm->y[i]);
536:       VecDestroy(&osm->y_local[i]);
537:     }
538:     PetscFree(osm->restriction);
539:     if (osm->localization) {PetscFree(osm->localization);}
540:     PetscFree(osm->prolongation);
541:     PetscFree(osm->x);
542:     PetscFree(osm->y);
543:     PetscFree(osm->y_local);
544:   }
545:   PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);

547:   osm->is       = 0;
548:   osm->is_local = 0;
549:   return(0);
550: }

554: static PetscErrorCode PCDestroy_ASM(PC pc)
555: {
556:   PC_ASM         *osm = (PC_ASM*)pc->data;
558:   PetscInt       i;

561:   PCReset_ASM(pc);
562:   if (osm->ksp) {
563:     for (i=0; i<osm->n_local_true; i++) {
564:       KSPDestroy(&osm->ksp[i]);
565:     }
566:     PetscFree(osm->ksp);
567:   }
568:   PetscFree(pc->data);
569:   return(0);
570: }

574: static PetscErrorCode PCSetFromOptions_ASM(PC pc)
575: {
576:   PC_ASM         *osm = (PC_ASM*)pc->data;
578:   PetscInt       blocks,ovl;
579:   PetscBool      symset,flg;
580:   PCASMType      asmtype;

583:   /* set the type to symmetric if matrix is symmetric */
584:   if (!osm->type_set && pc->pmat) {
585:     MatIsSymmetricKnown(pc->pmat,&symset,&flg);
586:     if (symset && flg) osm->type = PC_ASM_BASIC;
587:   }
588:   PetscOptionsHead("Additive Schwarz options");
589:   PetscOptionsBool("-pc_asm_dm_subdomains","Use DMCreateDomainDecomposition() to define subdomains","PCASMSetDMSubdomains",osm->dm_subdomains,&osm->dm_subdomains,&flg);
590:   PetscOptionsInt("-pc_asm_blocks","Number of subdomains","PCASMSetTotalSubdomains",osm->n,&blocks,&flg);
591:   if (flg) {
592:     PCASMSetTotalSubdomains(pc,blocks,NULL,NULL);
593:     osm->dm_subdomains = PETSC_FALSE;
594:   }
595:   PetscOptionsInt("-pc_asm_overlap","Number of grid points overlap","PCASMSetOverlap",osm->overlap,&ovl,&flg);
596:   if (flg) {
597:     PCASMSetOverlap(pc,ovl);
598:     osm->dm_subdomains = PETSC_FALSE;
599:   }
600:   flg  = PETSC_FALSE;
601:   PetscOptionsEnum("-pc_asm_type","Type of restriction/extension","PCASMSetType",PCASMTypes,(PetscEnum)osm->type,(PetscEnum*)&asmtype,&flg);
602:   if (flg) {PCASMSetType(pc,asmtype); }
603:   PetscOptionsTail();
604:   return(0);
605: }

607: /*------------------------------------------------------------------------------------*/

611: static PetscErrorCode  PCASMSetLocalSubdomains_ASM(PC pc,PetscInt n,IS is[],IS is_local[])
612: {
613:   PC_ASM         *osm = (PC_ASM*)pc->data;
615:   PetscInt       i;

618:   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
619:   if (pc->setupcalled && (n != osm->n_local_true || is)) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCASMSetLocalSubdomains() should be called before calling PCSetUp().");

621:   if (!pc->setupcalled) {
622:     if (is) {
623:       for (i=0; i<n; i++) {PetscObjectReference((PetscObject)is[i]);}
624:     }
625:     if (is_local) {
626:       for (i=0; i<n; i++) {PetscObjectReference((PetscObject)is_local[i]);}
627:     }
628:     PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);

630:     osm->n_local_true = n;
631:     osm->is           = 0;
632:     osm->is_local     = 0;
633:     if (is) {
634:       PetscMalloc(n*sizeof(IS),&osm->is);
635:       for (i=0; i<n; i++) osm->is[i] = is[i];
636:       /* Flag indicating that the user has set overlapping subdomains so PCASM should not increase their size. */
637:       osm->overlap = -1;
638:     }
639:     if (is_local) {
640:       PetscMalloc(n*sizeof(IS),&osm->is_local);
641:       for (i=0; i<n; i++) osm->is_local[i] = is_local[i];
642:       if (!is) {
643:         PetscMalloc(osm->n_local_true*sizeof(IS),&osm->is);
644:         for (i=0; i<osm->n_local_true; i++) {
645:           if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */
646:             ISDuplicate(osm->is_local[i],&osm->is[i]);
647:             ISCopy(osm->is_local[i],osm->is[i]);
648:           } else {
649:             PetscObjectReference((PetscObject)osm->is_local[i]);
650:             osm->is[i] = osm->is_local[i];
651:           }
652:         }
653:       }
654:     }
655:   }
656:   return(0);
657: }

661: static PetscErrorCode  PCASMSetTotalSubdomains_ASM(PC pc,PetscInt N,IS *is,IS *is_local)
662: {
663:   PC_ASM         *osm = (PC_ASM*)pc->data;
665:   PetscMPIInt    rank,size;
666:   PetscInt       n;

669:   if (N < 1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);
670:   if (is || is_local) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Use PCASMSetLocalSubdomains() to set specific index sets\n\they cannot be set globally yet.");

672:   /*
673:      Split the subdomains equally among all processors
674:   */
675:   MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
676:   MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
677:   n    = N/size + ((N % size) > rank);
678:   if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
679:   if (pc->setupcalled && n != osm->n_local_true) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCASMSetTotalSubdomains() should be called before PCSetUp().");
680:   if (!pc->setupcalled) {
681:     PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);

683:     osm->n_local_true = n;
684:     osm->is           = 0;
685:     osm->is_local     = 0;
686:   }
687:   return(0);
688: }

692: static PetscErrorCode  PCASMSetOverlap_ASM(PC pc,PetscInt ovl)
693: {
694:   PC_ASM *osm = (PC_ASM*)pc->data;

697:   if (ovl < 0) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
698:   if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCASMSetOverlap() should be called before PCSetUp().");
699:   if (!pc->setupcalled) osm->overlap = ovl;
700:   return(0);
701: }

705: static PetscErrorCode  PCASMSetType_ASM(PC pc,PCASMType type)
706: {
707:   PC_ASM *osm = (PC_ASM*)pc->data;

710:   osm->type     = type;
711:   osm->type_set = PETSC_TRUE;
712:   return(0);
713: }

717: static PetscErrorCode  PCASMSetSortIndices_ASM(PC pc,PetscBool  doSort)
718: {
719:   PC_ASM *osm = (PC_ASM*)pc->data;

722:   osm->sort_indices = doSort;
723:   return(0);
724: }

728: static PetscErrorCode  PCASMGetSubKSP_ASM(PC pc,PetscInt *n_local,PetscInt *first_local,KSP **ksp)
729: {
730:   PC_ASM         *osm = (PC_ASM*)pc->data;

734:   if (osm->n_local_true < 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Need to call PCSetUP() on PC (or KSPSetUp() on the outer KSP object) before calling here");

736:   if (n_local) *n_local = osm->n_local_true;
737:   if (first_local) {
738:     MPI_Scan(&osm->n_local_true,first_local,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
739:     *first_local -= osm->n_local_true;
740:   }
741:   if (ksp) {
742:     /* Assume that local solves are now different; not necessarily
743:        true though!  This flag is used only for PCView_ASM() */
744:     *ksp                   = osm->ksp;
745:     osm->same_local_solves = PETSC_FALSE;
746:   }
747:   return(0);
748: }

752: /*@C
753:     PCASMSetLocalSubdomains - Sets the local subdomains (for this processor only) for the additive Schwarz preconditioner.

755:     Collective on PC

757:     Input Parameters:
758: +   pc - the preconditioner context
759: .   n - the number of subdomains for this processor (default value = 1)
760: .   is - the index set that defines the subdomains for this processor
761:          (or NULL for PETSc to determine subdomains)
762: -   is_local - the index sets that define the local part of the subdomains for this processor
763:          (or NULL to use the default of 1 subdomain per process)

765:     Notes:
766:     The IS numbering is in the parallel, global numbering of the vector for both is and is_local

768:     By default the ASM preconditioner uses 1 block per processor.

770:     Use PCASMSetTotalSubdomains() to set the subdomains for all processors.

772:     Level: advanced

774: .keywords: PC, ASM, set, local, subdomains, additive Schwarz

776: .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
777:           PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains()
778: @*/
779: PetscErrorCode  PCASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
780: {

785:   PetscTryMethod(pc,"PCASMSetLocalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));
786:   return(0);
787: }

791: /*@C
792:     PCASMSetTotalSubdomains - Sets the subdomains for all processors for the
793:     additive Schwarz preconditioner.  Either all or no processors in the
794:     PC communicator must call this routine, with the same index sets.

796:     Collective on PC

798:     Input Parameters:
799: +   pc - the preconditioner context
800: .   N  - the number of subdomains for all processors
801: .   is - the index sets that define the subdomains for all processors
802:          (or NULL to ask PETSc to compe up with subdomains)
803: -   is_local - the index sets that define the local part of the subdomains for this processor
804:          (or NULL to use the default of 1 subdomain per process)

806:     Options Database Key:
807:     To set the total number of subdomain blocks rather than specify the
808:     index sets, use the option
809: .    -pc_asm_blocks <blks> - Sets total blocks

811:     Notes:
812:     Currently you cannot use this to set the actual subdomains with the argument is.

814:     By default the ASM preconditioner uses 1 block per processor.

816:     These index sets cannot be destroyed until after completion of the
817:     linear solves for which the ASM preconditioner is being used.

819:     Use PCASMSetLocalSubdomains() to set local subdomains.

821:     The IS numbering is in the parallel, global numbering of the vector for both is and is_local

823:     Level: advanced

825: .keywords: PC, ASM, set, total, global, subdomains, additive Schwarz

827: .seealso: PCASMSetLocalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
828:           PCASMCreateSubdomains2D()
829: @*/
830: PetscErrorCode  PCASMSetTotalSubdomains(PC pc,PetscInt N,IS is[],IS is_local[])
831: {

836:   PetscTryMethod(pc,"PCASMSetTotalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,N,is,is_local));
837:   return(0);
838: }

842: /*@
843:     PCASMSetOverlap - Sets the overlap between a pair of subdomains for the
844:     additive Schwarz preconditioner.  Either all or no processors in the
845:     PC communicator must call this routine.

847:     Logically Collective on PC

849:     Input Parameters:
850: +   pc  - the preconditioner context
851: -   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)

853:     Options Database Key:
854: .   -pc_asm_overlap <ovl> - Sets overlap

856:     Notes:
857:     By default the ASM preconditioner uses 1 block per processor.  To use
858:     multiple blocks per perocessor, see PCASMSetTotalSubdomains() and
859:     PCASMSetLocalSubdomains() (and the option -pc_asm_blocks <blks>).

861:     The overlap defaults to 1, so if one desires that no additional
862:     overlap be computed beyond what may have been set with a call to
863:     PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(), then ovl
864:     must be set to be 0.  In particular, if one does not explicitly set
865:     the subdomains an application code, then all overlap would be computed
866:     internally by PETSc, and using an overlap of 0 would result in an ASM
867:     variant that is equivalent to the block Jacobi preconditioner.

869:     Note that one can define initial index sets with any overlap via
870:     PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(); the routine
871:     PCASMSetOverlap() merely allows PETSc to extend that overlap further
872:     if desired.

874:     Level: intermediate

876: .keywords: PC, ASM, set, overlap

878: .seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
879:           PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains()
880: @*/
881: PetscErrorCode  PCASMSetOverlap(PC pc,PetscInt ovl)
882: {

888:   PetscTryMethod(pc,"PCASMSetOverlap_C",(PC,PetscInt),(pc,ovl));
889:   return(0);
890: }

894: /*@
895:     PCASMSetType - Sets the type of restriction and interpolation used
896:     for local problems in the additive Schwarz method.

898:     Logically Collective on PC

900:     Input Parameters:
901: +   pc  - the preconditioner context
902: -   type - variant of ASM, one of
903: .vb
904:       PC_ASM_BASIC       - full interpolation and restriction
905:       PC_ASM_RESTRICT    - full restriction, local processor interpolation
906:       PC_ASM_INTERPOLATE - full interpolation, local processor restriction
907:       PC_ASM_NONE        - local processor restriction and interpolation
908: .ve

910:     Options Database Key:
911: .   -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type

913:     Level: intermediate

915: .keywords: PC, ASM, set, type

917: .seealso: PCASMSetTotalSubdomains(), PCASMSetTotalSubdomains(), PCASMGetSubKSP(),
918:           PCASMCreateSubdomains2D()
919: @*/
920: PetscErrorCode  PCASMSetType(PC pc,PCASMType type)
921: {

927:   PetscTryMethod(pc,"PCASMSetType_C",(PC,PCASMType),(pc,type));
928:   return(0);
929: }

933: /*@
934:     PCASMSetSortIndices - Determines whether subdomain indices are sorted.

936:     Logically Collective on PC

938:     Input Parameters:
939: +   pc  - the preconditioner context
940: -   doSort - sort the subdomain indices

942:     Level: intermediate

944: .keywords: PC, ASM, set, type

946: .seealso: PCASMSetLocalSubdomains(), PCASMSetTotalSubdomains(), PCASMGetSubKSP(),
947:           PCASMCreateSubdomains2D()
948: @*/
949: PetscErrorCode  PCASMSetSortIndices(PC pc,PetscBool doSort)
950: {

956:   PetscTryMethod(pc,"PCASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));
957:   return(0);
958: }

962: /*@C
963:    PCASMGetSubKSP - Gets the local KSP contexts for all blocks on
964:    this processor.

966:    Collective on PC iff first_local is requested

968:    Input Parameter:
969: .  pc - the preconditioner context

971:    Output Parameters:
972: +  n_local - the number of blocks on this processor or NULL
973: .  first_local - the global number of the first block on this processor or NULL,
974:                  all processors must request or all must pass NULL
975: -  ksp - the array of KSP contexts

977:    Note:
978:    After PCASMGetSubKSP() the array of KSPes is not to be freed.

980:    Currently for some matrix implementations only 1 block per processor
981:    is supported.

983:    You must call KSPSetUp() before calling PCASMGetSubKSP().

985:    Fortran note:
986:    The output argument 'ksp' must be an array of sufficient length or NULL_OBJECT. The latter can be used to learn the necessary length.

988:    Level: advanced

990: .keywords: PC, ASM, additive Schwarz, get, sub, KSP, context

992: .seealso: PCASMSetTotalSubdomains(), PCASMSetTotalSubdomains(), PCASMSetOverlap(),
993:           PCASMCreateSubdomains2D(),
994: @*/
995: PetscErrorCode  PCASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
996: {

1001:   PetscUseMethod(pc,"PCASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));
1002:   return(0);
1003: }

1005: /* -------------------------------------------------------------------------------------*/
1006: /*MC
1007:    PCASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
1008:            its own KSP object.

1010:    Options Database Keys:
1011: +  -pc_asm_blocks <blks> - Sets total blocks
1012: .  -pc_asm_overlap <ovl> - Sets overlap
1013: -  -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type

1015:      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1016:       will get a different convergence rate due to the default option of -pc_asm_type restrict. Use
1017:       -pc_asm_type basic to use the standard ASM.

1019:    Notes: Each processor can have one or more blocks, but a block cannot be shared by more
1020:      than one processor. Defaults to one block per processor.

1022:      To set options on the solvers for each block append -sub_ to all the KSP, and PC
1023:         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly

1025:      To set the options on the solvers separate for each block call PCASMGetSubKSP()
1026:          and set the options directly on the resulting KSP object (you can access its PC
1027:          with KSPGetPC())


1030:    Level: beginner

1032:    Concepts: additive Schwarz method

1034:     References:
1035:     An additive variant of the Schwarz alternating method for the case of many subregions
1036:     M Dryja, OB Widlund - Courant Institute, New York University Technical report

1038:     Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1039:     Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.

1041: .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
1042:            PCBJACOBI, PCASMGetSubKSP(), PCASMSetLocalSubdomains(),
1043:            PCASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCASMSetOverlap(), PCASMSetType()

1045: M*/

1049: PETSC_EXTERN PetscErrorCode PCCreate_ASM(PC pc)
1050: {
1052:   PC_ASM         *osm;

1055:   PetscNewLog(pc,PC_ASM,&osm);

1057:   osm->n                 = PETSC_DECIDE;
1058:   osm->n_local           = 0;
1059:   osm->n_local_true      = PETSC_DECIDE;
1060:   osm->overlap           = 1;
1061:   osm->ksp               = 0;
1062:   osm->restriction       = 0;
1063:   osm->localization      = 0;
1064:   osm->prolongation      = 0;
1065:   osm->x                 = 0;
1066:   osm->y                 = 0;
1067:   osm->y_local           = 0;
1068:   osm->is                = 0;
1069:   osm->is_local          = 0;
1070:   osm->mat               = 0;
1071:   osm->pmat              = 0;
1072:   osm->type              = PC_ASM_RESTRICT;
1073:   osm->same_local_solves = PETSC_TRUE;
1074:   osm->sort_indices      = PETSC_TRUE;
1075:   osm->dm_subdomains     = PETSC_FALSE;

1077:   pc->data                 = (void*)osm;
1078:   pc->ops->apply           = PCApply_ASM;
1079:   pc->ops->applytranspose  = PCApplyTranspose_ASM;
1080:   pc->ops->setup           = PCSetUp_ASM;
1081:   pc->ops->reset           = PCReset_ASM;
1082:   pc->ops->destroy         = PCDestroy_ASM;
1083:   pc->ops->setfromoptions  = PCSetFromOptions_ASM;
1084:   pc->ops->setuponblocks   = PCSetUpOnBlocks_ASM;
1085:   pc->ops->view            = PCView_ASM;
1086:   pc->ops->applyrichardson = 0;

1088:   PetscObjectComposeFunction((PetscObject)pc,"PCASMSetLocalSubdomains_C",PCASMSetLocalSubdomains_ASM);
1089:   PetscObjectComposeFunction((PetscObject)pc,"PCASMSetTotalSubdomains_C",PCASMSetTotalSubdomains_ASM);
1090:   PetscObjectComposeFunction((PetscObject)pc,"PCASMSetOverlap_C",PCASMSetOverlap_ASM);
1091:   PetscObjectComposeFunction((PetscObject)pc,"PCASMSetType_C",PCASMSetType_ASM);
1092:   PetscObjectComposeFunction((PetscObject)pc,"PCASMSetSortIndices_C",PCASMSetSortIndices_ASM);
1093:   PetscObjectComposeFunction((PetscObject)pc,"PCASMGetSubKSP_C",PCASMGetSubKSP_ASM);
1094:   return(0);
1095: }

1099: /*@C
1100:    PCASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
1101:    preconditioner for a any problem on a general grid.

1103:    Collective

1105:    Input Parameters:
1106: +  A - The global matrix operator
1107: -  n - the number of local blocks

1109:    Output Parameters:
1110: .  outis - the array of index sets defining the subdomains

1112:    Level: advanced

1114:    Note: this generates nonoverlapping subdomains; the PCASM will generate the overlap
1115:     from these if you use PCASMSetLocalSubdomains()

1117:     In the Fortran version you must provide the array outis[] already allocated of length n.

1119: .keywords: PC, ASM, additive Schwarz, create, subdomains, unstructured grid

1121: .seealso: PCASMSetLocalSubdomains(), PCASMDestroySubdomains()
1122: @*/
1123: PetscErrorCode  PCASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
1124: {
1125:   MatPartitioning mpart;
1126:   const char      *prefix;
1127:   PetscErrorCode  (*f)(Mat,Mat*);
1128:   PetscMPIInt     size;
1129:   PetscInt        i,j,rstart,rend,bs;
1130:   PetscBool       isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1131:   Mat             Ad     = NULL, adj;
1132:   IS              ispart,isnumb,*is;
1133:   PetscErrorCode  ierr;

1138:   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);

1140:   /* Get prefix, row distribution, and block size */
1141:   MatGetOptionsPrefix(A,&prefix);
1142:   MatGetOwnershipRange(A,&rstart,&rend);
1143:   MatGetBlockSize(A,&bs);
1144:   if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);

1146:   /* Get diagonal block from matrix if possible */
1147:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
1148:   PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",&f);
1149:   if (f) {
1150:     MatGetDiagonalBlock(A,&Ad);
1151:   } else if (size == 1) {
1152:     Ad = A;
1153:   }
1154:   if (Ad) {
1155:     PetscObjectTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);
1156:     if (!isbaij) {PetscObjectTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);}
1157:   }
1158:   if (Ad && n > 1) {
1159:     PetscBool match,done;
1160:     /* Try to setup a good matrix partitioning if available */
1161:     MatPartitioningCreate(PETSC_COMM_SELF,&mpart);
1162:     PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);
1163:     MatPartitioningSetFromOptions(mpart);
1164:     PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);
1165:     if (!match) {
1166:       PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);
1167:     }
1168:     if (!match) { /* assume a "good" partitioner is available */
1169:       PetscInt       na;
1170:       const PetscInt *ia,*ja;
1171:       MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1172:       if (done) {
1173:         /* Build adjacency matrix by hand. Unfortunately a call to
1174:            MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1175:            remove the block-aij structure and we cannot expect
1176:            MatPartitioning to split vertices as we need */
1177:         PetscInt       i,j,len,nnz,cnt,*iia=0,*jja=0;
1178:         const PetscInt *row;
1179:         nnz = 0;
1180:         for (i=0; i<na; i++) { /* count number of nonzeros */
1181:           len = ia[i+1] - ia[i];
1182:           row = ja + ia[i];
1183:           for (j=0; j<len; j++) {
1184:             if (row[j] == i) { /* don't count diagonal */
1185:               len--; break;
1186:             }
1187:           }
1188:           nnz += len;
1189:         }
1190:         PetscMalloc((na+1)*sizeof(PetscInt),&iia);
1191:         PetscMalloc((nnz)*sizeof(PetscInt),&jja);
1192:         nnz    = 0;
1193:         iia[0] = 0;
1194:         for (i=0; i<na; i++) { /* fill adjacency */
1195:           cnt = 0;
1196:           len = ia[i+1] - ia[i];
1197:           row = ja + ia[i];
1198:           for (j=0; j<len; j++) {
1199:             if (row[j] != i) { /* if not diagonal */
1200:               jja[nnz+cnt++] = row[j];
1201:             }
1202:           }
1203:           nnz     += cnt;
1204:           iia[i+1] = nnz;
1205:         }
1206:         /* Partitioning of the adjacency matrix */
1207:         MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,NULL,&adj);
1208:         MatPartitioningSetAdjacency(mpart,adj);
1209:         MatPartitioningSetNParts(mpart,n);
1210:         MatPartitioningApply(mpart,&ispart);
1211:         ISPartitioningToNumbering(ispart,&isnumb);
1212:         MatDestroy(&adj);
1213:         foundpart = PETSC_TRUE;
1214:       }
1215:       MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1216:     }
1217:     MatPartitioningDestroy(&mpart);
1218:   }

1220:   PetscMalloc(n*sizeof(IS),&is);
1221:   *outis = is;

1223:   if (!foundpart) {

1225:     /* Partitioning by contiguous chunks of rows */

1227:     PetscInt mbs   = (rend-rstart)/bs;
1228:     PetscInt start = rstart;
1229:     for (i=0; i<n; i++) {
1230:       PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
1231:       ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);
1232:       start += count;
1233:     }

1235:   } else {

1237:     /* Partitioning by adjacency of diagonal block  */

1239:     const PetscInt *numbering;
1240:     PetscInt       *count,nidx,*indices,*newidx,start=0;
1241:     /* Get node count in each partition */
1242:     PetscMalloc(n*sizeof(PetscInt),&count);
1243:     ISPartitioningCount(ispart,n,count);
1244:     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1245:       for (i=0; i<n; i++) count[i] *= bs;
1246:     }
1247:     /* Build indices from node numbering */
1248:     ISGetLocalSize(isnumb,&nidx);
1249:     PetscMalloc(nidx*sizeof(PetscInt),&indices);
1250:     for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1251:     ISGetIndices(isnumb,&numbering);
1252:     PetscSortIntWithPermutation(nidx,numbering,indices);
1253:     ISRestoreIndices(isnumb,&numbering);
1254:     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1255:       PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);
1256:       for (i=0; i<nidx; i++) {
1257:         for (j=0; j<bs; j++) newidx[i*bs+j] = indices[i]*bs + j;
1258:       }
1259:       PetscFree(indices);
1260:       nidx   *= bs;
1261:       indices = newidx;
1262:     }
1263:     /* Shift to get global indices */
1264:     for (i=0; i<nidx; i++) indices[i] += rstart;

1266:     /* Build the index sets for each block */
1267:     for (i=0; i<n; i++) {
1268:       ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);
1269:       ISSort(is[i]);
1270:       start += count[i];
1271:     }

1273:     PetscFree(count);
1274:     PetscFree(indices);
1275:     ISDestroy(&isnumb);
1276:     ISDestroy(&ispart);

1278:   }
1279:   return(0);
1280: }

1284: /*@C
1285:    PCASMDestroySubdomains - Destroys the index sets created with
1286:    PCASMCreateSubdomains(). Should be called after setting subdomains
1287:    with PCASMSetLocalSubdomains().

1289:    Collective

1291:    Input Parameters:
1292: +  n - the number of index sets
1293: .  is - the array of index sets
1294: -  is_local - the array of local index sets, can be NULL

1296:    Level: advanced

1298: .keywords: PC, ASM, additive Schwarz, create, subdomains, unstructured grid

1300: .seealso: PCASMCreateSubdomains(), PCASMSetLocalSubdomains()
1301: @*/
1302: PetscErrorCode  PCASMDestroySubdomains(PetscInt n, IS is[], IS is_local[])
1303: {
1304:   PetscInt       i;

1308:   if (n <= 0) return(0);
1309:   if (is) {
1311:     for (i=0; i<n; i++) { ISDestroy(&is[i]); }
1312:     PetscFree(is);
1313:   }
1314:   if (is_local) {
1316:     for (i=0; i<n; i++) { ISDestroy(&is_local[i]); }
1317:     PetscFree(is_local);
1318:   }
1319:   return(0);
1320: }

1324: /*@
1325:    PCASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1326:    preconditioner for a two-dimensional problem on a regular grid.

1328:    Not Collective

1330:    Input Parameters:
1331: +  m, n - the number of mesh points in the x and y directions
1332: .  M, N - the number of subdomains in the x and y directions
1333: .  dof - degrees of freedom per node
1334: -  overlap - overlap in mesh lines

1336:    Output Parameters:
1337: +  Nsub - the number of subdomains created
1338: .  is - array of index sets defining overlapping (if overlap > 0) subdomains
1339: -  is_local - array of index sets defining non-overlapping subdomains

1341:    Note:
1342:    Presently PCAMSCreateSubdomains2d() is valid only for sequential
1343:    preconditioners.  More general related routines are
1344:    PCASMSetTotalSubdomains() and PCASMSetLocalSubdomains().

1346:    Level: advanced

1348: .keywords: PC, ASM, additive Schwarz, create, subdomains, 2D, regular grid

1350: .seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
1351:           PCASMSetOverlap()
1352: @*/
1353: PetscErrorCode  PCASMCreateSubdomains2D(PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt dof,PetscInt overlap,PetscInt *Nsub,IS **is,IS **is_local)
1354: {
1355:   PetscInt       i,j,height,width,ystart,xstart,yleft,yright,xleft,xright,loc_outer;
1357:   PetscInt       nidx,*idx,loc,ii,jj,count;

1360:   if (dof != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP," ");

1362:   *Nsub     = N*M;
1363:   PetscMalloc((*Nsub)*sizeof(IS*),is);
1364:   PetscMalloc((*Nsub)*sizeof(IS*),is_local);
1365:   ystart    = 0;
1366:   loc_outer = 0;
1367:   for (i=0; i<N; i++) {
1368:     height = n/N + ((n % N) > i); /* height of subdomain */
1369:     if (height < 2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many N subdomains for mesh dimension n");
1370:     yleft  = ystart - overlap; if (yleft < 0) yleft = 0;
1371:     yright = ystart + height + overlap; if (yright > n) yright = n;
1372:     xstart = 0;
1373:     for (j=0; j<M; j++) {
1374:       width = m/M + ((m % M) > j); /* width of subdomain */
1375:       if (width < 2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many M subdomains for mesh dimension m");
1376:       xleft  = xstart - overlap; if (xleft < 0) xleft = 0;
1377:       xright = xstart + width + overlap; if (xright > m) xright = m;
1378:       nidx   = (xright - xleft)*(yright - yleft);
1379:       PetscMalloc(nidx*sizeof(PetscInt),&idx);
1380:       loc    = 0;
1381:       for (ii=yleft; ii<yright; ii++) {
1382:         count = m*ii + xleft;
1383:         for (jj=xleft; jj<xright; jj++) idx[loc++] = count++;
1384:       }
1385:       ISCreateGeneral(PETSC_COMM_SELF,nidx,idx,PETSC_COPY_VALUES,(*is)+loc_outer);
1386:       if (overlap == 0) {
1387:         PetscObjectReference((PetscObject)(*is)[loc_outer]);

1389:         (*is_local)[loc_outer] = (*is)[loc_outer];
1390:       } else {
1391:         for (loc=0,ii=ystart; ii<ystart+height; ii++) {
1392:           for (jj=xstart; jj<xstart+width; jj++) {
1393:             idx[loc++] = m*ii + jj;
1394:           }
1395:         }
1396:         ISCreateGeneral(PETSC_COMM_SELF,loc,idx,PETSC_COPY_VALUES,*is_local+loc_outer);
1397:       }
1398:       PetscFree(idx);
1399:       xstart += width;
1400:       loc_outer++;
1401:     }
1402:     ystart += height;
1403:   }
1404:   for (i=0; i<*Nsub; i++) { ISSort((*is)[i]); }
1405:   return(0);
1406: }

1410: /*@C
1411:     PCASMGetLocalSubdomains - Gets the local subdomains (for this processor
1412:     only) for the additive Schwarz preconditioner.

1414:     Not Collective

1416:     Input Parameter:
1417: .   pc - the preconditioner context

1419:     Output Parameters:
1420: +   n - the number of subdomains for this processor (default value = 1)
1421: .   is - the index sets that define the subdomains for this processor
1422: -   is_local - the index sets that define the local part of the subdomains for this processor (can be NULL)


1425:     Notes:
1426:     The IS numbering is in the parallel, global numbering of the vector.

1428:     Level: advanced

1430: .keywords: PC, ASM, set, local, subdomains, additive Schwarz

1432: .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
1433:           PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubmatrices()
1434: @*/
1435: PetscErrorCode  PCASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
1436: {
1437:   PC_ASM         *osm;
1439:   PetscBool      match;

1445:   PetscObjectTypeCompare((PetscObject)pc,PCASM,&match);
1446:   if (!match) {
1447:     if (n) *n = 0;
1448:     if (is) *is = NULL;
1449:   } else {
1450:     osm = (PC_ASM*)pc->data;
1451:     if (n) *n = osm->n_local_true;
1452:     if (is) *is = osm->is;
1453:     if (is_local) *is_local = osm->is_local;
1454:   }
1455:   return(0);
1456: }

1460: /*@C
1461:     PCASMGetLocalSubmatrices - Gets the local submatrices (for this processor
1462:     only) for the additive Schwarz preconditioner.

1464:     Not Collective

1466:     Input Parameter:
1467: .   pc - the preconditioner context

1469:     Output Parameters:
1470: +   n - the number of matrices for this processor (default value = 1)
1471: -   mat - the matrices


1474:     Level: advanced

1476:     Notes: Call after PCSetUp() (or KSPSetUp()) but before PCApply() (or KSPApply()) and before PCSetUpOnBlocks())

1478:            Usually one would use PCSetModifySubmatrices() to change the submatrices in building the preconditioner.

1480: .keywords: PC, ASM, set, local, subdomains, additive Schwarz, block Jacobi

1482: .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
1483:           PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubdomains(), PCSetModifySubmatrices()
1484: @*/
1485: PetscErrorCode  PCASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1486: {
1487:   PC_ASM         *osm;
1489:   PetscBool      match;

1495:   if (!pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUP() or PCSetUp().");
1496:   PetscObjectTypeCompare((PetscObject)pc,PCASM,&match);
1497:   if (!match) {
1498:     if (n) *n = 0;
1499:     if (mat) *mat = NULL;
1500:   } else {
1501:     osm = (PC_ASM*)pc->data;
1502:     if (n) *n = osm->n_local_true;
1503:     if (mat) *mat = osm->pmat;
1504:   }
1505:   return(0);
1506: }

1510: /*@
1511:     PCASMSetDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1512:     Logically Collective

1514:     Input Parameter:
1515: +   pc  - the preconditioner
1516: -   flg - boolean indicating whether to use subdomains defined by the DM

1518:     Options Database Key:
1519: .   -pc_asm_dm_subdomains

1521:     Level: intermediate

1523:     Notes:
1524:     PCASMSetTotalSubdomains() and PCASMSetOverlap() take precedence over PCASMSetDMSubdomains(),
1525:     so setting either of the first two effectively turns the latter off.

1527: .keywords: PC, ASM, DM, set, subdomains, additive Schwarz

1529: .seealso: PCASMGetDMSubdomains(), PCASMSetTotalSubdomains(), PCASMSetOverlap()
1530:           PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubdomains()
1531: @*/
1532: PetscErrorCode  PCASMSetDMSubdomains(PC pc,PetscBool flg)
1533: {
1534:   PC_ASM         *osm = (PC_ASM*)pc->data;
1536:   PetscBool      match;

1541:   if (pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Not for a setup PC.");
1542:   PetscObjectTypeCompare((PetscObject)pc,PCASM,&match);
1543:   if (match) {
1544:     osm->dm_subdomains = flg;
1545:   }
1546:   return(0);
1547: }

1551: /*@
1552:     PCASMGetDMSubdomains - Returns flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1553:     Not Collective

1555:     Input Parameter:
1556: .   pc  - the preconditioner

1558:     Output Parameter:
1559: .   flg - boolean indicating whether to use subdomains defined by the DM

1561:     Level: intermediate

1563: .keywords: PC, ASM, DM, set, subdomains, additive Schwarz

1565: .seealso: PCASMSetDMSubdomains(), PCASMSetTotalSubdomains(), PCASMSetOverlap()
1566:           PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubdomains()
1567: @*/
1568: PetscErrorCode  PCASMGetDMSubdomains(PC pc,PetscBool* flg)
1569: {
1570:   PC_ASM         *osm = (PC_ASM*)pc->data;
1572:   PetscBool      match;

1577:   PetscObjectTypeCompare((PetscObject)pc,PCASM,&match);
1578:   if (match) {
1579:     if (flg) *flg = osm->dm_subdomains;
1580:   }
1581:   return(0);
1582: }