Actual source code: asm.c

petsc-main 2021-04-20
Report Typos and Errors
  1: /*
  2:   This file defines an additive Schwarz preconditioner for any Mat implementation.

  4:   Note that each processor may have any number of subdomains. But in order to
  5:   deal easily with the VecScatter(), we treat each processor as if it has the
  6:   same number of subdomains.

  8:        n - total number of true subdomains on all processors
  9:        n_local_true - actual number of subdomains on this processor
 10:        n_local = maximum over all processors of n_local_true
 11: */

 13: #include <petsc/private/pcasmimpl.h>

 15: static PetscErrorCode PCView_ASM(PC pc,PetscViewer viewer)
 16: {
 17:   PC_ASM            *osm = (PC_ASM*)pc->data;
 18:   PetscErrorCode    ierr;
 19:   PetscMPIInt       rank;
 20:   PetscInt          i,bsz;
 21:   PetscBool         iascii,isstring;
 22:   PetscViewer       sviewer;
 23:   PetscViewerFormat format;
 24:   const char        *prefix;

 27:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
 28:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);
 29:   if (iascii) {
 30:     char overlaps[256] = "user-defined overlap",blocks[256] = "total subdomain blocks not yet set";
 31:     if (osm->overlap >= 0) {PetscSNPrintf(overlaps,sizeof(overlaps),"amount of overlap = %D",osm->overlap);}
 32:     if (osm->n > 0) {PetscSNPrintf(blocks,sizeof(blocks),"total subdomain blocks = %D",osm->n);}
 33:     PetscViewerASCIIPrintf(viewer,"  %s, %s\n",blocks,overlaps);
 34:     PetscViewerASCIIPrintf(viewer,"  restriction/interpolation type - %s\n",PCASMTypes[osm->type]);
 35:     if (osm->dm_subdomains) {PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: using DM to define subdomains\n");}
 36:     if (osm->loctype != PC_COMPOSITE_ADDITIVE) {PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: local solve composition type - %s\n",PCCompositeTypes[osm->loctype]);}
 37:     MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
 38:     PetscViewerGetFormat(viewer,&format);
 39:     if (format != PETSC_VIEWER_ASCII_INFO_DETAIL) {
 40:       if (osm->ksp) {
 41:         PetscViewerASCIIPrintf(viewer,"  Local solver information for first block is in the following KSP and PC objects on rank 0:\n");
 42:         PCGetOptionsPrefix(pc,&prefix);
 43:         PetscViewerASCIIPrintf(viewer,"  Use -%sksp_view ::ascii_info_detail to display information for all blocks\n",prefix?prefix:"");
 44:         PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
 45:         if (!rank) {
 46:           PetscViewerASCIIPushTab(viewer);
 47:           KSPView(osm->ksp[0],sviewer);
 48:           PetscViewerASCIIPopTab(viewer);
 49:         }
 50:         PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
 51:       }
 52:     } else {
 53:       PetscViewerASCIIPushSynchronized(viewer);
 54:       PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] number of local blocks = %D\n",(int)rank,osm->n_local_true);
 55:       PetscViewerFlush(viewer);
 56:       PetscViewerASCIIPrintf(viewer,"  Local solver information for each block is in the following KSP and PC objects:\n");
 57:       PetscViewerASCIIPushTab(viewer);
 58:       PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");
 59:       PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
 60:       for (i=0; i<osm->n_local_true; i++) {
 61:         ISGetLocalSize(osm->is[i],&bsz);
 62:         PetscViewerASCIISynchronizedPrintf(sviewer,"[%d] local block number %D, size = %D\n",(int)rank,i,bsz);
 63:         KSPView(osm->ksp[i],sviewer);
 64:         PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");
 65:       }
 66:       PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
 67:       PetscViewerASCIIPopTab(viewer);
 68:       PetscViewerFlush(viewer);
 69:       PetscViewerASCIIPopSynchronized(viewer);
 70:     }
 71:   } else if (isstring) {
 72:     PetscViewerStringSPrintf(viewer," blocks=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCASMTypes[osm->type]);
 73:     PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
 74:     if (osm->ksp) {KSPView(osm->ksp[0],sviewer);}
 75:     PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
 76:   }
 77:   return(0);
 78: }

 80: static PetscErrorCode PCASMPrintSubdomains(PC pc)
 81: {
 82:   PC_ASM         *osm = (PC_ASM*)pc->data;
 83:   const char     *prefix;
 84:   char           fname[PETSC_MAX_PATH_LEN+1];
 85:   PetscViewer    viewer, sviewer;
 86:   char           *s;
 87:   PetscInt       i,j,nidx;
 88:   const PetscInt *idx;
 89:   PetscMPIInt    rank, size;

 93:   MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);
 94:   MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);
 95:   PCGetOptionsPrefix(pc,&prefix);
 96:   PetscOptionsGetString(NULL,prefix,"-pc_asm_print_subdomains",fname,sizeof(fname),NULL);
 97:   if (fname[0] == 0) { PetscStrcpy(fname,"stdout"); };
 98:   PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc),fname,&viewer);
 99:   for (i=0; i<osm->n_local; i++) {
100:     if (i < osm->n_local_true) {
101:       ISGetLocalSize(osm->is[i],&nidx);
102:       ISGetIndices(osm->is[i],&idx);
103:       /* Print to a string viewer; no more than 15 characters per index plus 512 char for the header.*/
104: #define len  16*(nidx+1)+512
105:       PetscMalloc1(len,&s);
106:       PetscViewerStringOpen(PETSC_COMM_SELF, s, len, &sviewer);
107: #undef len
108:       PetscViewerStringSPrintf(sviewer, "[%D:%D] Subdomain %D with overlap:\n", rank, size, i);
109:       for (j=0; j<nidx; j++) {
110:         PetscViewerStringSPrintf(sviewer,"%D ",idx[j]);
111:       }
112:       ISRestoreIndices(osm->is[i],&idx);
113:       PetscViewerStringSPrintf(sviewer,"\n");
114:       PetscViewerDestroy(&sviewer);
115:       PetscViewerASCIIPushSynchronized(viewer);
116:       PetscViewerASCIISynchronizedPrintf(viewer, s);
117:       PetscViewerFlush(viewer);
118:       PetscViewerASCIIPopSynchronized(viewer);
119:       PetscFree(s);
120:       if (osm->is_local) {
121:         /* Print to a string viewer; no more than 15 characters per index plus 512 char for the header.*/
122: #define len  16*(nidx+1)+512
123:         PetscMalloc1(len, &s);
124:         PetscViewerStringOpen(PETSC_COMM_SELF, s, len, &sviewer);
125: #undef len
126:         PetscViewerStringSPrintf(sviewer, "[%D:%D] Subdomain %D without overlap:\n", rank, size, i);
127:         ISGetLocalSize(osm->is_local[i],&nidx);
128:         ISGetIndices(osm->is_local[i],&idx);
129:         for (j=0; j<nidx; j++) {
130:           PetscViewerStringSPrintf(sviewer,"%D ",idx[j]);
131:         }
132:         ISRestoreIndices(osm->is_local[i],&idx);
133:         PetscViewerStringSPrintf(sviewer,"\n");
134:         PetscViewerDestroy(&sviewer);
135:         PetscViewerASCIIPushSynchronized(viewer);
136:         PetscViewerASCIISynchronizedPrintf(viewer, s);
137:         PetscViewerFlush(viewer);
138:         PetscViewerASCIIPopSynchronized(viewer);
139:         PetscFree(s);
140:       }
141:     } else {
142:       /* Participate in collective viewer calls. */
143:       PetscViewerASCIIPushSynchronized(viewer);
144:       PetscViewerFlush(viewer);
145:       PetscViewerASCIIPopSynchronized(viewer);
146:       /* Assume either all ranks have is_local or none do. */
147:       if (osm->is_local) {
148:         PetscViewerASCIIPushSynchronized(viewer);
149:         PetscViewerFlush(viewer);
150:         PetscViewerASCIIPopSynchronized(viewer);
151:       }
152:     }
153:   }
154:   PetscViewerFlush(viewer);
155:   PetscViewerDestroy(&viewer);
156:   return(0);
157: }

159: static PetscErrorCode PCSetUp_ASM(PC pc)
160: {
161:   PC_ASM         *osm = (PC_ASM*)pc->data;
163:   PetscBool      flg;
164:   PetscInt       i,m,m_local;
165:   MatReuse       scall = MAT_REUSE_MATRIX;
166:   IS             isl;
167:   KSP            ksp;
168:   PC             subpc;
169:   const char     *prefix,*pprefix;
170:   Vec            vec;
171:   DM             *domain_dm = NULL;

174:   if (!pc->setupcalled) {
175:     PetscInt m;

177:     /* Note: if subdomains have been set either via PCASMSetTotalSubdomains() or via PCASMSetLocalSubdomains(), osm->n_local_true will not be PETSC_DECIDE */
178:     if (osm->n_local_true == PETSC_DECIDE) {
179:       /* no subdomains given */
180:       /* try pc->dm first, if allowed */
181:       if (osm->dm_subdomains && pc->dm) {
182:         PetscInt  num_domains, d;
183:         char      **domain_names;
184:         IS        *inner_domain_is, *outer_domain_is;
185:         DMCreateDomainDecomposition(pc->dm, &num_domains, &domain_names, &inner_domain_is, &outer_domain_is, &domain_dm);
186:         osm->overlap = -1; /* We do not want to increase the overlap of the IS.
187:                               A future improvement of this code might allow one to use
188:                               DM-defined subdomains and also increase the overlap,
189:                               but that is not currently supported */
190:         if (num_domains) {
191:           PCASMSetLocalSubdomains(pc, num_domains, outer_domain_is, inner_domain_is);
192:         }
193:         for (d = 0; d < num_domains; ++d) {
194:           if (domain_names)    {PetscFree(domain_names[d]);}
195:           if (inner_domain_is) {ISDestroy(&inner_domain_is[d]);}
196:           if (outer_domain_is) {ISDestroy(&outer_domain_is[d]);}
197:         }
198:         PetscFree(domain_names);
199:         PetscFree(inner_domain_is);
200:         PetscFree(outer_domain_is);
201:       }
202:       if (osm->n_local_true == PETSC_DECIDE) {
203:         /* still no subdomains; use one subdomain per processor */
204:         osm->n_local_true = 1;
205:       }
206:     }
207:     { /* determine the global and max number of subdomains */
208:       struct {PetscInt max,sum;} inwork,outwork;
209:       PetscMPIInt size;

211:       inwork.max   = osm->n_local_true;
212:       inwork.sum   = osm->n_local_true;
213:       MPIU_Allreduce(&inwork,&outwork,1,MPIU_2INT,MPIU_MAXSUM_OP,PetscObjectComm((PetscObject)pc));
214:       osm->n_local = outwork.max;
215:       osm->n       = outwork.sum;

217:       MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
218:       if (outwork.max == 1 && outwork.sum == size) {
219:         /* osm->n_local_true = 1 on all processes, set this option may enable use of optimized MatCreateSubMatrices() implementation */
220:         MatSetOption(pc->pmat,MAT_SUBMAT_SINGLEIS,PETSC_TRUE);
221:       }
222:     }
223:     if (!osm->is) { /* create the index sets */
224:       PCASMCreateSubdomains(pc->pmat,osm->n_local_true,&osm->is);
225:     }
226:     if (osm->n_local_true > 1 && !osm->is_local) {
227:       PetscMalloc1(osm->n_local_true,&osm->is_local);
228:       for (i=0; i<osm->n_local_true; i++) {
229:         if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */
230:           ISDuplicate(osm->is[i],&osm->is_local[i]);
231:           ISCopy(osm->is[i],osm->is_local[i]);
232:         } else {
233:           PetscObjectReference((PetscObject)osm->is[i]);
234:           osm->is_local[i] = osm->is[i];
235:         }
236:       }
237:     }
238:     PCGetOptionsPrefix(pc,&prefix);
239:     flg  = PETSC_FALSE;
240:     PetscOptionsGetBool(NULL,prefix,"-pc_asm_print_subdomains",&flg,NULL);
241:     if (flg) { PCASMPrintSubdomains(pc); }

243:     if (osm->overlap > 0) {
244:       /* Extend the "overlapping" regions by a number of steps */
245:       MatIncreaseOverlap(pc->pmat,osm->n_local_true,osm->is,osm->overlap);
246:     }
247:     if (osm->sort_indices) {
248:       for (i=0; i<osm->n_local_true; i++) {
249:         ISSort(osm->is[i]);
250:         if (osm->is_local) {
251:           ISSort(osm->is_local[i]);
252:         }
253:       }
254:     }

256:     if (!osm->ksp) {
257:       /* Create the local solvers */
258:       PetscMalloc1(osm->n_local_true,&osm->ksp);
259:       if (domain_dm) {
260:         PetscInfo(pc,"Setting up ASM subproblems using the embedded DM\n");
261:       }
262:       for (i=0; i<osm->n_local_true; i++) {
263:         KSPCreate(PETSC_COMM_SELF,&ksp);
264:         KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);
265:         PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);
266:         PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);
267:         KSPSetType(ksp,KSPPREONLY);
268:         KSPGetPC(ksp,&subpc);
269:         PCGetOptionsPrefix(pc,&prefix);
270:         KSPSetOptionsPrefix(ksp,prefix);
271:         KSPAppendOptionsPrefix(ksp,"sub_");
272:         if (domain_dm) {
273:           KSPSetDM(ksp, domain_dm[i]);
274:           KSPSetDMActive(ksp, PETSC_FALSE);
275:           DMDestroy(&domain_dm[i]);
276:         }
277:         osm->ksp[i] = ksp;
278:       }
279:       if (domain_dm) {
280:         PetscFree(domain_dm);
281:       }
282:     }

284:     ISConcatenate(PETSC_COMM_SELF, osm->n_local_true, osm->is, &osm->lis);
285:     ISSortRemoveDups(osm->lis);
286:     ISGetLocalSize(osm->lis, &m);

288:     scall = MAT_INITIAL_MATRIX;
289:   } else {
290:     /*
291:        Destroy the blocks from the previous iteration
292:     */
293:     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
294:       MatDestroyMatrices(osm->n_local_true,&osm->pmat);
295:       scall = MAT_INITIAL_MATRIX;
296:     }
297:   }

299:   /* Destroy previous submatrices of a different type than pc->pmat since MAT_REUSE_MATRIX won't work in that case */
300:   if ((scall == MAT_REUSE_MATRIX) && osm->sub_mat_type) {
301:     if (osm->n_local_true > 0) {
302:       MatDestroySubMatrices(osm->n_local_true,&osm->pmat);
303:     }
304:     scall = MAT_INITIAL_MATRIX;
305:   }

307:   /*
308:      Extract out the submatrices
309:   */
310:   MatCreateSubMatrices(pc->pmat,osm->n_local_true,osm->is,osm->is,scall,&osm->pmat);
311:   if (scall == MAT_INITIAL_MATRIX) {
312:     PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);
313:     for (i=0; i<osm->n_local_true; i++) {
314:       PetscLogObjectParent((PetscObject)pc,(PetscObject)osm->pmat[i]);
315:       PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);
316:     }
317:   }

319:   /* Convert the types of the submatrices (if needbe) */
320:   if (osm->sub_mat_type) {
321:     for (i=0; i<osm->n_local_true; i++) {
322:       MatConvert(osm->pmat[i],osm->sub_mat_type,MAT_INPLACE_MATRIX,&(osm->pmat[i]));
323:     }
324:   }

326:   if (!pc->setupcalled) {
327:     VecType vtype;

329:     /* Create the local work vectors (from the local matrices) and scatter contexts */
330:     MatCreateVecs(pc->pmat,&vec,NULL);

332:     if (osm->is_local && (osm->type == PC_ASM_INTERPOLATE || osm->type == PC_ASM_NONE)) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot use interpolate or none PCASMType if is_local was provided to PCASMSetLocalSubdomains()");
333:     if (osm->is_local && osm->type == PC_ASM_RESTRICT && osm->loctype == PC_COMPOSITE_ADDITIVE) {
334:       PetscMalloc1(osm->n_local_true,&osm->lprolongation);
335:     }
336:     PetscMalloc1(osm->n_local_true,&osm->lrestriction);
337:     PetscMalloc1(osm->n_local_true,&osm->x);
338:     PetscMalloc1(osm->n_local_true,&osm->y);

340:     ISGetLocalSize(osm->lis,&m);
341:     ISCreateStride(PETSC_COMM_SELF,m,0,1,&isl);
342:     MatGetVecType(osm->pmat[0],&vtype);
343:     VecCreate(PETSC_COMM_SELF,&osm->lx);
344:     VecSetSizes(osm->lx,m,m);
345:     VecSetType(osm->lx,vtype);
346:     VecDuplicate(osm->lx, &osm->ly);
347:     VecScatterCreate(vec,osm->lis,osm->lx,isl,&osm->restriction);
348:     ISDestroy(&isl);

350:     for (i=0; i<osm->n_local_true; ++i) {
351:       ISLocalToGlobalMapping ltog;
352:       IS                     isll;
353:       const PetscInt         *idx_is;
354:       PetscInt               *idx_lis,nout;

356:       ISGetLocalSize(osm->is[i],&m);
357:       MatCreateVecs(osm->pmat[i],&osm->x[i],NULL);
358:       VecDuplicate(osm->x[i],&osm->y[i]);

360:       /* generate a scatter from ly to y[i] picking all the overlapping is[i] entries */
361:       ISLocalToGlobalMappingCreateIS(osm->lis,&ltog);
362:       ISGetLocalSize(osm->is[i],&m);
363:       ISGetIndices(osm->is[i], &idx_is);
364:       PetscMalloc1(m,&idx_lis);
365:       ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,m,idx_is,&nout,idx_lis);
366:       if (nout != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"is not a subset of lis");
367:       ISRestoreIndices(osm->is[i], &idx_is);
368:       ISCreateGeneral(PETSC_COMM_SELF,m,idx_lis,PETSC_OWN_POINTER,&isll);
369:       ISLocalToGlobalMappingDestroy(&ltog);
370:       ISCreateStride(PETSC_COMM_SELF,m,0,1,&isl);
371:       VecScatterCreate(osm->ly,isll,osm->y[i],isl,&osm->lrestriction[i]);
372:       ISDestroy(&isll);
373:       ISDestroy(&isl);
374:       if (osm->lprolongation) { /* generate a scatter from y[i] to ly picking only the the non-overlapping is_local[i] entries */
375:         ISLocalToGlobalMapping ltog;
376:         IS                     isll,isll_local;
377:         const PetscInt         *idx_local;
378:         PetscInt               *idx1, *idx2, nout;

380:         ISGetLocalSize(osm->is_local[i],&m_local);
381:         ISGetIndices(osm->is_local[i], &idx_local);

383:         ISLocalToGlobalMappingCreateIS(osm->is[i],&ltog);
384:         PetscMalloc1(m_local,&idx1);
385:         ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,m_local,idx_local,&nout,idx1);
386:         ISLocalToGlobalMappingDestroy(&ltog);
387:         if (nout != m_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"is_local not a subset of is");
388:         ISCreateGeneral(PETSC_COMM_SELF,m_local,idx1,PETSC_OWN_POINTER,&isll);

390:         ISLocalToGlobalMappingCreateIS(osm->lis,&ltog);
391:         PetscMalloc1(m_local,&idx2);
392:         ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,m_local,idx_local,&nout,idx2);
393:         ISLocalToGlobalMappingDestroy(&ltog);
394:         if (nout != m_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"is_local not a subset of lis");
395:         ISCreateGeneral(PETSC_COMM_SELF,m_local,idx2,PETSC_OWN_POINTER,&isll_local);

397:         ISRestoreIndices(osm->is_local[i], &idx_local);
398:         VecScatterCreate(osm->y[i],isll,osm->ly,isll_local,&osm->lprolongation[i]);

400:         ISDestroy(&isll);
401:         ISDestroy(&isll_local);
402:       }
403:     }
404:     VecDestroy(&vec);
405:   }

407:   if (osm->loctype == PC_COMPOSITE_MULTIPLICATIVE) {
408:     IS      *cis;
409:     PetscInt c;

411:     PetscMalloc1(osm->n_local_true, &cis);
412:     for (c = 0; c < osm->n_local_true; ++c) cis[c] = osm->lis;
413:     MatCreateSubMatrices(pc->pmat, osm->n_local_true, osm->is, cis, scall, &osm->lmats);
414:     PetscFree(cis);
415:   }

417:   /* Return control to the user so that the submatrices can be modified (e.g., to apply
418:      different boundary conditions for the submatrices than for the global problem) */
419:   PCModifySubMatrices(pc,osm->n_local_true,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);

421:   /*
422:      Loop over subdomains putting them into local ksp
423:   */
424:   KSPGetOptionsPrefix(osm->ksp[0],&prefix);
425:   for (i=0; i<osm->n_local_true; i++) {
426:     KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i]);
427:     MatSetOptionsPrefix(osm->pmat[i],prefix);
428:     if (!pc->setupcalled) {
429:       KSPSetFromOptions(osm->ksp[i]);
430:     }
431:   }
432:   return(0);
433: }

435: static PetscErrorCode PCSetUpOnBlocks_ASM(PC pc)
436: {
437:   PC_ASM             *osm = (PC_ASM*)pc->data;
438:   PetscErrorCode     ierr;
439:   PetscInt           i;
440:   KSPConvergedReason reason;

443:   for (i=0; i<osm->n_local_true; i++) {
444:     KSPSetUp(osm->ksp[i]);
445:     KSPGetConvergedReason(osm->ksp[i],&reason);
446:     if (reason == KSP_DIVERGED_PC_FAILED) {
447:       pc->failedreason = PC_SUBPC_ERROR;
448:     }
449:   }
450:   return(0);
451: }

453: static PetscErrorCode PCApply_ASM(PC pc,Vec x,Vec y)
454: {
455:   PC_ASM         *osm = (PC_ASM*)pc->data;
457:   PetscInt       i,n_local_true = osm->n_local_true;
458:   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

461:   /*
462:      support for limiting the restriction or interpolation to only local
463:      subdomain values (leaving the other values 0).
464:   */
465:   if (!(osm->type & PC_ASM_RESTRICT)) {
466:     forward = SCATTER_FORWARD_LOCAL;
467:     /* have to zero the work RHS since scatter may leave some slots empty */
468:     VecSet(osm->lx, 0.0);
469:   }
470:   if (!(osm->type & PC_ASM_INTERPOLATE)) {
471:     reverse = SCATTER_REVERSE_LOCAL;
472:   }

474:   if (osm->loctype == PC_COMPOSITE_MULTIPLICATIVE || osm->loctype == PC_COMPOSITE_ADDITIVE) {
475:     /* zero the global and the local solutions */
476:     VecSet(y, 0.0);
477:     VecSet(osm->ly, 0.0);

479:     /* copy the global RHS to local RHS including the ghost nodes */
480:     VecScatterBegin(osm->restriction, x, osm->lx, INSERT_VALUES, forward);
481:     VecScatterEnd(osm->restriction, x, osm->lx, INSERT_VALUES, forward);

483:     /* restrict local RHS to the overlapping 0-block RHS */
484:     VecScatterBegin(osm->lrestriction[0], osm->lx, osm->x[0], INSERT_VALUES, forward);
485:     VecScatterEnd(osm->lrestriction[0], osm->lx, osm->x[0], INSERT_VALUES, forward);

487:     /* do the local solves */
488:     for (i = 0; i < n_local_true; ++i) {

490:       /* solve the overlapping i-block */
491:       PetscLogEventBegin(PC_ApplyOnBlocks, osm->ksp[i], osm->x[i], osm->y[i],0);
492:       KSPSolve(osm->ksp[i], osm->x[i], osm->y[i]);
493:       KSPCheckSolve(osm->ksp[i], pc, osm->y[i]);
494:       PetscLogEventEnd(PC_ApplyOnBlocks, osm->ksp[i], osm->x[i], osm->y[i], 0);

496:       if (osm->lprolongation) { /* interpolate the non-overlapping i-block solution to the local solution (only for restrictive additive) */
497:         VecScatterBegin(osm->lprolongation[i], osm->y[i], osm->ly, ADD_VALUES, forward);
498:         VecScatterEnd(osm->lprolongation[i], osm->y[i], osm->ly, ADD_VALUES, forward);
499:       } else { /* interpolate the overlapping i-block solution to the local solution */
500:         VecScatterBegin(osm->lrestriction[i], osm->y[i], osm->ly, ADD_VALUES, reverse);
501:         VecScatterEnd(osm->lrestriction[i], osm->y[i], osm->ly, ADD_VALUES, reverse);
502:       }

504:       if (i < n_local_true-1) {
505:         /* restrict local RHS to the overlapping (i+1)-block RHS */
506:         VecScatterBegin(osm->lrestriction[i+1], osm->lx, osm->x[i+1], INSERT_VALUES, forward);
507:         VecScatterEnd(osm->lrestriction[i+1], osm->lx, osm->x[i+1], INSERT_VALUES, forward);

509:         if (osm->loctype == PC_COMPOSITE_MULTIPLICATIVE) {
510:           /* update the overlapping (i+1)-block RHS using the current local solution */
511:           MatMult(osm->lmats[i+1], osm->ly, osm->y[i+1]);
512:           VecAXPBY(osm->x[i+1],-1.,1., osm->y[i+1]);
513:         }
514:       }
515:     }
516:     /* add the local solution to the global solution including the ghost nodes */
517:     VecScatterBegin(osm->restriction, osm->ly, y, ADD_VALUES, reverse);
518:     VecScatterEnd(osm->restriction, osm->ly, y, ADD_VALUES, reverse);
519:   } else SETERRQ1(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Invalid local composition type: %s", PCCompositeTypes[osm->loctype]);
520:   return(0);
521: }

523: static PetscErrorCode PCMatApply_ASM(PC pc,Mat X,Mat Y)
524: {
525:   PC_ASM         *osm = (PC_ASM*)pc->data;
526:   Mat            Z,W;
527:   Vec            x;
528:   PetscInt       i,m,N;
529:   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

533:   if (osm->n_local_true > 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
534:   /*
535:      support for limiting the restriction or interpolation to only local
536:      subdomain values (leaving the other values 0).
537:   */
538:   if (!(osm->type & PC_ASM_RESTRICT)) {
539:     forward = SCATTER_FORWARD_LOCAL;
540:     /* have to zero the work RHS since scatter may leave some slots empty */
541:     VecSet(osm->lx, 0.0);
542:   }
543:   if (!(osm->type & PC_ASM_INTERPOLATE)) {
544:     reverse = SCATTER_REVERSE_LOCAL;
545:   }
546:   VecGetLocalSize(osm->x[0], &m);
547:   MatGetSize(X, NULL, &N);
548:   MatCreateSeqDense(PETSC_COMM_SELF, m, N, NULL, &Z);
549:   if (osm->loctype == PC_COMPOSITE_MULTIPLICATIVE || osm->loctype == PC_COMPOSITE_ADDITIVE) {
550:     /* zero the global and the local solutions */
551:     MatZeroEntries(Y);
552:     VecSet(osm->ly, 0.0);

554:     for (i = 0; i < N; ++i) {
555:       MatDenseGetColumnVecRead(X, i, &x);
556:       /* copy the global RHS to local RHS including the ghost nodes */
557:       VecScatterBegin(osm->restriction, x, osm->lx, INSERT_VALUES, forward);
558:       VecScatterEnd(osm->restriction, x, osm->lx, INSERT_VALUES, forward);
559:       MatDenseRestoreColumnVecRead(X, i, &x);

561:       MatDenseGetColumnVecWrite(Z, i, &x);
562:       /* restrict local RHS to the overlapping 0-block RHS */
563:       VecScatterBegin(osm->lrestriction[0], osm->lx, x, INSERT_VALUES, forward);
564:       VecScatterEnd(osm->lrestriction[0], osm->lx, x, INSERT_VALUES, forward);
565:       MatDenseRestoreColumnVecWrite(Z, i, &x);
566:     }
567:     MatCreateSeqDense(PETSC_COMM_SELF, m, N, NULL, &W);
568:     /* solve the overlapping 0-block */
569:     PetscLogEventBegin(PC_ApplyOnBlocks, osm->ksp[0], Z, W, 0);
570:     KSPMatSolve(osm->ksp[0], Z, W);
571:     KSPCheckSolve(osm->ksp[0], pc, NULL);
572:     PetscLogEventEnd(PC_ApplyOnBlocks, osm->ksp[0], Z, W,0);
573:     MatDestroy(&Z);

575:     for (i = 0; i < N; ++i) {
576:       VecSet(osm->ly, 0.0);
577:       MatDenseGetColumnVecRead(W, i, &x);
578:       if (osm->lprolongation) { /* interpolate the non-overlapping 0-block solution to the local solution (only for restrictive additive) */
579:         VecScatterBegin(osm->lprolongation[0], x, osm->ly, ADD_VALUES, forward);
580:         VecScatterEnd(osm->lprolongation[0], x, osm->ly, ADD_VALUES, forward);
581:       } else { /* interpolate the overlapping 0-block solution to the local solution */
582:         VecScatterBegin(osm->lrestriction[0], x, osm->ly, ADD_VALUES, reverse);
583:         VecScatterEnd(osm->lrestriction[0], x, osm->ly, ADD_VALUES, reverse);
584:       }
585:       MatDenseRestoreColumnVecRead(W, i, &x);

587:       MatDenseGetColumnVecWrite(Y, i, &x);
588:       /* add the local solution to the global solution including the ghost nodes */
589:       VecScatterBegin(osm->restriction, osm->ly, x, ADD_VALUES, reverse);
590:       VecScatterEnd(osm->restriction, osm->ly, x, ADD_VALUES, reverse);
591:       MatDenseRestoreColumnVecWrite(Y, i, &x);
592:     }
593:     MatDestroy(&W);
594:   } else SETERRQ1(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Invalid local composition type: %s", PCCompositeTypes[osm->loctype]);
595:   return(0);
596: }

598: static PetscErrorCode PCApplyTranspose_ASM(PC pc,Vec x,Vec y)
599: {
600:   PC_ASM         *osm = (PC_ASM*)pc->data;
602:   PetscInt       i,n_local_true = osm->n_local_true;
603:   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

606:   /*
607:      Support for limiting the restriction or interpolation to only local
608:      subdomain values (leaving the other values 0).

610:      Note: these are reversed from the PCApply_ASM() because we are applying the
611:      transpose of the three terms
612:   */

614:   if (!(osm->type & PC_ASM_INTERPOLATE)) {
615:     forward = SCATTER_FORWARD_LOCAL;
616:     /* have to zero the work RHS since scatter may leave some slots empty */
617:     VecSet(osm->lx, 0.0);
618:   }
619:   if (!(osm->type & PC_ASM_RESTRICT)) reverse = SCATTER_REVERSE_LOCAL;

621:   /* zero the global and the local solutions */
622:   VecSet(y, 0.0);
623:   VecSet(osm->ly, 0.0);

625:   /* Copy the global RHS to local RHS including the ghost nodes */
626:   VecScatterBegin(osm->restriction, x, osm->lx, INSERT_VALUES, forward);
627:   VecScatterEnd(osm->restriction, x, osm->lx, INSERT_VALUES, forward);

629:   /* Restrict local RHS to the overlapping 0-block RHS */
630:   VecScatterBegin(osm->lrestriction[0], osm->lx, osm->x[0], INSERT_VALUES, forward);
631:   VecScatterEnd(osm->lrestriction[0], osm->lx, osm->x[0], INSERT_VALUES, forward);

633:   /* do the local solves */
634:   for (i = 0; i < n_local_true; ++i) {

636:     /* solve the overlapping i-block */
637:     PetscLogEventBegin(PC_ApplyOnBlocks,osm->ksp[i],osm->x[i],osm->y[i],0);
638:     KSPSolveTranspose(osm->ksp[i], osm->x[i], osm->y[i]);
639:     KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);
640:     PetscLogEventEnd(PC_ApplyOnBlocks,osm->ksp[i],osm->x[i],osm->y[i],0);

642:     if (osm->lprolongation) { /* interpolate the non-overlapping i-block solution to the local solution */
643:       VecScatterBegin(osm->lprolongation[i], osm->y[i], osm->ly, ADD_VALUES, forward);
644:       VecScatterEnd(osm->lprolongation[i], osm->y[i], osm->ly, ADD_VALUES, forward);
645:     } else { /* interpolate the overlapping i-block solution to the local solution */
646:       VecScatterBegin(osm->lrestriction[i], osm->y[i], osm->ly, ADD_VALUES, reverse);
647:       VecScatterEnd(osm->lrestriction[i], osm->y[i], osm->ly, ADD_VALUES, reverse);
648:     }

650:     if (i < n_local_true-1) {
651:       /* Restrict local RHS to the overlapping (i+1)-block RHS */
652:       VecScatterBegin(osm->lrestriction[i+1], osm->lx, osm->x[i+1], INSERT_VALUES, forward);
653:       VecScatterEnd(osm->lrestriction[i+1], osm->lx, osm->x[i+1], INSERT_VALUES, forward);
654:     }
655:   }
656:   /* Add the local solution to the global solution including the ghost nodes */
657:   VecScatterBegin(osm->restriction, osm->ly, y, ADD_VALUES, reverse);
658:   VecScatterEnd(osm->restriction, osm->ly, y, ADD_VALUES, reverse);

660:   return(0);

662: }

664: static PetscErrorCode PCReset_ASM(PC pc)
665: {
666:   PC_ASM         *osm = (PC_ASM*)pc->data;
668:   PetscInt       i;

671:   if (osm->ksp) {
672:     for (i=0; i<osm->n_local_true; i++) {
673:       KSPReset(osm->ksp[i]);
674:     }
675:   }
676:   if (osm->pmat) {
677:     if (osm->n_local_true > 0) {
678:       MatDestroySubMatrices(osm->n_local_true,&osm->pmat);
679:     }
680:   }
681:   if (osm->lrestriction) {
682:     VecScatterDestroy(&osm->restriction);
683:     for (i=0; i<osm->n_local_true; i++) {
684:       VecScatterDestroy(&osm->lrestriction[i]);
685:       if (osm->lprolongation) {VecScatterDestroy(&osm->lprolongation[i]);}
686:       VecDestroy(&osm->x[i]);
687:       VecDestroy(&osm->y[i]);
688:     }
689:     PetscFree(osm->lrestriction);
690:     if (osm->lprolongation) {PetscFree(osm->lprolongation);}
691:     PetscFree(osm->x);
692:     PetscFree(osm->y);

694:   }
695:   PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);
696:   ISDestroy(&osm->lis);
697:   VecDestroy(&osm->lx);
698:   VecDestroy(&osm->ly);
699:   if (osm->loctype == PC_COMPOSITE_MULTIPLICATIVE) {
700:     MatDestroyMatrices(osm->n_local_true, &osm->lmats);
701:   }

703:   PetscFree(osm->sub_mat_type);

705:   osm->is       = NULL;
706:   osm->is_local = NULL;
707:   return(0);
708: }

710: static PetscErrorCode PCDestroy_ASM(PC pc)
711: {
712:   PC_ASM         *osm = (PC_ASM*)pc->data;
714:   PetscInt       i;

717:   PCReset_ASM(pc);
718:   if (osm->ksp) {
719:     for (i=0; i<osm->n_local_true; i++) {
720:       KSPDestroy(&osm->ksp[i]);
721:     }
722:     PetscFree(osm->ksp);
723:   }
724:   PetscFree(pc->data);

726:   PetscObjectComposeFunction((PetscObject)pc,"PCASMSetLocalSubdomains_C",NULL);
727:   PetscObjectComposeFunction((PetscObject)pc,"PCASMSetTotalSubdomains_C",NULL);
728:   PetscObjectComposeFunction((PetscObject)pc,"PCASMSetOverlap_C",NULL);
729:   PetscObjectComposeFunction((PetscObject)pc,"PCASMSetType_C",NULL);
730:   PetscObjectComposeFunction((PetscObject)pc,"PCASMGetType_C",NULL);
731:   PetscObjectComposeFunction((PetscObject)pc,"PCASMSetLocalType_C",NULL);
732:   PetscObjectComposeFunction((PetscObject)pc,"PCASMGetLocalType_C",NULL);
733:   PetscObjectComposeFunction((PetscObject)pc,"PCASMSetSortIndices_C",NULL);
734:   PetscObjectComposeFunction((PetscObject)pc,"PCASMGetSubKSP_C",NULL);
735:   PetscObjectComposeFunction((PetscObject)pc,"PCASMGetSubMatType_C",NULL);
736:   PetscObjectComposeFunction((PetscObject)pc,"PCASMSetSubMatType_C",NULL);
737:   return(0);
738: }

740: static PetscErrorCode PCSetFromOptions_ASM(PetscOptionItems *PetscOptionsObject,PC pc)
741: {
742:   PC_ASM         *osm = (PC_ASM*)pc->data;
744:   PetscInt       blocks,ovl;
745:   PetscBool      flg;
746:   PCASMType      asmtype;
747:   PCCompositeType loctype;
748:   char           sub_mat_type[256];

751:   PetscOptionsHead(PetscOptionsObject,"Additive Schwarz options");
752:   PetscOptionsBool("-pc_asm_dm_subdomains","Use DMCreateDomainDecomposition() to define subdomains","PCASMSetDMSubdomains",osm->dm_subdomains,&osm->dm_subdomains,&flg);
753:   PetscOptionsInt("-pc_asm_blocks","Number of subdomains","PCASMSetTotalSubdomains",osm->n,&blocks,&flg);
754:   if (flg) {
755:     PCASMSetTotalSubdomains(pc,blocks,NULL,NULL);
756:     osm->dm_subdomains = PETSC_FALSE;
757:   }
758:   PetscOptionsInt("-pc_asm_local_blocks","Number of local subdomains","PCASMSetLocalSubdomains",osm->n_local_true,&blocks,&flg);
759:   if (flg) {
760:     PCASMSetLocalSubdomains(pc,blocks,NULL,NULL);
761:     osm->dm_subdomains = PETSC_FALSE;
762:   }
763:   PetscOptionsInt("-pc_asm_overlap","Number of grid points overlap","PCASMSetOverlap",osm->overlap,&ovl,&flg);
764:   if (flg) {
765:     PCASMSetOverlap(pc,ovl);
766:     osm->dm_subdomains = PETSC_FALSE;
767:   }
768:   flg  = PETSC_FALSE;
769:   PetscOptionsEnum("-pc_asm_type","Type of restriction/extension","PCASMSetType",PCASMTypes,(PetscEnum)osm->type,(PetscEnum*)&asmtype,&flg);
770:   if (flg) {PCASMSetType(pc,asmtype); }
771:   flg  = PETSC_FALSE;
772:   PetscOptionsEnum("-pc_asm_local_type","Type of local solver composition","PCASMSetLocalType",PCCompositeTypes,(PetscEnum)osm->loctype,(PetscEnum*)&loctype,&flg);
773:   if (flg) {PCASMSetLocalType(pc,loctype); }
774:   PetscOptionsFList("-pc_asm_sub_mat_type","Subsolve Matrix Type","PCASMSetSubMatType",MatList,NULL,sub_mat_type,256,&flg);
775:   if (flg) {
776:     PCASMSetSubMatType(pc,sub_mat_type);
777:   }
778:   PetscOptionsTail();
779:   return(0);
780: }

782: /*------------------------------------------------------------------------------------*/

784: static PetscErrorCode  PCASMSetLocalSubdomains_ASM(PC pc,PetscInt n,IS is[],IS is_local[])
785: {
786:   PC_ASM         *osm = (PC_ASM*)pc->data;
788:   PetscInt       i;

791:   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
792:   if (pc->setupcalled && (n != osm->n_local_true || is)) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCASMSetLocalSubdomains() should be called before calling PCSetUp().");

794:   if (!pc->setupcalled) {
795:     if (is) {
796:       for (i=0; i<n; i++) {PetscObjectReference((PetscObject)is[i]);}
797:     }
798:     if (is_local) {
799:       for (i=0; i<n; i++) {PetscObjectReference((PetscObject)is_local[i]);}
800:     }
801:     PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);

803:     osm->n_local_true = n;
804:     osm->is           = NULL;
805:     osm->is_local     = NULL;
806:     if (is) {
807:       PetscMalloc1(n,&osm->is);
808:       for (i=0; i<n; i++) osm->is[i] = is[i];
809:       /* Flag indicating that the user has set overlapping subdomains so PCASM should not increase their size. */
810:       osm->overlap = -1;
811:     }
812:     if (is_local) {
813:       PetscMalloc1(n,&osm->is_local);
814:       for (i=0; i<n; i++) osm->is_local[i] = is_local[i];
815:       if (!is) {
816:         PetscMalloc1(osm->n_local_true,&osm->is);
817:         for (i=0; i<osm->n_local_true; i++) {
818:           if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */
819:             ISDuplicate(osm->is_local[i],&osm->is[i]);
820:             ISCopy(osm->is_local[i],osm->is[i]);
821:           } else {
822:             PetscObjectReference((PetscObject)osm->is_local[i]);
823:             osm->is[i] = osm->is_local[i];
824:           }
825:         }
826:       }
827:     }
828:   }
829:   return(0);
830: }

832: static PetscErrorCode  PCASMSetTotalSubdomains_ASM(PC pc,PetscInt N,IS *is,IS *is_local)
833: {
834:   PC_ASM         *osm = (PC_ASM*)pc->data;
836:   PetscMPIInt    rank,size;
837:   PetscInt       n;

840:   if (N < 1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);
841:   if (is || is_local) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Use PCASMSetLocalSubdomains() to set specific index sets\n\they cannot be set globally yet.");

843:   /*
844:      Split the subdomains equally among all processors
845:   */
846:   MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
847:   MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
848:   n    = N/size + ((N % size) > rank);
849:   if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
850:   if (pc->setupcalled && n != osm->n_local_true) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCASMSetTotalSubdomains() should be called before PCSetUp().");
851:   if (!pc->setupcalled) {
852:     PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);

854:     osm->n_local_true = n;
855:     osm->is           = NULL;
856:     osm->is_local     = NULL;
857:   }
858:   return(0);
859: }

861: static PetscErrorCode  PCASMSetOverlap_ASM(PC pc,PetscInt ovl)
862: {
863:   PC_ASM *osm = (PC_ASM*)pc->data;

866:   if (ovl < 0) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
867:   if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCASMSetOverlap() should be called before PCSetUp().");
868:   if (!pc->setupcalled) osm->overlap = ovl;
869:   return(0);
870: }

872: static PetscErrorCode  PCASMSetType_ASM(PC pc,PCASMType type)
873: {
874:   PC_ASM *osm = (PC_ASM*)pc->data;

877:   osm->type     = type;
878:   osm->type_set = PETSC_TRUE;
879:   return(0);
880: }

882: static PetscErrorCode  PCASMGetType_ASM(PC pc,PCASMType *type)
883: {
884:   PC_ASM *osm = (PC_ASM*)pc->data;

887:   *type = osm->type;
888:   return(0);
889: }

891: static PetscErrorCode  PCASMSetLocalType_ASM(PC pc, PCCompositeType type)
892: {
893:   PC_ASM *osm = (PC_ASM *) pc->data;

896:   if (type != PC_COMPOSITE_ADDITIVE && type != PC_COMPOSITE_MULTIPLICATIVE) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Only supports additive or multiplicative as the local type");
897:   osm->loctype = type;
898:   return(0);
899: }

901: static PetscErrorCode  PCASMGetLocalType_ASM(PC pc, PCCompositeType *type)
902: {
903:   PC_ASM *osm = (PC_ASM *) pc->data;

906:   *type = osm->loctype;
907:   return(0);
908: }

910: static PetscErrorCode  PCASMSetSortIndices_ASM(PC pc,PetscBool  doSort)
911: {
912:   PC_ASM *osm = (PC_ASM*)pc->data;

915:   osm->sort_indices = doSort;
916:   return(0);
917: }

919: static PetscErrorCode  PCASMGetSubKSP_ASM(PC pc,PetscInt *n_local,PetscInt *first_local,KSP **ksp)
920: {
921:   PC_ASM         *osm = (PC_ASM*)pc->data;

925:   if (osm->n_local_true < 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");

927:   if (n_local) *n_local = osm->n_local_true;
928:   if (first_local) {
929:     MPI_Scan(&osm->n_local_true,first_local,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
930:     *first_local -= osm->n_local_true;
931:   }
932:   if (ksp) *ksp   = osm->ksp;
933:   return(0);
934: }

936: static PetscErrorCode  PCASMGetSubMatType_ASM(PC pc,MatType *sub_mat_type)
937: {
938:   PC_ASM         *osm = (PC_ASM*)pc->data;

943:   *sub_mat_type = osm->sub_mat_type;
944:   return(0);
945: }

947: static PetscErrorCode PCASMSetSubMatType_ASM(PC pc,MatType sub_mat_type)
948: {
949:   PetscErrorCode    ierr;
950:   PC_ASM            *osm = (PC_ASM*)pc->data;

954:   PetscFree(osm->sub_mat_type);
955:   PetscStrallocpy(sub_mat_type,(char**)&osm->sub_mat_type);
956:   return(0);
957: }

959: /*@C
960:     PCASMSetLocalSubdomains - Sets the local subdomains (for this processor only) for the additive Schwarz preconditioner.

962:     Collective on pc

964:     Input Parameters:
965: +   pc - the preconditioner context
966: .   n - the number of subdomains for this processor (default value = 1)
967: .   is - the index set that defines the subdomains for this processor
968:          (or NULL for PETSc to determine subdomains)
969: -   is_local - the index sets that define the local part of the subdomains for this processor, not used unless PCASMType is PC_ASM_RESTRICT
970:          (or NULL to not provide these)

972:     Options Database Key:
973:     To set the total number of subdomain blocks rather than specify the
974:     index sets, use the option
975: .    -pc_asm_local_blocks <blks> - Sets local blocks

977:     Notes:
978:     The IS numbering is in the parallel, global numbering of the vector for both is and is_local

980:     By default the ASM preconditioner uses 1 block per processor.

982:     Use PCASMSetTotalSubdomains() to set the subdomains for all processors.

984:     If is_local is provided and PCASMType is PC_ASM_RESTRICT then the solution only over the is_local region is interpolated
985:     back to form the global solution (this is the standard restricted additive Schwarz method)

987:     If the is_local is provided and PCASMType is PC_ASM_INTERPOLATE or PC_ASM_NONE then an error is generated since there is
988:     no code to handle that case.

990:     Level: advanced

992: .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
993:           PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains(), PCASMType, PCASMSetType()
994: @*/
995: PetscErrorCode  PCASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
996: {

1001:   PetscTryMethod(pc,"PCASMSetLocalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));
1002:   return(0);
1003: }

1005: /*@C
1006:     PCASMSetTotalSubdomains - Sets the subdomains for all processors for the
1007:     additive Schwarz preconditioner.  Either all or no processors in the
1008:     PC communicator must call this routine, with the same index sets.

1010:     Collective on pc

1012:     Input Parameters:
1013: +   pc - the preconditioner context
1014: .   N  - the number of subdomains for all processors
1015: .   is - the index sets that define the subdomains for all processors
1016:          (or NULL to ask PETSc to determine the subdomains)
1017: -   is_local - the index sets that define the local part of the subdomains for this processor
1018:          (or NULL to not provide this information)

1020:     Options Database Key:
1021:     To set the total number of subdomain blocks rather than specify the
1022:     index sets, use the option
1023: .    -pc_asm_blocks <blks> - Sets total blocks

1025:     Notes:
1026:     Currently you cannot use this to set the actual subdomains with the argument is or is_local.

1028:     By default the ASM preconditioner uses 1 block per processor.

1030:     These index sets cannot be destroyed until after completion of the
1031:     linear solves for which the ASM preconditioner is being used.

1033:     Use PCASMSetLocalSubdomains() to set local subdomains.

1035:     The IS numbering is in the parallel, global numbering of the vector for both is and is_local

1037:     Level: advanced

1039: .seealso: PCASMSetLocalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
1040:           PCASMCreateSubdomains2D()
1041: @*/
1042: PetscErrorCode  PCASMSetTotalSubdomains(PC pc,PetscInt N,IS is[],IS is_local[])
1043: {

1048:   PetscTryMethod(pc,"PCASMSetTotalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,N,is,is_local));
1049:   return(0);
1050: }

1052: /*@
1053:     PCASMSetOverlap - Sets the overlap between a pair of subdomains for the
1054:     additive Schwarz preconditioner.  Either all or no processors in the
1055:     PC communicator must call this routine.

1057:     Logically Collective on pc

1059:     Input Parameters:
1060: +   pc  - the preconditioner context
1061: -   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)

1063:     Options Database Key:
1064: .   -pc_asm_overlap <ovl> - Sets overlap

1066:     Notes:
1067:     By default the ASM preconditioner uses 1 block per processor.  To use
1068:     multiple blocks per perocessor, see PCASMSetTotalSubdomains() and
1069:     PCASMSetLocalSubdomains() (and the option -pc_asm_blocks <blks>).

1071:     The overlap defaults to 1, so if one desires that no additional
1072:     overlap be computed beyond what may have been set with a call to
1073:     PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(), then ovl
1074:     must be set to be 0.  In particular, if one does not explicitly set
1075:     the subdomains an application code, then all overlap would be computed
1076:     internally by PETSc, and using an overlap of 0 would result in an ASM
1077:     variant that is equivalent to the block Jacobi preconditioner.

1079:     The default algorithm used by PETSc to increase overlap is fast, but not scalable,
1080:     use the option -mat_increase_overlap_scalable when the problem and number of processes is large.

1082:     Note that one can define initial index sets with any overlap via
1083:     PCASMSetLocalSubdomains(); the routine
1084:     PCASMSetOverlap() merely allows PETSc to extend that overlap further
1085:     if desired.

1087:     Level: intermediate

1089: .seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
1090:           PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains(), MatIncreaseOverlap()
1091: @*/
1092: PetscErrorCode  PCASMSetOverlap(PC pc,PetscInt ovl)
1093: {

1099:   PetscTryMethod(pc,"PCASMSetOverlap_C",(PC,PetscInt),(pc,ovl));
1100:   return(0);
1101: }

1103: /*@
1104:     PCASMSetType - Sets the type of restriction and interpolation used
1105:     for local problems in the additive Schwarz method.

1107:     Logically Collective on pc

1109:     Input Parameters:
1110: +   pc  - the preconditioner context
1111: -   type - variant of ASM, one of
1112: .vb
1113:       PC_ASM_BASIC       - full interpolation and restriction
1114:       PC_ASM_RESTRICT    - full restriction, local processor interpolation
1115:       PC_ASM_INTERPOLATE - full interpolation, local processor restriction
1116:       PC_ASM_NONE        - local processor restriction and interpolation
1117: .ve

1119:     Options Database Key:
1120: .   -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type

1122:     Notes:
1123:     if the is_local arguments are passed to PCASMSetLocalSubdomains() then they are used when PC_ASM_RESTRICT has been selected
1124:     to limit the local processor interpolation

1126:     Level: intermediate

1128: .seealso: PCASMSetTotalSubdomains(), PCASMSetTotalSubdomains(), PCASMGetSubKSP(),
1129:           PCASMCreateSubdomains2D(), PCASMType, PCASMSetLocalType(), PCASMGetLocalType()
1130: @*/
1131: PetscErrorCode  PCASMSetType(PC pc,PCASMType type)
1132: {

1138:   PetscTryMethod(pc,"PCASMSetType_C",(PC,PCASMType),(pc,type));
1139:   return(0);
1140: }

1142: /*@
1143:     PCASMGetType - Gets the type of restriction and interpolation used
1144:     for local problems in the additive Schwarz method.

1146:     Logically Collective on pc

1148:     Input Parameter:
1149: .   pc  - the preconditioner context

1151:     Output Parameter:
1152: .   type - variant of ASM, one of

1154: .vb
1155:       PC_ASM_BASIC       - full interpolation and restriction
1156:       PC_ASM_RESTRICT    - full restriction, local processor interpolation
1157:       PC_ASM_INTERPOLATE - full interpolation, local processor restriction
1158:       PC_ASM_NONE        - local processor restriction and interpolation
1159: .ve

1161:     Options Database Key:
1162: .   -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type

1164:     Level: intermediate

1166: .seealso: PCASMSetTotalSubdomains(), PCASMSetTotalSubdomains(), PCASMGetSubKSP(),
1167:           PCASMCreateSubdomains2D(), PCASMType, PCASMSetType(), PCASMSetLocalType(), PCASMGetLocalType()
1168: @*/
1169: PetscErrorCode  PCASMGetType(PC pc,PCASMType *type)
1170: {

1175:   PetscUseMethod(pc,"PCASMGetType_C",(PC,PCASMType*),(pc,type));
1176:   return(0);
1177: }

1179: /*@
1180:   PCASMSetLocalType - Sets the type of composition used for local problems in the additive Schwarz method.

1182:   Logically Collective on pc

1184:   Input Parameters:
1185: + pc  - the preconditioner context
1186: - type - type of composition, one of
1187: .vb
1188:   PC_COMPOSITE_ADDITIVE       - local additive combination
1189:   PC_COMPOSITE_MULTIPLICATIVE - local multiplicative combination
1190: .ve

1192:   Options Database Key:
1193: . -pc_asm_local_type [additive,multiplicative] - Sets local solver composition type

1195:   Level: intermediate

1197: .seealso: PCASMSetType(), PCASMGetType(), PCASMGetLocalType(), PCASM, PCASMType, PCASMSetType(), PCASMGetType(), PCCompositeType
1198: @*/
1199: PetscErrorCode PCASMSetLocalType(PC pc, PCCompositeType type)
1200: {

1206:   PetscTryMethod(pc, "PCASMSetLocalType_C", (PC, PCCompositeType), (pc, type));
1207:   return(0);
1208: }

1210: /*@
1211:   PCASMGetLocalType - Gets the type of composition used for local problems in the additive Schwarz method.

1213:   Logically Collective on pc

1215:   Input Parameter:
1216: . pc  - the preconditioner context

1218:   Output Parameter:
1219: . type - type of composition, one of
1220: .vb
1221:   PC_COMPOSITE_ADDITIVE       - local additive combination
1222:   PC_COMPOSITE_MULTIPLICATIVE - local multiplicative combination
1223: .ve

1225:   Options Database Key:
1226: . -pc_asm_local_type [additive,multiplicative] - Sets local solver composition type

1228:   Level: intermediate

1230: .seealso: PCASMSetType(), PCASMGetType(), PCASMSetLocalType(), PCASMCreate(), PCASMType, PCASMSetType(), PCASMGetType(), PCCompositeType
1231: @*/
1232: PetscErrorCode PCASMGetLocalType(PC pc, PCCompositeType *type)
1233: {

1239:   PetscUseMethod(pc, "PCASMGetLocalType_C", (PC, PCCompositeType *), (pc, type));
1240:   return(0);
1241: }

1243: /*@
1244:     PCASMSetSortIndices - Determines whether subdomain indices are sorted.

1246:     Logically Collective on pc

1248:     Input Parameters:
1249: +   pc  - the preconditioner context
1250: -   doSort - sort the subdomain indices

1252:     Level: intermediate

1254: .seealso: PCASMSetLocalSubdomains(), PCASMSetTotalSubdomains(), PCASMGetSubKSP(),
1255:           PCASMCreateSubdomains2D()
1256: @*/
1257: PetscErrorCode  PCASMSetSortIndices(PC pc,PetscBool doSort)
1258: {

1264:   PetscTryMethod(pc,"PCASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));
1265:   return(0);
1266: }

1268: /*@C
1269:    PCASMGetSubKSP - Gets the local KSP contexts for all blocks on
1270:    this processor.

1272:    Collective on pc iff first_local is requested

1274:    Input Parameter:
1275: .  pc - the preconditioner context

1277:    Output Parameters:
1278: +  n_local - the number of blocks on this processor or NULL
1279: .  first_local - the global number of the first block on this processor or NULL,
1280:                  all processors must request or all must pass NULL
1281: -  ksp - the array of KSP contexts

1283:    Note:
1284:    After PCASMGetSubKSP() the array of KSPes is not to be freed.

1286:    You must call KSPSetUp() before calling PCASMGetSubKSP().

1288:    Fortran note:
1289:    The output argument 'ksp' must be an array of sufficient length or PETSC_NULL_KSP. The latter can be used to learn the necessary length.

1291:    Level: advanced

1293: .seealso: PCASMSetTotalSubdomains(), PCASMSetTotalSubdomains(), PCASMSetOverlap(),
1294:           PCASMCreateSubdomains2D(),
1295: @*/
1296: PetscErrorCode  PCASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
1297: {

1302:   PetscUseMethod(pc,"PCASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));
1303:   return(0);
1304: }

1306: /* -------------------------------------------------------------------------------------*/
1307: /*MC
1308:    PCASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
1309:            its own KSP object.

1311:    Options Database Keys:
1312: +  -pc_asm_blocks <blks> - Sets total blocks
1313: .  -pc_asm_overlap <ovl> - Sets overlap
1314: .  -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type, default is restrict
1315: -  -pc_asm_local_type [additive, multiplicative] - Sets ASM type, default is additive

1317:      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1318:       will get a different convergence rate due to the default option of -pc_asm_type restrict. Use
1319:       -pc_asm_type basic to use the standard ASM.

1321:    Notes:
1322:     Each processor can have one or more blocks, but a block cannot be shared by more
1323:      than one processor. Use PCGASM for subdomains shared by multiple processes. Defaults to one block per processor.

1325:      To set options on the solvers for each block append -sub_ to all the KSP, and PC
1326:         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly

1328:      To set the options on the solvers separate for each block call PCASMGetSubKSP()
1329:          and set the options directly on the resulting KSP object (you can access its PC
1330:          with KSPGetPC())

1332:    Level: beginner

1334:     References:
1335: +   1. - M Dryja, OB Widlund, An additive variant of the Schwarz alternating method for the case of many subregions
1336:      Courant Institute, New York University Technical report
1337: -   2. - Barry Smith, Petter Bjorstad, and William Gropp, Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1338:     Cambridge University Press.

1340: .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
1341:            PCBJACOBI, PCASMGetSubKSP(), PCASMSetLocalSubdomains(), PCASMType, PCASMGetType(), PCASMSetLocalType(), PCASMGetLocalType()
1342:            PCASMSetTotalSubdomains(), PCSetModifySubMatrices(), PCASMSetOverlap(), PCASMSetType(), PCCompositeType

1344: M*/

1346: PETSC_EXTERN PetscErrorCode PCCreate_ASM(PC pc)
1347: {
1349:   PC_ASM         *osm;

1352:   PetscNewLog(pc,&osm);

1354:   osm->n                 = PETSC_DECIDE;
1355:   osm->n_local           = 0;
1356:   osm->n_local_true      = PETSC_DECIDE;
1357:   osm->overlap           = 1;
1358:   osm->ksp               = NULL;
1359:   osm->restriction       = NULL;
1360:   osm->lprolongation     = NULL;
1361:   osm->lrestriction      = NULL;
1362:   osm->x                 = NULL;
1363:   osm->y                 = NULL;
1364:   osm->is                = NULL;
1365:   osm->is_local          = NULL;
1366:   osm->mat               = NULL;
1367:   osm->pmat              = NULL;
1368:   osm->type              = PC_ASM_RESTRICT;
1369:   osm->loctype           = PC_COMPOSITE_ADDITIVE;
1370:   osm->sort_indices      = PETSC_TRUE;
1371:   osm->dm_subdomains     = PETSC_FALSE;
1372:   osm->sub_mat_type      = NULL;

1374:   pc->data                 = (void*)osm;
1375:   pc->ops->apply           = PCApply_ASM;
1376:   pc->ops->matapply        = PCMatApply_ASM;
1377:   pc->ops->applytranspose  = PCApplyTranspose_ASM;
1378:   pc->ops->setup           = PCSetUp_ASM;
1379:   pc->ops->reset           = PCReset_ASM;
1380:   pc->ops->destroy         = PCDestroy_ASM;
1381:   pc->ops->setfromoptions  = PCSetFromOptions_ASM;
1382:   pc->ops->setuponblocks   = PCSetUpOnBlocks_ASM;
1383:   pc->ops->view            = PCView_ASM;
1384:   pc->ops->applyrichardson = NULL;

1386:   PetscObjectComposeFunction((PetscObject)pc,"PCASMSetLocalSubdomains_C",PCASMSetLocalSubdomains_ASM);
1387:   PetscObjectComposeFunction((PetscObject)pc,"PCASMSetTotalSubdomains_C",PCASMSetTotalSubdomains_ASM);
1388:   PetscObjectComposeFunction((PetscObject)pc,"PCASMSetOverlap_C",PCASMSetOverlap_ASM);
1389:   PetscObjectComposeFunction((PetscObject)pc,"PCASMSetType_C",PCASMSetType_ASM);
1390:   PetscObjectComposeFunction((PetscObject)pc,"PCASMGetType_C",PCASMGetType_ASM);
1391:   PetscObjectComposeFunction((PetscObject)pc,"PCASMSetLocalType_C",PCASMSetLocalType_ASM);
1392:   PetscObjectComposeFunction((PetscObject)pc,"PCASMGetLocalType_C",PCASMGetLocalType_ASM);
1393:   PetscObjectComposeFunction((PetscObject)pc,"PCASMSetSortIndices_C",PCASMSetSortIndices_ASM);
1394:   PetscObjectComposeFunction((PetscObject)pc,"PCASMGetSubKSP_C",PCASMGetSubKSP_ASM);
1395:   PetscObjectComposeFunction((PetscObject)pc,"PCASMGetSubMatType_C",PCASMGetSubMatType_ASM);
1396:   PetscObjectComposeFunction((PetscObject)pc,"PCASMSetSubMatType_C",PCASMSetSubMatType_ASM);
1397:   return(0);
1398: }

1400: /*@C
1401:    PCASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
1402:    preconditioner for any problem on a general grid.

1404:    Collective

1406:    Input Parameters:
1407: +  A - The global matrix operator
1408: -  n - the number of local blocks

1410:    Output Parameters:
1411: .  outis - the array of index sets defining the subdomains

1413:    Level: advanced

1415:    Note: this generates nonoverlapping subdomains; the PCASM will generate the overlap
1416:     from these if you use PCASMSetLocalSubdomains()

1418:     In the Fortran version you must provide the array outis[] already allocated of length n.

1420: .seealso: PCASMSetLocalSubdomains(), PCASMDestroySubdomains()
1421: @*/
1422: PetscErrorCode  PCASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
1423: {
1424:   MatPartitioning mpart;
1425:   const char      *prefix;
1426:   PetscInt        i,j,rstart,rend,bs;
1427:   PetscBool       hasop, isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1428:   Mat             Ad     = NULL, adj;
1429:   IS              ispart,isnumb,*is;
1430:   PetscErrorCode  ierr;

1435:   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);

1437:   /* Get prefix, row distribution, and block size */
1438:   MatGetOptionsPrefix(A,&prefix);
1439:   MatGetOwnershipRange(A,&rstart,&rend);
1440:   MatGetBlockSize(A,&bs);
1441:   if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);

1443:   /* Get diagonal block from matrix if possible */
1444:   MatHasOperation(A,MATOP_GET_DIAGONAL_BLOCK,&hasop);
1445:   if (hasop) {
1446:     MatGetDiagonalBlock(A,&Ad);
1447:   }
1448:   if (Ad) {
1449:     PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);
1450:     if (!isbaij) {PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);}
1451:   }
1452:   if (Ad && n > 1) {
1453:     PetscBool match,done;
1454:     /* Try to setup a good matrix partitioning if available */
1455:     MatPartitioningCreate(PETSC_COMM_SELF,&mpart);
1456:     PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);
1457:     MatPartitioningSetFromOptions(mpart);
1458:     PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);
1459:     if (!match) {
1460:       PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);
1461:     }
1462:     if (!match) { /* assume a "good" partitioner is available */
1463:       PetscInt       na;
1464:       const PetscInt *ia,*ja;
1465:       MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1466:       if (done) {
1467:         /* Build adjacency matrix by hand. Unfortunately a call to
1468:            MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1469:            remove the block-aij structure and we cannot expect
1470:            MatPartitioning to split vertices as we need */
1471:         PetscInt       i,j,len,nnz,cnt,*iia=NULL,*jja=NULL;
1472:         const PetscInt *row;
1473:         nnz = 0;
1474:         for (i=0; i<na; i++) { /* count number of nonzeros */
1475:           len = ia[i+1] - ia[i];
1476:           row = ja + ia[i];
1477:           for (j=0; j<len; j++) {
1478:             if (row[j] == i) { /* don't count diagonal */
1479:               len--; break;
1480:             }
1481:           }
1482:           nnz += len;
1483:         }
1484:         PetscMalloc1(na+1,&iia);
1485:         PetscMalloc1(nnz,&jja);
1486:         nnz    = 0;
1487:         iia[0] = 0;
1488:         for (i=0; i<na; i++) { /* fill adjacency */
1489:           cnt = 0;
1490:           len = ia[i+1] - ia[i];
1491:           row = ja + ia[i];
1492:           for (j=0; j<len; j++) {
1493:             if (row[j] != i) { /* if not diagonal */
1494:               jja[nnz+cnt++] = row[j];
1495:             }
1496:           }
1497:           nnz     += cnt;
1498:           iia[i+1] = nnz;
1499:         }
1500:         /* Partitioning of the adjacency matrix */
1501:         MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,NULL,&adj);
1502:         MatPartitioningSetAdjacency(mpart,adj);
1503:         MatPartitioningSetNParts(mpart,n);
1504:         MatPartitioningApply(mpart,&ispart);
1505:         ISPartitioningToNumbering(ispart,&isnumb);
1506:         MatDestroy(&adj);
1507:         foundpart = PETSC_TRUE;
1508:       }
1509:       MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1510:     }
1511:     MatPartitioningDestroy(&mpart);
1512:   }

1514:   PetscMalloc1(n,&is);
1515:   *outis = is;

1517:   if (!foundpart) {

1519:     /* Partitioning by contiguous chunks of rows */

1521:     PetscInt mbs   = (rend-rstart)/bs;
1522:     PetscInt start = rstart;
1523:     for (i=0; i<n; i++) {
1524:       PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
1525:       ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);
1526:       start += count;
1527:     }

1529:   } else {

1531:     /* Partitioning by adjacency of diagonal block  */

1533:     const PetscInt *numbering;
1534:     PetscInt       *count,nidx,*indices,*newidx,start=0;
1535:     /* Get node count in each partition */
1536:     PetscMalloc1(n,&count);
1537:     ISPartitioningCount(ispart,n,count);
1538:     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1539:       for (i=0; i<n; i++) count[i] *= bs;
1540:     }
1541:     /* Build indices from node numbering */
1542:     ISGetLocalSize(isnumb,&nidx);
1543:     PetscMalloc1(nidx,&indices);
1544:     for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1545:     ISGetIndices(isnumb,&numbering);
1546:     PetscSortIntWithPermutation(nidx,numbering,indices);
1547:     ISRestoreIndices(isnumb,&numbering);
1548:     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1549:       PetscMalloc1(nidx*bs,&newidx);
1550:       for (i=0; i<nidx; i++) {
1551:         for (j=0; j<bs; j++) newidx[i*bs+j] = indices[i]*bs + j;
1552:       }
1553:       PetscFree(indices);
1554:       nidx   *= bs;
1555:       indices = newidx;
1556:     }
1557:     /* Shift to get global indices */
1558:     for (i=0; i<nidx; i++) indices[i] += rstart;

1560:     /* Build the index sets for each block */
1561:     for (i=0; i<n; i++) {
1562:       ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);
1563:       ISSort(is[i]);
1564:       start += count[i];
1565:     }

1567:     PetscFree(count);
1568:     PetscFree(indices);
1569:     ISDestroy(&isnumb);
1570:     ISDestroy(&ispart);

1572:   }
1573:   return(0);
1574: }

1576: /*@C
1577:    PCASMDestroySubdomains - Destroys the index sets created with
1578:    PCASMCreateSubdomains(). Should be called after setting subdomains
1579:    with PCASMSetLocalSubdomains().

1581:    Collective

1583:    Input Parameters:
1584: +  n - the number of index sets
1585: .  is - the array of index sets
1586: -  is_local - the array of local index sets, can be NULL

1588:    Level: advanced

1590: .seealso: PCASMCreateSubdomains(), PCASMSetLocalSubdomains()
1591: @*/
1592: PetscErrorCode  PCASMDestroySubdomains(PetscInt n, IS is[], IS is_local[])
1593: {
1594:   PetscInt       i;

1598:   if (n <= 0) return(0);
1599:   if (is) {
1601:     for (i=0; i<n; i++) { ISDestroy(&is[i]); }
1602:     PetscFree(is);
1603:   }
1604:   if (is_local) {
1606:     for (i=0; i<n; i++) { ISDestroy(&is_local[i]); }
1607:     PetscFree(is_local);
1608:   }
1609:   return(0);
1610: }

1612: /*@
1613:    PCASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1614:    preconditioner for a two-dimensional problem on a regular grid.

1616:    Not Collective

1618:    Input Parameters:
1619: +  m, n - the number of mesh points in the x and y directions
1620: .  M, N - the number of subdomains in the x and y directions
1621: .  dof - degrees of freedom per node
1622: -  overlap - overlap in mesh lines

1624:    Output Parameters:
1625: +  Nsub - the number of subdomains created
1626: .  is - array of index sets defining overlapping (if overlap > 0) subdomains
1627: -  is_local - array of index sets defining non-overlapping subdomains

1629:    Note:
1630:    Presently PCAMSCreateSubdomains2d() is valid only for sequential
1631:    preconditioners.  More general related routines are
1632:    PCASMSetTotalSubdomains() and PCASMSetLocalSubdomains().

1634:    Level: advanced

1636: .seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
1637:           PCASMSetOverlap()
1638: @*/
1639: PetscErrorCode  PCASMCreateSubdomains2D(PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt dof,PetscInt overlap,PetscInt *Nsub,IS **is,IS **is_local)
1640: {
1641:   PetscInt       i,j,height,width,ystart,xstart,yleft,yright,xleft,xright,loc_outer;
1643:   PetscInt       nidx,*idx,loc,ii,jj,count;

1646:   if (dof != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP," ");

1648:   *Nsub     = N*M;
1649:   PetscMalloc1(*Nsub,is);
1650:   PetscMalloc1(*Nsub,is_local);
1651:   ystart    = 0;
1652:   loc_outer = 0;
1653:   for (i=0; i<N; i++) {
1654:     height = n/N + ((n % N) > i); /* height of subdomain */
1655:     if (height < 2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many N subdomains for mesh dimension n");
1656:     yleft  = ystart - overlap; if (yleft < 0) yleft = 0;
1657:     yright = ystart + height + overlap; if (yright > n) yright = n;
1658:     xstart = 0;
1659:     for (j=0; j<M; j++) {
1660:       width = m/M + ((m % M) > j); /* width of subdomain */
1661:       if (width < 2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many M subdomains for mesh dimension m");
1662:       xleft  = xstart - overlap; if (xleft < 0) xleft = 0;
1663:       xright = xstart + width + overlap; if (xright > m) xright = m;
1664:       nidx   = (xright - xleft)*(yright - yleft);
1665:       PetscMalloc1(nidx,&idx);
1666:       loc    = 0;
1667:       for (ii=yleft; ii<yright; ii++) {
1668:         count = m*ii + xleft;
1669:         for (jj=xleft; jj<xright; jj++) idx[loc++] = count++;
1670:       }
1671:       ISCreateGeneral(PETSC_COMM_SELF,nidx,idx,PETSC_COPY_VALUES,(*is)+loc_outer);
1672:       if (overlap == 0) {
1673:         PetscObjectReference((PetscObject)(*is)[loc_outer]);

1675:         (*is_local)[loc_outer] = (*is)[loc_outer];
1676:       } else {
1677:         for (loc=0,ii=ystart; ii<ystart+height; ii++) {
1678:           for (jj=xstart; jj<xstart+width; jj++) {
1679:             idx[loc++] = m*ii + jj;
1680:           }
1681:         }
1682:         ISCreateGeneral(PETSC_COMM_SELF,loc,idx,PETSC_COPY_VALUES,*is_local+loc_outer);
1683:       }
1684:       PetscFree(idx);
1685:       xstart += width;
1686:       loc_outer++;
1687:     }
1688:     ystart += height;
1689:   }
1690:   for (i=0; i<*Nsub; i++) { ISSort((*is)[i]); }
1691:   return(0);
1692: }

1694: /*@C
1695:     PCASMGetLocalSubdomains - Gets the local subdomains (for this processor
1696:     only) for the additive Schwarz preconditioner.

1698:     Not Collective

1700:     Input Parameter:
1701: .   pc - the preconditioner context

1703:     Output Parameters:
1704: +   n - if requested, the number of subdomains for this processor (default value = 1)
1705: .   is - if requested, the index sets that define the subdomains for this processor
1706: -   is_local - if requested, the index sets that define the local part of the subdomains for this processor (can be NULL)


1709:     Notes:
1710:     The IS numbering is in the parallel, global numbering of the vector.

1712:     Level: advanced

1714: .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
1715:           PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubmatrices()
1716: @*/
1717: PetscErrorCode  PCASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
1718: {
1719:   PC_ASM         *osm = (PC_ASM*)pc->data;
1721:   PetscBool      match;

1728:   PetscObjectTypeCompare((PetscObject)pc,PCASM,&match);
1729:   if (!match) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONG,"PC is not a PCASM");
1730:   if (n) *n = osm->n_local_true;
1731:   if (is) *is = osm->is;
1732:   if (is_local) *is_local = osm->is_local;
1733:   return(0);
1734: }

1736: /*@C
1737:     PCASMGetLocalSubmatrices - Gets the local submatrices (for this processor
1738:     only) for the additive Schwarz preconditioner.

1740:     Not Collective

1742:     Input Parameter:
1743: .   pc - the preconditioner context

1745:     Output Parameters:
1746: +   n - if requested, the number of matrices for this processor (default value = 1)
1747: -   mat - if requested, the matrices

1749:     Level: advanced

1751:     Notes:
1752:     Call after PCSetUp() (or KSPSetUp()) but before PCApply() and before PCSetUpOnBlocks())

1754:            Usually one would use PCSetModifySubMatrices() to change the submatrices in building the preconditioner.

1756: .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
1757:           PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubdomains(), PCSetModifySubMatrices()
1758: @*/
1759: PetscErrorCode  PCASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1760: {
1761:   PC_ASM         *osm;
1763:   PetscBool      match;

1769:   if (!pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
1770:   PetscObjectTypeCompare((PetscObject)pc,PCASM,&match);
1771:   if (!match) {
1772:     if (n) *n = 0;
1773:     if (mat) *mat = NULL;
1774:   } else {
1775:     osm = (PC_ASM*)pc->data;
1776:     if (n) *n = osm->n_local_true;
1777:     if (mat) *mat = osm->pmat;
1778:   }
1779:   return(0);
1780: }

1782: /*@
1783:     PCASMSetDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.

1785:     Logically Collective

1787:     Input Parameter:
1788: +   pc  - the preconditioner
1789: -   flg - boolean indicating whether to use subdomains defined by the DM

1791:     Options Database Key:
1792: .   -pc_asm_dm_subdomains

1794:     Level: intermediate

1796:     Notes:
1797:     PCASMSetTotalSubdomains() and PCASMSetOverlap() take precedence over PCASMSetDMSubdomains(),
1798:     so setting either of the first two effectively turns the latter off.

1800: .seealso: PCASMGetDMSubdomains(), PCASMSetTotalSubdomains(), PCASMSetOverlap()
1801:           PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubdomains()
1802: @*/
1803: PetscErrorCode  PCASMSetDMSubdomains(PC pc,PetscBool flg)
1804: {
1805:   PC_ASM         *osm = (PC_ASM*)pc->data;
1807:   PetscBool      match;

1812:   if (pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Not for a setup PC.");
1813:   PetscObjectTypeCompare((PetscObject)pc,PCASM,&match);
1814:   if (match) {
1815:     osm->dm_subdomains = flg;
1816:   }
1817:   return(0);
1818: }

1820: /*@
1821:     PCASMGetDMSubdomains - Returns flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1822:     Not Collective

1824:     Input Parameter:
1825: .   pc  - the preconditioner

1827:     Output Parameter:
1828: .   flg - boolean indicating whether to use subdomains defined by the DM

1830:     Level: intermediate

1832: .seealso: PCASMSetDMSubdomains(), PCASMSetTotalSubdomains(), PCASMSetOverlap()
1833:           PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubdomains()
1834: @*/
1835: PetscErrorCode  PCASMGetDMSubdomains(PC pc,PetscBool* flg)
1836: {
1837:   PC_ASM         *osm = (PC_ASM*)pc->data;
1839:   PetscBool      match;

1844:   PetscObjectTypeCompare((PetscObject)pc,PCASM,&match);
1845:   if (match) *flg = osm->dm_subdomains;
1846:   else *flg = PETSC_FALSE;
1847:   return(0);
1848: }

1850: /*@
1851:      PCASMGetSubMatType - Gets the matrix type used for ASM subsolves, as a string.

1853:    Not Collective

1855:    Input Parameter:
1856: .  pc - the PC

1858:    Output Parameter:
1859: .  -pc_asm_sub_mat_type - name of matrix type

1861:    Level: advanced

1863: .seealso: PCASMSetSubMatType(), PCASM, PCSetType(), VecSetType(), MatType, Mat
1864: @*/
1865: PetscErrorCode  PCASMGetSubMatType(PC pc,MatType *sub_mat_type)
1866: {

1871:   PetscTryMethod(pc,"PCASMGetSubMatType_C",(PC,MatType*),(pc,sub_mat_type));
1872:   return(0);
1873: }

1875: /*@
1876:      PCASMSetSubMatType - Set the type of matrix used for ASM subsolves

1878:    Collective on Mat

1880:    Input Parameters:
1881: +  pc             - the PC object
1882: -  sub_mat_type   - matrix type

1884:    Options Database Key:
1885: .  -pc_asm_sub_mat_type  <sub_mat_type> - Sets the matrix type used for subsolves, for example, seqaijviennacl. If you specify a base name like aijviennacl, the corresponding sequential type is assumed.

1887:    Notes:
1888:    See "${PETSC_DIR}/include/petscmat.h" for available types

1890:   Level: advanced

1892: .seealso: PCASMGetSubMatType(), PCASM, PCSetType(), VecSetType(), MatType, Mat
1893: @*/
1894: PetscErrorCode PCASMSetSubMatType(PC pc,MatType sub_mat_type)
1895: {

1900:   PetscTryMethod(pc,"PCASMSetSubMatType_C",(PC,MatType),(pc,sub_mat_type));
1901:   return(0);
1902: }