Actual source code: nn.c

petsc-master 2017-08-19
Report Typos and Errors

  2:  #include <../src/ksp/pc/impls/is/nn/nn.h>

  4: /* -------------------------------------------------------------------------- */
  5: /*
  6:    PCSetUp_NN - Prepares for the use of the NN preconditioner
  7:                     by setting data structures and options.

  9:    Input Parameter:
 10: .  pc - the preconditioner context

 12:    Application Interface Routine: PCSetUp()

 14:    Notes:
 15:    The interface routine PCSetUp() is not usually called directly by
 16:    the user, but instead is called by PCApply() if necessary.
 17: */
 18: static PetscErrorCode PCSetUp_NN(PC pc)
 19: {

 23:   if (!pc->setupcalled) {
 24:     /* Set up all the "iterative substructuring" common block */
 25:     PCISSetUp(pc,PETSC_TRUE);
 26:     /* Create the coarse matrix. */
 27:     PCNNCreateCoarseMatrix(pc);
 28:   }
 29:   return(0);
 30: }

 32: /* -------------------------------------------------------------------------- */
 33: /*
 34:    PCApply_NN - Applies the NN preconditioner to a vector.

 36:    Input Parameters:
 37: .  pc - the preconditioner context
 38: .  r - input vector (global)

 40:    Output Parameter:
 41: .  z - output vector (global)

 43:    Application Interface Routine: PCApply()
 44:  */
 45: static PetscErrorCode PCApply_NN(PC pc,Vec r,Vec z)
 46: {
 47:   PC_IS          *pcis = (PC_IS*)(pc->data);
 49:   PetscScalar    m_one = -1.0;
 50:   Vec            w     = pcis->vec1_global;

 53:   /*
 54:     Dirichlet solvers.
 55:     Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
 56:     Storing the local results at vec2_D
 57:   */
 58:   VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
 59:   VecScatterEnd  (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
 60:   KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);

 62:   /*
 63:     Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ .
 64:     Storing the result in the interface portion of the global vector w.
 65:   */
 66:   MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);
 67:   VecScale(pcis->vec1_B,m_one);
 68:   VecCopy(r,w);
 69:   VecScatterBegin(pcis->global_to_B,pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE);
 70:   VecScatterEnd  (pcis->global_to_B,pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE);

 72:   /*
 73:     Apply the interface preconditioner
 74:   */
 75:   PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D,
 76:                                           pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);

 78:   /*
 79:     Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $
 80:     The result is stored in vec1_D.
 81:   */
 82:   VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
 83:   VecScatterEnd  (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
 84:   MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);

 86:   /*
 87:     Dirichlet solvers.
 88:     Computing $ B_I^{(i)}t_I^{(i)} $ and sticking into the global vector the blocks
 89:     $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $.
 90:   */
 91:   VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
 92:   VecScatterEnd  (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
 93:   KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);
 94:   VecScale(pcis->vec2_D,m_one);
 95:   VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE);
 96:   VecScatterEnd  (pcis->global_to_D,pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE);
 97:   return(0);
 98: }

100: /* -------------------------------------------------------------------------- */
101: /*
102:    PCDestroy_NN - Destroys the private context for the NN preconditioner
103:    that was created with PCCreate_NN().

105:    Input Parameter:
106: .  pc - the preconditioner context

108:    Application Interface Routine: PCDestroy()
109: */
110: static PetscErrorCode PCDestroy_NN(PC pc)
111: {
112:   PC_NN          *pcnn = (PC_NN*)pc->data;

116:   PCISDestroy(pc);

118:   MatDestroy(&pcnn->coarse_mat);
119:   VecDestroy(&pcnn->coarse_x);
120:   VecDestroy(&pcnn->coarse_b);
121:   KSPDestroy(&pcnn->ksp_coarse);
122:   if (pcnn->DZ_IN) {
123:     PetscFree(pcnn->DZ_IN[0]);
124:     PetscFree(pcnn->DZ_IN);
125:   }

127:   /*
128:       Free the private data structure that was hanging off the PC
129:   */
130:   PetscFree(pc->data);
131:   return(0);
132: }

134: /* -------------------------------------------------------------------------- */
135: /*MC
136:    PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs.

138:    Options Database Keys:
139: +    -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems
140:                                        (this skips the first coarse grid solve in the preconditioner)
141: .    -pc_nn_turn_off_second_balancing - do not balance the solution solving the local Neumann problems
142:                                        (this skips the second coarse grid solve in the preconditioner)
143: .    -pc_is_damp_fixed <fact> -
144: .    -pc_is_remove_nullspace_fixed -
145: .    -pc_is_set_damping_factor_floating <fact> -
146: .    -pc_is_not_damp_floating -
147: +    -pc_is_not_remove_nullspace_floating -

149:    Level: intermediate

151:    Notes: The matrix used with this preconditioner must be of type MATIS

153:           Unlike more 'conventional' Neumann-Neumann preconditioners this iterates over ALL the
154:           degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
155:           on the subdomains; though in our experience using approximate solvers is slower.).

157:           Options for the coarse grid preconditioner can be set with -nn_coarse_pc_xxx
158:           Options for the Dirichlet subproblem preconditioner can be set with -is_localD_pc_xxx
159:           Options for the Neumann subproblem preconditioner can be set with -is_localN_pc_xxx

161:    Contributed by Paulo Goldfeld

163: .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,  MATIS
164: M*/

166: PETSC_EXTERN PetscErrorCode PCCreate_NN(PC pc)
167: {
169:   PC_NN          *pcnn;

172:   /*
173:      Creates the private data structure for this preconditioner and
174:      attach it to the PC object.
175:   */
176:   PetscNewLog(pc,&pcnn);
177:   pc->data = (void*)pcnn;

179:   PCISCreate(pc);
180:   pcnn->coarse_mat = 0;
181:   pcnn->coarse_x   = 0;
182:   pcnn->coarse_b   = 0;
183:   pcnn->ksp_coarse = 0;
184:   pcnn->DZ_IN      = 0;

186:   /*
187:       Set the pointers for the functions that are provided above.
188:       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
189:       are called, they will automatically call these functions.  Note we
190:       choose not to provide a couple of these functions since they are
191:       not needed.
192:   */
193:   pc->ops->apply               = PCApply_NN;
194:   pc->ops->applytranspose      = 0;
195:   pc->ops->setup               = PCSetUp_NN;
196:   pc->ops->destroy             = PCDestroy_NN;
197:   pc->ops->view                = 0;
198:   pc->ops->applyrichardson     = 0;
199:   pc->ops->applysymmetricleft  = 0;
200:   pc->ops->applysymmetricright = 0;
201:   return(0);
202: }

204: /* -------------------------------------------------------------------------- */
205: /*
206:    PCNNCreateCoarseMatrix -
207: */
208: PetscErrorCode PCNNCreateCoarseMatrix(PC pc)
209: {
210:   MPI_Request    *send_request, *recv_request;
212:   PetscInt       i, j, k;
213:   PetscScalar    *mat;     /* Sub-matrix with this subdomain's contribution to the coarse matrix             */
214:   PetscScalar    **DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */

216:   /* aliasing some names */
217:   PC_IS       *pcis     = (PC_IS*)(pc->data);
218:   PC_NN       *pcnn     = (PC_NN*)pc->data;
219:   PetscInt    n_neigh   = pcis->n_neigh;
220:   PetscInt    *neigh    = pcis->neigh;
221:   PetscInt    *n_shared = pcis->n_shared;
222:   PetscInt    **shared  = pcis->shared;
223:   PetscScalar **DZ_IN;   /* Must be initialized after memory allocation. */

226:   /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */
227:   PetscMalloc1(n_neigh*n_neigh+1,&mat);

229:   /* Allocate memory for DZ */
230:   /* Notice that DZ_OUT[0] is allocated some space that is never used. */
231:   /* This is just in order to DZ_OUT and DZ_IN to have exactly the same form. */
232:   {
233:     PetscInt size_of_Z = 0;
234:     PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&pcnn->DZ_IN);
235:     DZ_IN = pcnn->DZ_IN;
236:     PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&DZ_OUT);
237:     for (i=0; i<n_neigh; i++) size_of_Z += n_shared[i];
238:     PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_IN[0]);
239:     PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_OUT[0]);
240:   }
241:   for (i=1; i<n_neigh; i++) {
242:     DZ_IN[i]  = DZ_IN [i-1] + n_shared[i-1];
243:     DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1];
244:   }

246:   /* Set the values of DZ_OUT, in order to send this info to the neighbours */
247:   /* First, set the auxiliary array pcis->work_N. */
248:   PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);
249:   for (i=1; i<n_neigh; i++) {
250:     for (j=0; j<n_shared[i]; j++) {
251:       DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
252:     }
253:   }

255:   /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
256:   /* Notice that send_request[] and recv_request[] could have one less element. */
257:   /* We make them longer to have request[i] corresponding to neigh[i].          */
258:   {
259:     PetscMPIInt tag;
260:     PetscObjectGetNewTag((PetscObject)pc,&tag);
261:     PetscMalloc2(n_neigh+1,&send_request,n_neigh+1,&recv_request);
262:     for (i=1; i<n_neigh; i++) {
263:       MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,PetscObjectComm((PetscObject)pc),&(send_request[i]));
264:       MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,PetscObjectComm((PetscObject)pc),&(recv_request[i]));
265:     }
266:   }

268:   /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
269:   for (j=0; j<n_shared[0]; j++) DZ_IN[0][j] = pcis->work_N[shared[0][j]];

271:   /* Start computing with local D*Z while communication goes on.    */
272:   /* Apply Schur complement. The result is "stored" in vec (more    */
273:   /* precisely, vec points to the result, stored in pc_nn->vec1_B)  */
274:   /* and also scattered to pcnn->work_N.                            */
275:   PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B,
276:                                pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);

278:   /* Compute the first column, while completing the receiving. */
279:   for (i=0; i<n_neigh; i++) {
280:     MPI_Status  stat;
281:     PetscMPIInt ind=0;
282:     if (i>0) { MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat); ind++;}
283:     mat[ind*n_neigh+0] = 0.0;
284:     for (k=0; k<n_shared[ind]; k++) mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
285:   }

287:   /* Compute the remaining of the columns */
288:   for (j=1; j<n_neigh; j++) {
289:     PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B,
290:                                  pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);
291:     for (i=0; i<n_neigh; i++) {
292:       mat[i*n_neigh+j] = 0.0;
293:       for (k=0; k<n_shared[i]; k++) mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
294:     }
295:   }

297:   /* Complete the sending. */
298:   if (n_neigh>1) {
299:     MPI_Status *stat;
300:     PetscMalloc1(n_neigh-1,&stat);
301:     if (n_neigh-1) {MPI_Waitall(n_neigh-1,&(send_request[1]),stat);}
302:     PetscFree(stat);
303:   }

305:   /* Free the memory for the MPI requests */
306:   PetscFree2(send_request,recv_request);

308:   /* Free the memory for DZ_OUT */
309:   if (DZ_OUT) {
310:     PetscFree(DZ_OUT[0]);
311:     PetscFree(DZ_OUT);
312:   }

314:   {
315:     PetscMPIInt size;
316:     MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
317:     /* Create the global coarse vectors (rhs and solution). */
318:     VecCreateMPI(PetscObjectComm((PetscObject)pc),1,size,&(pcnn->coarse_b));
319:     VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));
320:     /* Create and set the global coarse AIJ matrix. */
321:     MatCreate(PetscObjectComm((PetscObject)pc),&(pcnn->coarse_mat));
322:     MatSetSizes(pcnn->coarse_mat,1,1,size,size);
323:     MatSetType(pcnn->coarse_mat,MATAIJ);
324:     MatSeqAIJSetPreallocation(pcnn->coarse_mat,1,NULL);
325:     MatMPIAIJSetPreallocation(pcnn->coarse_mat,1,NULL,n_neigh,NULL);
326:     MatSetOption(pcnn->coarse_mat,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
327:     MatSetOption(pcnn->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
328:     MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);
329:     MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);
330:     MatAssemblyEnd  (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);
331:   }

333:   {
334:     PetscMPIInt rank;
335:     PetscScalar one = 1.0;
336:     MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
337:     /* "Zero out" rows of not-purely-Neumann subdomains */
338:     if (pcis->pure_neumann) {  /* does NOT zero the row; create an empty index set. The reason is that MatZeroRows() is collective. */
339:       MatZeroRows(pcnn->coarse_mat,0,NULL,one,0,0);
340:     } else { /* here it DOES zero the row, since it's not a floating subdomain. */
341:       PetscInt row = (PetscInt) rank;
342:       MatZeroRows(pcnn->coarse_mat,1,&row,one,0,0);
343:     }
344:   }

346:   /* Create the coarse linear solver context */
347:   {
348:     PC  pc_ctx, inner_pc;
349:     KSP inner_ksp;

351:     KSPCreate(PetscObjectComm((PetscObject)pc),&pcnn->ksp_coarse);
352:     PetscObjectIncrementTabLevel((PetscObject)pcnn->ksp_coarse,(PetscObject)pc,2);
353:     KSPSetOperators(pcnn->ksp_coarse,pcnn->coarse_mat,pcnn->coarse_mat);
354:     KSPGetPC(pcnn->ksp_coarse,&pc_ctx);
355:     PCSetType(pc_ctx,PCREDUNDANT);
356:     KSPSetType(pcnn->ksp_coarse,KSPPREONLY);
357:     PCRedundantGetKSP(pc_ctx,&inner_ksp);
358:     KSPGetPC(inner_ksp,&inner_pc);
359:     PCSetType(inner_pc,PCLU);
360:     KSPSetOptionsPrefix(pcnn->ksp_coarse,"nn_coarse_");
361:     KSPSetFromOptions(pcnn->ksp_coarse);
362:     /* the vectors in the following line are dummy arguments, just telling the KSP the vector size. Values are not used */
363:     KSPSetUp(pcnn->ksp_coarse);
364:   }

366:   /* Free the memory for mat */
367:   PetscFree(mat);

369:   /* for DEBUGGING, save the coarse matrix to a file. */
370:   {
371:     PetscBool flg = PETSC_FALSE;
372:     PetscOptionsGetBool(NULL,NULL,"-pc_nn_save_coarse_matrix",&flg,NULL);
373:     if (flg) {
374:       PetscViewer viewer;
375:       PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);
376:       PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
377:       MatView(pcnn->coarse_mat,viewer);
378:       PetscViewerPopFormat(viewer);
379:       PetscViewerDestroy(&viewer);
380:     }
381:   }

383:   /*  Set the variable pcnn->factor_coarse_rhs. */
384:   pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;

386:   /* See historical note 02, at the bottom of this file. */
387:   return(0);
388: }

390: /* -------------------------------------------------------------------------- */
391: /*
392:    PCNNApplySchurToChunk -

394:    Input parameters:
395: .  pcnn
396: .  n - size of chunk
397: .  idx - indices of chunk
398: .  chunk - values

400:    Output parameters:
401: .  array_N - result of Schur complement applied to chunk, scattered to big array
402: .  vec1_B  - result of Schur complement applied to chunk
403: .  vec2_B  - garbage (used as work space)
404: .  vec1_D  - garbage (used as work space)
405: .  vec2_D  - garbage (used as work space)

407: */
408: PetscErrorCode PCNNApplySchurToChunk(PC pc, PetscInt n, PetscInt *idx, PetscScalar *chunk, PetscScalar *array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
409: {
411:   PetscInt       i;
412:   PC_IS          *pcis = (PC_IS*)(pc->data);

415:   PetscMemzero((void*)array_N, pcis->n*sizeof(PetscScalar));
416:   for (i=0; i<n; i++) array_N[idx[i]] = chunk[i];
417:   PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);
418:   PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);
419:   PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);
420:   return(0);
421: }

423: /* -------------------------------------------------------------------------- */
424: /*
425:    PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e.,
426:                                       the preconditioner for the Schur complement.

428:    Input parameter:
429: .  r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.

431:    Output parameters:
432: .  z - global vector of interior and interface nodes. The values on the interface are the result of
433:        the application of the interface preconditioner to the interface part of r. The values on the
434:        interior nodes are garbage.
435: .  work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
436: .  vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
437: .  vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
438: .  vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
439: .  vec1_D - vector of local interior nodes; returns garbage (used as work space)
440: .  vec2_D - vector of local interior nodes; returns garbage (used as work space)
441: .  vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
442: .  vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)

444: */
445: PetscErrorCode PCNNApplyInterfacePreconditioner(PC pc, Vec r, Vec z, PetscScalar *work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D,Vec vec2_D, Vec vec1_N, Vec vec2_N)
446: {
448:   PC_IS          *pcis = (PC_IS*)(pc->data);

451:   /*
452:     First balancing step.
453:   */
454:   {
455:     PetscBool flg = PETSC_FALSE;
456:     PetscOptionsGetBool(NULL,NULL,"-pc_nn_turn_off_first_balancing",&flg,NULL);
457:     if (!flg) {
458:       PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);
459:     } else {
460:       VecCopy(r,z);
461:     }
462:   }

464:   /*
465:     Extract the local interface part of z and scale it by D
466:   */
467:   VecScatterBegin(pcis->global_to_B,z,vec1_B,INSERT_VALUES,SCATTER_FORWARD);
468:   VecScatterEnd  (pcis->global_to_B,z,vec1_B,INSERT_VALUES,SCATTER_FORWARD);
469:   VecPointwiseMult(vec2_B,pcis->D,vec1_B);

471:   /* Neumann Solver */
472:   PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);

474:   /*
475:     Second balancing step.
476:   */
477:   {
478:     PetscBool flg = PETSC_FALSE;
479:     PetscOptionsGetBool(NULL,NULL,"-pc_turn_off_second_balancing",&flg,NULL);
480:     if (!flg) {
481:       PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);
482:     } else {
483:       VecPointwiseMult(vec2_B,pcis->D,vec1_B);
484:       VecSet(z,0.0);
485:       VecScatterBegin(pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);
486:       VecScatterEnd  (pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);
487:     }
488:   }
489:   return(0);
490: }

492: /* -------------------------------------------------------------------------- */
493: /*
494:    PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
495:                    input argument u is provided), or s, as given in equations
496:                    (12) and (13), if the input argument u is a null vector.
497:                    Notice that the input argument u plays the role of u_i in
498:                    equation (14). The equation numbers refer to [Man93].

500:    Input Parameters:
501: .  pcnn - NN preconditioner context.
502: .  r - MPI vector of all nodes (interior and interface). It's preserved.
503: .  u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.

505:    Output Parameters:
506: .  z - MPI vector of interior and interface nodes. Returns s or z (see description above).
507: .  vec1_B - Sequential vector of local interface nodes. Workspace.
508: .  vec2_B - Sequential vector of local interface nodes. Workspace.
509: .  vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
510: .  vec1_D - Sequential vector of local interior nodes. Workspace.
511: .  vec2_D - Sequential vector of local interior nodes. Workspace.
512: .  work_N - Array of all local nodes (interior and interface). Workspace.

514: */
515: PetscErrorCode PCNNBalancing(PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B,Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
516: {
518:   PetscInt       k;
519:   PetscScalar    value;
520:   PetscScalar    *lambda;
521:   PC_NN          *pcnn = (PC_NN*)(pc->data);
522:   PC_IS          *pcis = (PC_IS*)(pc->data);

525:   PetscLogEventBegin(PC_ApplyCoarse,0,0,0,0);
526:   if (u) {
527:     if (!vec3_B) vec3_B = u;
528:     VecPointwiseMult(vec1_B,pcis->D,u);
529:     VecSet(z,0.0);
530:     VecScatterBegin(pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
531:     VecScatterEnd  (pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
532:     VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
533:     VecScatterEnd  (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
534:     PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);
535:     VecScale(vec3_B,-1.0);
536:     VecCopy(r,z);
537:     VecScatterBegin(pcis->global_to_B,vec3_B,z,ADD_VALUES,SCATTER_REVERSE);
538:     VecScatterEnd  (pcis->global_to_B,vec3_B,z,ADD_VALUES,SCATTER_REVERSE);
539:   } else {
540:     VecCopy(r,z);
541:   }
542:   VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
543:   VecScatterEnd  (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
544:   PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);
545:   for (k=0, value=0.0; k<pcis->n_shared[0]; k++) value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]];
546:   value *= pcnn->factor_coarse_rhs;  /* This factor is set in CreateCoarseMatrix(). */
547:   {
548:     PetscMPIInt rank;
549:     MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
550:     VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);
551:     /*
552:        Since we are only inserting local values (one value actually) we don't need to do the
553:        reduction that tells us there is no data that needs to be moved. Hence we comment out these
554:        VecAssemblyBegin(pcnn->coarse_b);
555:        VecAssemblyEnd  (pcnn->coarse_b);
556:     */
557:   }
558:   KSPSolve(pcnn->ksp_coarse,pcnn->coarse_b,pcnn->coarse_x);
559:   if (!u) { VecScale(pcnn->coarse_x,-1.0); }
560:   VecGetArray(pcnn->coarse_x,&lambda);
561:   for (k=0; k<pcis->n_shared[0]; k++) work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k];
562:   VecRestoreArray(pcnn->coarse_x,&lambda);
563:   PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);
564:   VecSet(z,0.0);
565:   VecScatterBegin(pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);
566:   VecScatterEnd  (pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);
567:   if (!u) {
568:     VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
569:     VecScatterEnd  (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
570:     PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);
571:     VecCopy(r,z);
572:   }
573:   VecScatterBegin(pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
574:   VecScatterEnd  (pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
575:   PetscLogEventEnd(PC_ApplyCoarse,0,0,0,0);
576:   return(0);
577: }




582: /*  -------   E N D   O F   T H E   C O D E   -------  */
583: /*                                                     */
584: /*  From now on, "footnotes" (or "historical notes").  */
585: /*                                                     */
586: /*  -------------------------------------------------  */



590: /* --------------------------------------------------------------------------
591:    Historical note 01
592:    -------------------------------------------------------------------------- */
593: /*
594:    We considered the possibility of an alternative D_i that would still
595:    provide a partition of unity (i.e., $ \sum_i  N_i D_i N_i^T = I $).
596:    The basic principle was still the pseudo-inverse of the counting
597:    function; the difference was that we would not count subdomains
598:    that do not contribute to the coarse space (i.e., not pure-Neumann
599:    subdomains).

601:    This turned out to be a bad idea:  we would solve trivial Neumann
602:    problems in the not pure-Neumann subdomains, since we would be scaling
603:    the balanced residual by zero.
604: */




609: /* --------------------------------------------------------------------------
610:    Historical note 02
611:    -------------------------------------------------------------------------- */
612: /*
613:    We tried an alternative coarse problem, that would eliminate exactly a
614:    constant error. Turned out not to improve the overall convergence.
615: */