Actual source code: bddc.c

petsc-3.4.5 2014-06-29
  1: /* TODOLIST
  2:    DofSplitting and DM attached to pc?
  3:    Change SetNeumannBoundaries to SetNeumannBoundariesLocal and provide new SetNeumannBoundaries (same Dirichlet)
  4:    change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment):
  5:      - simplify coarse problem structure -> PCBDDC or PCREDUNDANT, nothing else -> same comm for all levels?
  6:      - remove coarse enums and allow use of PCBDDCGetCoarseKSP
  7:      - remove metis dependency -> use MatPartitioning for multilevel -> Assemble serial adjacency in ManageLocalBoundaries?
  8:    code refactoring:
  9:      - pick up better names for static functions
 10:    change options structure:
 11:      - insert BDDC into MG framework?
 12:    provide other ops? Ask to developers
 13:    remove all unused printf
 14:    man pages
 15: */

 17: /* ----------------------------------------------------------------------------------------------------------------------------------------------
 18:    Implementation of BDDC preconditioner based on:
 19:    C. Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007
 20:    ---------------------------------------------------------------------------------------------------------------------------------------------- */

 22: #include "bddc.h"
 23: #include <petscblaslapack.h>
 24: /* -------------------------------------------------------------------------- */
 27: PetscErrorCode PCSetFromOptions_BDDC(PC pc)
 28: {
 29:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

 33:   PetscOptionsHead("BDDC options");
 34:   /* Verbose debugging of main data structures */
 35:   PetscOptionsBool("-pc_bddc_check_all"       ,"Verbose (debugging) output for PCBDDC"                       ,"none",pcbddc->dbg_flag      ,&pcbddc->dbg_flag      ,NULL);
 36:   /* Some customization for default primal space */
 37:   PetscOptionsBool("-pc_bddc_vertices_only"   ,"Use only vertices in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag   ,&pcbddc->vertices_flag   ,NULL);
 38:   PetscOptionsBool("-pc_bddc_constraints_only","Use only constraints in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,NULL);
 39:   PetscOptionsBool("-pc_bddc_faces_only"      ,"Use only faces among constraints of coarse space (i.e. discard edges)"         ,"none",pcbddc->faces_flag      ,&pcbddc->faces_flag      ,NULL);
 40:   PetscOptionsBool("-pc_bddc_edges_only"      ,"Use only edges among constraints of coarse space (i.e. discard faces)"         ,"none",pcbddc->edges_flag      ,&pcbddc->edges_flag      ,NULL);

 42:   /* Coarse solver context */
 43:   static const char * const avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel","CoarseProblemType","PC_BDDC_",0}; /* order of choices depends on the enum defined in bddc.h */
 44:   PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,NULL);

 46:   /* Two different applications of BDDC to the whole set of dofs, internal and interface */
 47:   PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->inexact_prec_type,&pcbddc->inexact_prec_type,NULL);
 48:   PetscOptionsBool("-pc_bddc_use_change_of_basis","Use change of basis approach for primal space","none",pcbddc->usechangeofbasis,&pcbddc->usechangeofbasis,NULL);
 49:   PetscOptionsBool("-pc_bddc_use_change_on_faces","Use change of basis approach for face constraints","none",pcbddc->usechangeonfaces,&pcbddc->usechangeonfaces,NULL);

 51:   pcbddc->usechangeonfaces = pcbddc->usechangeonfaces && pcbddc->usechangeofbasis;

 53:   PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,NULL);
 54:   PetscOptionsInt("-pc_bddc_max_levels","Set maximum number of levels for multilevel","none",pcbddc->max_levels,&pcbddc->max_levels,NULL);
 55:   PetscOptionsTail();
 56:   return(0);
 57: }
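These options can also be seeded programmatically before the options database is processed. A minimal sketch, using PetscOptionsSetValue as elsewhere in this file (the chosen values are illustrative only):

  /* select the multilevel coarse problem with two coarsening levels, and turn on
     the verbose debugging output; must run before PCSetFromOptions()/PCSetUp() */
  PetscOptionsSetValue("-pc_bddc_coarse_problem_type","multilevel");
  PetscOptionsSetValue("-pc_bddc_max_levels","2");
  PetscOptionsSetValue("-pc_bddc_check_all",NULL); /* a NULL value sets a bool option to true */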
 58: /* -------------------------------------------------------------------------- */

 62: static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT)
 63: {
 64:   PC_BDDC *pcbddc = (PC_BDDC*)pc->data;

 67:   pcbddc->coarse_problem_type = CPT;
 68:   return(0);
 69: }

 73: /*@
 74:  PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC.

 76:    Not collective

 78:    Input Parameters:
 79: +  pc - the preconditioning context
 80: -  CPT - the coarse problem type: one of sequential, replicated, parallel or multilevel (see CoarseProblemType in bddc.h)

 82:    Level: intermediate

 84:    Notes:
 85:    Not collective but all procs must call with same arguments.

 87: .seealso: PCBDDC
 88: @*/
 89: PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT)
 90: {

 95:   PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));
 96:   return(0);
 97: }
 98: /* -------------------------------------------------------------------------- */
101: static PetscErrorCode PCBDDCSetCoarseningRatio_BDDC(PC pc,PetscInt k)
102: {
103:   PC_BDDC *pcbddc = (PC_BDDC*)pc->data;

106:   pcbddc->coarsening_ratio=k;
107:   return(0);
108: }

112: /*@
113:  PCBDDCSetCoarseningRatio - Set coarsening ratio used in multilevel coarsening

115:    Logically collective on PC

117:    Input Parameters:
118: +  pc - the preconditioning context
119: -  k - coarsening ratio

 121:    Approximately k subdomains at the finer level will be aggregated into a single subdomain at the coarser level.

123:    Level: intermediate

125:    Notes:

127: .seealso: PCBDDC
128: @*/
129: PetscErrorCode PCBDDCSetCoarseningRatio(PC pc,PetscInt k)
130: {

135:   PetscTryMethod(pc,"PCBDDCSetCoarseningRatio_C",(PC,PetscInt),(pc,k));
136:   return(0);
137: }
138: /* -------------------------------------------------------------------------- */

142: static PetscErrorCode PCBDDCSetMaxLevels_BDDC(PC pc,PetscInt max_levels)
143: {
144:   PC_BDDC *pcbddc = (PC_BDDC*)pc->data;

147:   pcbddc->max_levels=max_levels;
148:   return(0);
149: }

153: /*@
154:  PCBDDCSetMaxLevels - Sets the maximum number of levels within the multilevel approach.

156:    Logically collective on PC

158:    Input Parameters:
159: +  pc - the preconditioning context
160: -  max_levels - the maximum number of levels

 162:    The default value is 1, i.e. the coarse problem will be solved inexactly with one application
 163:    of the PCBDDC preconditioner if the multilevel approach is requested.

165:    Level: intermediate

167:    Notes:

169: .seealso: PCBDDC
170: @*/
171: PetscErrorCode PCBDDCSetMaxLevels(PC pc,PetscInt max_levels)
172: {

177:   PetscTryMethod(pc,"PCBDDCSetMaxLevels_C",(PC,PetscInt),(pc,max_levels));
178:   return(0);
179: }
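A minimal usage sketch combining the two multilevel knobs above (pc is assumed to be a PC of type PCBDDC; the multilevel coarse problem type still has to be selected, e.g. with -pc_bddc_coarse_problem_type multilevel):

  PCBDDCSetCoarseningRatio(pc,4); /* aggregate roughly 4 subdomains per coarse subdomain */
  PCBDDCSetMaxLevels(pc,3);       /* allow up to three levels */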
180: /* -------------------------------------------------------------------------- */

184: static PetscErrorCode PCBDDCSetNullSpace_BDDC(PC pc,MatNullSpace NullSpace)
185: {
186:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

190:   PetscObjectReference((PetscObject)NullSpace);
191:   MatNullSpaceDestroy(&pcbddc->NullSpace);

193:   pcbddc->NullSpace = NullSpace;
194:   return(0);
195: }

199: /*@
200:  PCBDDCSetNullSpace - Set NullSpace of global operator of BDDC preconditioned mat.

202:    Logically collective on PC and MatNullSpace

204:    Input Parameters:
205: +  pc - the preconditioning context
206: -  NullSpace - Null space of the linear operator to be preconditioned.

208:    Level: intermediate

210:    Notes:

212: .seealso: PCBDDC
213: @*/
214: PetscErrorCode PCBDDCSetNullSpace(PC pc,MatNullSpace NullSpace)
215: {

220:   PetscTryMethod(pc,"PCBDDCSetNullSpace_C",(PC,MatNullSpace),(pc,NullSpace));
221:   return(0);
222: }
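A typical use is a singular operator, e.g. a pure Neumann problem with the constant null space. A minimal sketch (pc is assumed to be a PCBDDC context):

  MatNullSpace nsp;
  MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_TRUE,0,NULL,&nsp);
  PCBDDCSetNullSpace(pc,nsp);
  MatNullSpaceDestroy(&nsp); /* the PC holds its own reference, see PCBDDCSetNullSpace_BDDC above */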
223: /* -------------------------------------------------------------------------- */

227: static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries)
228: {
229:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

233:   ISDestroy(&pcbddc->DirichletBoundaries);
234:   PetscObjectReference((PetscObject)DirichletBoundaries);

236:   pcbddc->DirichletBoundaries = DirichletBoundaries;
237:   return(0);
238: }

242: /*@
243:  PCBDDCSetDirichletBoundaries - Set index set defining subdomain part (in local ordering)
244:                               of Dirichlet boundaries for the global problem.

246:    Not collective

248:    Input Parameters:
249: +  pc - the preconditioning context
250: -  DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be NULL)

252:    Level: intermediate

254:    Notes:

256: .seealso: PCBDDC
257: @*/
258: PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries)
259: {

264:   PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));
265:   return(0);
266: }
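A minimal sketch, assuming the application knows the local (subdomain-ordered) indices of its Dirichlet dofs; n_dir and dir_indices are hypothetical application data. PCBDDCSetNeumannBoundaries below is used in exactly the same way:

  IS dir;
  ISCreateGeneral(PETSC_COMM_SELF,n_dir,dir_indices,PETSC_COPY_VALUES,&dir);
  PCBDDCSetDirichletBoundaries(pc,dir);
  ISDestroy(&dir); /* the PC keeps a reference */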
267: /* -------------------------------------------------------------------------- */

271: static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries)
272: {
273:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

277:   ISDestroy(&pcbddc->NeumannBoundaries);
278:   PetscObjectReference((PetscObject)NeumannBoundaries);

280:   pcbddc->NeumannBoundaries = NeumannBoundaries;
281:   return(0);
282: }

286: /*@
287:  PCBDDCSetNeumannBoundaries - Set index set defining subdomain part (in local ordering)
288:                               of Neumann boundaries for the global problem.

290:    Not collective

292:    Input Parameters:
293: +  pc - the preconditioning context
294: -  NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be NULL)

296:    Level: intermediate

298:    Notes:

300: .seealso: PCBDDC
301: @*/
302: PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries)
303: {

308:   PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));
309:   return(0);
310: }
311: /* -------------------------------------------------------------------------- */

315: static PetscErrorCode PCBDDCGetDirichletBoundaries_BDDC(PC pc,IS *DirichletBoundaries)
316: {
317:   PC_BDDC *pcbddc = (PC_BDDC*)pc->data;

320:   *DirichletBoundaries = pcbddc->DirichletBoundaries;
321:   return(0);
322: }

326: /*@
327:  PCBDDCGetDirichletBoundaries - Get index set defining subdomain part (in local ordering)
328:                                 of Dirichlet boundaries for the global problem.

330:    Not collective

332:    Input Parameters:
333: +  pc - the preconditioning context

335:    Output Parameters:
336: +  DirichletBoundaries - index set defining the subdomain part of Dirichlet boundaries

338:    Level: intermediate

340:    Notes:

342: .seealso: PCBDDC
343: @*/
344: PetscErrorCode PCBDDCGetDirichletBoundaries(PC pc,IS *DirichletBoundaries)
345: {

350:   PetscUseMethod(pc,"PCBDDCGetDirichletBoundaries_C",(PC,IS*),(pc,DirichletBoundaries));
351:   return(0);
352: }
353: /* -------------------------------------------------------------------------- */

357: static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries)
358: {
359:   PC_BDDC *pcbddc = (PC_BDDC*)pc->data;

362:   *NeumannBoundaries = pcbddc->NeumannBoundaries;
363:   return(0);
364: }

368: /*@
369:  PCBDDCGetNeumannBoundaries - Get index set defining subdomain part (in local ordering)
370:                               of Neumann boundaries for the global problem.

372:    Not collective

374:    Input Parameters:
375: +  pc - the preconditioning context

377:    Output Parameters:
378: +  NeumannBoundaries - index set defining the subdomain part of Neumann boundaries

380:    Level: intermediate

382:    Notes:

384: .seealso: PCBDDC
385: @*/
386: PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries)
387: {

392:   PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));
393:   return(0);
394: }
395: /* -------------------------------------------------------------------------- */

399: static PetscErrorCode PCBDDCSetLocalAdjacencyGraph_BDDC(PC pc, PetscInt nvtxs,const PetscInt xadj[],const PetscInt adjncy[], PetscCopyMode copymode)
400: {
401:   PC_BDDC        *pcbddc  = (PC_BDDC*)pc->data;
402:   PCBDDCGraph    mat_graph=pcbddc->mat_graph;

406:   mat_graph->nvtxs=nvtxs;

408:   PetscFree(mat_graph->xadj);
409:   PetscFree(mat_graph->adjncy);
410:   if (copymode == PETSC_COPY_VALUES) {
411:     PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&mat_graph->xadj);
412:     PetscMalloc(xadj[mat_graph->nvtxs]*sizeof(PetscInt),&mat_graph->adjncy);
413:     PetscMemcpy(mat_graph->xadj,xadj,(mat_graph->nvtxs+1)*sizeof(PetscInt));
414:     PetscMemcpy(mat_graph->adjncy,adjncy,xadj[mat_graph->nvtxs]*sizeof(PetscInt));
415:   } else if (copymode == PETSC_OWN_POINTER) {
416:     mat_graph->xadj   = (PetscInt*)xadj;
417:     mat_graph->adjncy = (PetscInt*)adjncy;
418:   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported copy mode %d\n",copymode);
419:   return(0);
420: }

424: /*@
425:  PCBDDCSetLocalAdjacencyGraph - Set CSR graph of local matrix for use of PCBDDC.

427:    Not collective

429:    Input Parameters:
430: +  pc - the preconditioning context
 431: .  nvtxs - number of local vertices of the graph
 432: .  xadj, adjncy - the CSR graph
 433: -  copymode - either PETSC_COPY_VALUES (the arrays are copied, and the caller remains responsible for freeing the ones passed in)
 434:              or PETSC_OWN_POINTER (PCBDDC takes ownership of the arrays, whose memory must have been obtained with PetscMalloc)

436:    Level: intermediate

438:    Notes:

440: .seealso: PCBDDC
441: @*/
442: PetscErrorCode PCBDDCSetLocalAdjacencyGraph(PC pc,PetscInt nvtxs,const PetscInt xadj[],const PetscInt adjncy[], PetscCopyMode copymode)
443: {
444:   PetscInt       nrows,ncols;
445:   Mat_IS         *matis = (Mat_IS*)pc->pmat->data;

450:   MatGetSize(matis->A,&nrows,&ncols);
 451:   if (nvtxs != nrows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local adjacency size %d differs from local problem size %d!\n",nvtxs,nrows);
452:   else {
453:     PetscTryMethod(pc,"PCBDDCSetLocalAdjacencyGraph_C",(PC,PetscInt,const PetscInt[],const PetscInt[],PetscCopyMode),(pc,nvtxs,xadj,adjncy,copymode));
454:   }
455:   return(0);
456: }
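A sketch building the CSR graph of a hypothetical 1D chain of nvtxs local dofs (dof i connected to i-1 and i+1); since PETSC_OWN_POINTER is passed, the arrays must come from PetscMalloc and are freed by the PC:

  PetscInt i,*xadj,*adjncy;
  PetscMalloc((nvtxs+1)*sizeof(PetscInt),&xadj);
  PetscMalloc(2*(nvtxs-1)*sizeof(PetscInt),&adjncy);
  xadj[0] = 0;
  for (i=0; i<nvtxs; i++) {
    PetscInt nn = 0;
    if (i > 0)       adjncy[xadj[i]+nn++] = i-1;  /* left neighbour  */
    if (i < nvtxs-1) adjncy[xadj[i]+nn++] = i+1;  /* right neighbour */
    xadj[i+1] = xadj[i]+nn;
  }
  PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_OWN_POINTER);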
457: /* -------------------------------------------------------------------------- */

461: static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[])
462: {
463:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
464:   PetscInt       i;

468:   /* Destroy ISes if they were already set */
469:   for (i=0; i<pcbddc->n_ISForDofs; i++) {
470:     ISDestroy(&pcbddc->ISForDofs[i]);
471:   }
472:   PetscFree(pcbddc->ISForDofs);
473:   /* allocate space then set */
474:   PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);
475:   for (i=0; i<n_is; i++) {
476:     PetscObjectReference((PetscObject)ISForDofs[i]);

478:     pcbddc->ISForDofs[i]=ISForDofs[i];
479:   }
480:   pcbddc->n_ISForDofs=n_is;
481:   return(0);
482: }

486: /*@
487:  PCBDDCSetDofsSplitting - Set index sets defining fields of local mat.

489:    Not collective

491:    Input Parameters:
492: +  pc - the preconditioning context
 493: .  n_is - number of index sets defining the fields
 494: -  ISForDofs - array of index sets describing the fields

496:    Level: intermediate

498:    Notes:

500: .seealso: PCBDDC
501: @*/
502: PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[])
503: {

508:   PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));
509:   return(0);
510: }
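A sketch for a hypothetical interlaced three-field layout (say u,v,p) on n local dofs, where field f owns dofs f, f+3, f+6, ...:

  IS       fields[3];
  PetscInt f;
  for (f=0; f<3; f++) {
    ISCreateStride(PETSC_COMM_SELF,n/3,f,3,&fields[f]);
  }
  PCBDDCSetDofsSplitting(pc,3,fields);
  for (f=0; f<3; f++) {
    ISDestroy(&fields[f]); /* references are kept by the PC */
  }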
511: /* -------------------------------------------------------------------------- */
514: /* -------------------------------------------------------------------------- */
515: /*
516:    PCPreSolve_BDDC - Changes the right hand side and (if necessary) the initial
517:                      guess if a transformation of basis approach has been selected.

519:    Input Parameter:
 520: +  pc - the preconditioner context

522:    Application Interface Routine: PCPreSolve()

524:    Notes:
525:    The interface routine PCPreSolve() is not usually called directly by
526:    the user, but instead is called by KSPSolve().
527: */
528: static PetscErrorCode PCPreSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x)
529: {
531:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
532:   PC_IS          *pcis   = (PC_IS*)(pc->data);
533:   Mat_IS         *matis  = (Mat_IS*)pc->pmat->data;
534:   Mat            temp_mat;
535:   IS             dirIS;
536:   PetscInt       dirsize,i,*is_indices;
537:   PetscScalar    *array_x,*array_diagonal;
538:   Vec            used_vec;
539:   PetscBool      guess_nonzero;

542:   if (x) {
543:     PetscObjectReference((PetscObject)x);
544:     used_vec = x;
545:   } else {
546:     PetscObjectReference((PetscObject)pcbddc->temp_solution);
547:     used_vec = pcbddc->temp_solution;
548:     VecSet(used_vec,0.0);
549:   }
 550:   /* hack into the KSP data structure: PCPreSolve is called by KSPSolve before the initial guess is handled (see src/ksp/ksp/interface/itfunc.c), so honor the nonzero-guess flag here */
551:   if (ksp) {
552:     KSPGetInitialGuessNonzero(ksp,&guess_nonzero);
553:     if (!guess_nonzero) {
554:       VecSet(used_vec,0.0);
555:     }
556:   }
557:   /* store the original rhs */
558:   VecCopy(rhs,pcbddc->original_rhs);

560:   /* Take into account zeroed rows -> change rhs and store solution removed */
561:   MatGetDiagonal(pc->pmat,pcis->vec1_global);
562:   VecPointwiseDivide(pcis->vec1_global,rhs,pcis->vec1_global);
563:   VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
564:   VecScatterEnd  (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
565:   VecScatterBegin(matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
566:   VecScatterEnd  (matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
567:   PCBDDCGetDirichletBoundaries(pc,&dirIS);
568:   if (dirIS) {
569:     ISGetSize(dirIS,&dirsize);
570:     VecGetArray(pcis->vec1_N,&array_x);
571:     VecGetArray(pcis->vec2_N,&array_diagonal);
572:     ISGetIndices(dirIS,(const PetscInt**)&is_indices);
573:     for (i=0; i<dirsize; i++) array_x[is_indices[i]] = array_diagonal[is_indices[i]];

575:     ISRestoreIndices(dirIS,(const PetscInt**)&is_indices);
576:     VecRestoreArray(pcis->vec2_N,&array_diagonal);
577:     VecRestoreArray(pcis->vec1_N,&array_x);
578:   }
579:   VecScatterBegin(matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);
580:   VecScatterEnd  (matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);

582:   /* remove the computed solution from the rhs */
583:   VecScale(used_vec,-1.0);
584:   MatMultAdd(pc->pmat,used_vec,rhs,rhs);
585:   VecScale(used_vec,-1.0);

587:   /* store partially computed solution and set initial guess */
588:   if (x) {
589:     VecCopy(used_vec,pcbddc->temp_solution);
590:     VecSet(used_vec,0.0);
591:     if (pcbddc->use_exact_dirichlet) {
592:       VecScatterBegin(pcis->global_to_D,rhs,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
593:       VecScatterEnd  (pcis->global_to_D,rhs,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
594:       KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);
595:       VecScatterBegin(pcis->global_to_D,pcis->vec2_D,used_vec,INSERT_VALUES,SCATTER_REVERSE);
596:       VecScatterEnd  (pcis->global_to_D,pcis->vec2_D,used_vec,INSERT_VALUES,SCATTER_REVERSE);
597:       if (ksp) {
598:         KSPSetInitialGuessNonzero(ksp,PETSC_TRUE);
599:       }
600:     }
601:   }

603:   /* rhs change of basis */
604:   if (pcbddc->usechangeofbasis) {
605:     /* swap pointers for local matrices */
606:     temp_mat          = matis->A;
607:     matis->A          = pcbddc->local_mat;
608:     pcbddc->local_mat = temp_mat;
609:     /* Get local rhs and apply transformation of basis */
610:     VecScatterBegin(pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
611:     VecScatterEnd  (pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
612:     /* from original basis to modified basis */
613:     MatMultTranspose(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);
614:     /* put back modified values into the global vec using INSERT_VALUES copy mode */
615:     VecScatterBegin(pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);
616:     VecScatterEnd  (pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);
617:     if (ksp && pcbddc->NullSpace) {
618:       MatNullSpaceRemove(pcbddc->NullSpace,used_vec,NULL);
619:       MatNullSpaceRemove(pcbddc->NullSpace,rhs,NULL);
620:     }
621:   }
622:   VecDestroy(&used_vec);
623:   return(0);
624: }
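Schematically, the zeroed-rows handling above computes, for every local Dirichlet dof i,

  $$\hat x_i = r_i / a_{ii}, \qquad r \leftarrow r - A\hat x,$$

i.e. it recovers the boundary value encoded by a row that was zeroed keeping its diagonal, removes its contribution from the rhs, and stores the partial solution in pcbddc->temp_solution so that PCPostSolve_BDDC can add it back to the computed solution.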
625: /* -------------------------------------------------------------------------- */
628: /* -------------------------------------------------------------------------- */
629: /*
630:    PCPostSolve_BDDC - Changes the computed solution if a transformation of basis
631:                      approach has been selected. Also, restores rhs to its original state.

633:    Input Parameter:
 634: +  pc - the preconditioner context

636:    Application Interface Routine: PCPostSolve()

638:    Notes:
639:    The interface routine PCPostSolve() is not usually called directly by
640:    the user, but instead is called by KSPSolve().
641: */
642: static PetscErrorCode PCPostSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x)
643: {
645:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
646:   PC_IS          *pcis   = (PC_IS*)(pc->data);
647:   Mat_IS         *matis  = (Mat_IS*)pc->pmat->data;
648:   Mat            temp_mat;

651:   if (pcbddc->usechangeofbasis) {
652:     /* swap pointers for local matrices */
653:     temp_mat          = matis->A;
654:     matis->A          = pcbddc->local_mat;
655:     pcbddc->local_mat = temp_mat;
656:     /* restore rhs to its original state */
657:     if (rhs) {
658:       VecCopy(pcbddc->original_rhs,rhs);
659:     }
660:     /* Get Local boundary and apply transformation of basis to solution vector */
661:     VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
662:     VecScatterEnd  (pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
663:     /* from modified basis to original basis */
664:     MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);
665:     /* put back modified values into the global vec using INSERT_VALUES copy mode */
666:     VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);
667:     VecScatterEnd  (pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);
668:   }
669:   /* add solution removed in presolve */
670:   if (x) {
671:     VecAXPY(x,1.0,pcbddc->temp_solution);
672:   }
673:   return(0);
674: }
675: /* -------------------------------------------------------------------------- */
678: /* -------------------------------------------------------------------------- */
679: /*
680:    PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner
681:                   by setting data structures and options.

683:    Input Parameter:
684: +  pc - the preconditioner context

686:    Application Interface Routine: PCSetUp()

688:    Notes:
689:    The interface routine PCSetUp() is not usually called directly by
690:    the user, but instead is called by PCApply() if necessary.
691: */
692: PetscErrorCode PCSetUp_BDDC(PC pc)
693: {
695:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

698:   if (!pc->setupcalled) {
 699:     /* For BDDC we need to define a local "Neumann" problem different from that defined in PCISSetUp,
 700:        so we set the PC type of the PCIS Neumann problem to PCNONE to avoid unneeded computation.
 701:        We also choose to build the (same) Dirichlet problem directly */
702:     PetscOptionsSetValue("-is_localN_pc_type","none");
703:     PetscOptionsSetValue("-is_localD_pc_type","none");
704:     /* Set up all the "iterative substructuring" common block */

706:     PCISSetUp(pc);
707:     /* Get stdout for dbg */
708:     if (pcbddc->dbg_flag) {
709:       PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)pc),&pcbddc->dbg_viewer);
710:       PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
711:     }
712:     /* Analyze local interface */
713:     PCBDDCManageLocalBoundaries(pc);
714:     /* Set up local constraint matrix */
715:     PCBDDCCreateConstraintMatrix(pc);
716:     /* Create coarse and local stuffs used for evaluating action of preconditioner */
717:     PCBDDCCoarseSetUp(pc);
718:   }
719:   return(0);
720: }

722: /* -------------------------------------------------------------------------- */
723: /*
724:    PCApply_BDDC - Applies the BDDC preconditioner to a vector.

726:    Input Parameters:
727: .  pc - the preconditioner context
728: .  r - input vector (global)

730:    Output Parameter:
731: .  z - output vector (global)

733:    Application Interface Routine: PCApply()
734:  */
737: PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z)
738: {
739:   PC_IS             *pcis   = (PC_IS*)(pc->data);
740:   PC_BDDC           *pcbddc = (PC_BDDC*)(pc->data);
741:   PetscErrorCode    ierr;
742:   const PetscScalar one   = 1.0;
743:   const PetscScalar m_one = -1.0;
744:   const PetscScalar zero  = 0.0;

 746: /* This code is similar to that provided in nn.c for PCNN, with the NN interface
 747:    preconditioner replaced by BDDC. Support has been added for the M_3 preconditioner
 748:    of the reference article (active when pcbddc->inexact_prec_type == PETSC_TRUE) */

751:   if (!pcbddc->use_exact_dirichlet) {
752:     /* First Dirichlet solve */
753:     VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
754:     VecScatterEnd  (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
755:     KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);
756:     /*
757:       Assembling right hand side for BDDC operator
758:       - vec1_D for the Dirichlet part (if needed, i.e. prec_flag=PETSC_TRUE)
759:       - the interface part of the global vector z
760:     */
761:     VecScale(pcis->vec2_D,m_one);
762:     MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);
763:     if (pcbddc->inexact_prec_type) { MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D); }
764:     VecScale(pcis->vec2_D,m_one);
765:     VecCopy(r,z);
766:     VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
767:     VecScatterEnd  (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
768:     VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
769:     VecScatterEnd  (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
770:   } else {
771:     VecScatterBegin(pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
772:     VecScatterEnd  (pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
773:     VecSet(pcis->vec1_D,zero);
774:     VecSet(pcis->vec2_D,zero);
775:   }

777:   /* Apply partition of unity */
778:   VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);

780:   /* Apply interface preconditioner
781:      input/output vecs: pcis->vec1_B and pcis->vec1_D */
782:   PCBDDCApplyInterfacePreconditioner(pc);

784:   /* Apply partition of unity and sum boundary values */
785:   VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);
786:   VecSet(z,zero);
787:   VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
788:   VecScatterEnd  (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);

790:   /* Second Dirichlet solve and assembling of output */
791:   VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
792:   VecScatterEnd  (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
793:   MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);
794:   if (pcbddc->inexact_prec_type) { MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D); }
795:   KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);
796:   VecScale(pcbddc->vec4_D,m_one);
797:   if (pcbddc->inexact_prec_type) { VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D); }
798:   VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);
799:   VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
800:   VecScatterEnd  (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
801:   return(0);

803: }
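Schematically, on the default path (use_exact_dirichlet and inexact_prec_type both false), and writing $\widetilde S^{-1}$ for the action of PCBDDCApplyInterfacePreconditioner and $D$ for the partition of unity, the routine above computes

  $$z_\Gamma = D\,\widetilde S^{-1} D\,\bigl(r_\Gamma - A_{\Gamma I} A_{II}^{-1} r_I\bigr),\qquad
    z_I = A_{II}^{-1}\bigl(r_I - A_{I\Gamma} z_\Gamma\bigr),$$

with the two $A_{II}^{-1}$ applications provided by the Dirichlet solver ksp_D.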
804: /* -------------------------------------------------------------------------- */
807: PetscErrorCode PCDestroy_BDDC(PC pc)
808: {
809:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
810:   PetscInt       i;

814:   /* free data created by PCIS */
815:   PCISDestroy(pc);
816:   /* free BDDC data  */
817:   MatNullSpaceDestroy(&pcbddc->CoarseNullSpace);
818:   MatNullSpaceDestroy(&pcbddc->NullSpace);
819:   VecDestroy(&pcbddc->temp_solution);
820:   VecDestroy(&pcbddc->original_rhs);
821:   MatDestroy(&pcbddc->local_mat);
822:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
823:   VecDestroy(&pcbddc->coarse_vec);
824:   VecDestroy(&pcbddc->coarse_rhs);
825:   KSPDestroy(&pcbddc->coarse_ksp);
826:   MatDestroy(&pcbddc->coarse_mat);
827:   MatDestroy(&pcbddc->coarse_phi_B);
828:   MatDestroy(&pcbddc->coarse_phi_D);
829:   VecDestroy(&pcbddc->vec1_P);
830:   VecDestroy(&pcbddc->vec1_C);
831:   MatDestroy(&pcbddc->local_auxmat1);
832:   MatDestroy(&pcbddc->local_auxmat2);
833:   VecDestroy(&pcbddc->vec1_R);
834:   VecDestroy(&pcbddc->vec2_R);
835:   VecDestroy(&pcbddc->vec4_D);
836:   VecScatterDestroy(&pcbddc->R_to_B);
837:   VecScatterDestroy(&pcbddc->R_to_D);
838:   VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
839:   KSPDestroy(&pcbddc->ksp_D);
840:   KSPDestroy(&pcbddc->ksp_R);
841:   ISDestroy(&pcbddc->NeumannBoundaries);
842:   ISDestroy(&pcbddc->DirichletBoundaries);
843:   MatDestroy(&pcbddc->ConstraintMatrix);
844:   PetscFree(pcbddc->local_primal_indices);
845:   PetscFree(pcbddc->replicated_local_primal_indices);
846:   PetscFree(pcbddc->replicated_local_primal_values);
847:   PetscFree(pcbddc->local_primal_displacements);
848:   PetscFree(pcbddc->local_primal_sizes);
849:   for (i=0; i<pcbddc->n_ISForDofs; i++) {
850:     ISDestroy(&pcbddc->ISForDofs[i]);
851:   }
852:   PetscFree(pcbddc->ISForDofs);
853:   for (i=0; i<pcbddc->n_ISForFaces; i++) {
854:     ISDestroy(&pcbddc->ISForFaces[i]);
855:   }
856:   PetscFree(pcbddc->ISForFaces);
857:   for (i=0; i<pcbddc->n_ISForEdges; i++) {
858:     ISDestroy(&pcbddc->ISForEdges[i]);
859:   }
860:   PetscFree(pcbddc->ISForEdges);
861:   ISDestroy(&pcbddc->ISForVertices);
862:   /* Free graph structure */
863:   PetscFree(pcbddc->mat_graph->xadj);
864:   PetscFree(pcbddc->mat_graph->adjncy);
865:   if (pcbddc->mat_graph->nvtxs) {
866:     PetscFree(pcbddc->mat_graph->neighbours_set[0]);
867:   }
868:   PetscFree(pcbddc->mat_graph->neighbours_set);
869:   PetscFree4(pcbddc->mat_graph->where,pcbddc->mat_graph->count,pcbddc->mat_graph->cptr,pcbddc->mat_graph->queue);
870:   PetscFree2(pcbddc->mat_graph->which_dof,pcbddc->mat_graph->touched);
871:   PetscFree(pcbddc->mat_graph->where_ncmps);
872:   PetscFree(pcbddc->mat_graph);
873:   /* remove functions */
874:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetCoarseningRatio_C",NULL);
875:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetMaxLevels_C",NULL);
876:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetNullSpace_C",NULL);
877:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C",NULL);
878:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C",NULL);
879:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C",NULL);
880:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C",NULL);
881:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetCoarseProblemType_C",NULL);
882:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetDofsSplitting_C",NULL);
883:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C",NULL);
884:   PetscObjectComposeFunction((PetscObject)pc,"PCPreSolve_C",NULL);
885:   PetscObjectComposeFunction((PetscObject)pc,"PCPostSolve_C",NULL);
886:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C",NULL);
887:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C",NULL);
888:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C",NULL);
889:   /* Free the private data structure that was hanging off the PC */
890:   PetscFree(pcbddc);
891:   return(0);
892: }
893: /* -------------------------------------------------------------------------- */

897: static PetscErrorCode PCBDDCMatFETIDPGetRHS_BDDC(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs)
898: {
899:   FETIDPMat_ctx  *mat_ctx;
900:   PC_IS          * pcis;
901:   PC_BDDC        * pcbddc;

905:   MatShellGetContext(fetidp_mat,&mat_ctx);
906:   pcis   = (PC_IS*)mat_ctx->pc->data;
907:   pcbddc = (PC_BDDC*)mat_ctx->pc->data;

909:   /* change of basis for physical rhs if needed
 910:      It also changes the rhs in case of Dirichlet boundaries */
911:   (*mat_ctx->pc->ops->presolve)(mat_ctx->pc,NULL,standard_rhs,NULL);
912:   /* store vectors for computation of fetidp final solution */
913:   VecScatterBegin(pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);
914:   VecScatterEnd  (pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);
915:   VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);
916:   VecScatterEnd  (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);
917:   /* scale rhs since it should be unassembled */
918:   VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);
919:   if (!pcbddc->inexact_prec_type) {
920:     /* compute partially subassembled Schur complement right-hand side */
921:     KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);
922:     MatMult(pcis->A_BI,pcis->vec1_D,pcis->vec1_B);
923:     VecAXPY(mat_ctx->temp_solution_B,-1.0,pcis->vec1_B);
924:     VecSet(standard_rhs,0.0);
925:     VecScatterBegin(pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);
926:     VecScatterEnd  (pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);
927:     VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);
928:     VecScatterEnd  (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);
929:     VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);
930:   }
931:   /* BDDC rhs */
932:   VecCopy(mat_ctx->temp_solution_B,pcis->vec1_B);
933:   if (pcbddc->inexact_prec_type) {
934:     VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);
935:   }
936:   /* apply BDDC */
937:   PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);
938:   /* Application of B_delta and assembling of rhs for fetidp fluxes */
939:   VecSet(fetidp_flux_rhs,0.0);
940:   MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);
941:   VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);
942:   VecScatterEnd  (mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);
943:   /* restore original rhs */
944:   VecCopy(pcbddc->original_rhs,standard_rhs);
945:   return(0);
946: }

950: /*@
951:  PCBDDCMatFETIDPGetRHS - Get rhs for FETIDP linear system.

953:    Collective

955:    Input Parameters:
 956: +  fetidp_mat   - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators
 957: -  standard_rhs - the rhs of your linear system

 959:    Output Parameters:
 960: .  fetidp_flux_rhs   - the rhs of the FETIDP linear system

962:    Level: developer

964:    Notes:

966: .seealso: PCBDDC
967: @*/
968: PetscErrorCode PCBDDCMatFETIDPGetRHS(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs)
969: {
970:   FETIDPMat_ctx  *mat_ctx;

974:   MatShellGetContext(fetidp_mat,&mat_ctx);
975:   PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetRHS_C",(Mat,Vec,Vec),(fetidp_mat,standard_rhs,fetidp_flux_rhs));
976:   return(0);
977: }
978: /* -------------------------------------------------------------------------- */

982: static PetscErrorCode PCBDDCMatFETIDPGetSolution_BDDC(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol)
983: {
984:   FETIDPMat_ctx  *mat_ctx;
985:   PC_IS          *pcis;
986:   PC_BDDC        *pcbddc;

990:   MatShellGetContext(fetidp_mat,&mat_ctx);
991:   pcis   = (PC_IS*)mat_ctx->pc->data;
992:   pcbddc = (PC_BDDC*)mat_ctx->pc->data;

994:   /* apply B_delta^T */
995:   VecScatterBegin(mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);
996:   VecScatterEnd  (mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);
997:   MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);
998:   /* compute rhs for BDDC application */
999:   VecAYPX(pcis->vec1_B,-1.0,mat_ctx->temp_solution_B);
1000:   if (pcbddc->inexact_prec_type) {
1001:     VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);
1002:   }
1003:   /* apply BDDC */
1004:   PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);
1005:   /* put values into standard global vector */
1006:   VecScatterBegin(pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);
1007:   VecScatterEnd  (pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);
1008:   if (!pcbddc->inexact_prec_type) {
1009:     /* compute interior values when we solved for the partially subassembled Schur complement */
1010:     MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);
1011:     VecAXPY(mat_ctx->temp_solution_D,-1.0,pcis->vec1_D);
1012:     KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);
1013:   }
1014:   VecScatterBegin(pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);
1015:   VecScatterEnd  (pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);
1016:   /* final change of basis if needed
1017:      It also sums the Dirichlet part removed during rhs assembling */
1018:   (*mat_ctx->pc->ops->postsolve)(mat_ctx->pc,NULL,NULL,standard_sol);
1019:   return(0);

1021: }

1025: /*@
1026:  PCBDDCMatFETIDPGetSolution - Get solution for FETIDP linear system.

1028:    Collective

1030:    Input Parameters:
1031: +  fetidp_mat      - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators
1032: -  fetidp_flux_sol - the solution of the FETIDP linear system

1034:    Output Parameters:
1035: .  standard_sol    - the solution on the global domain

1037:    Level: developer

1039:    Notes:

1041: .seealso: PCBDDC
1042: @*/
1043: PetscErrorCode PCBDDCMatFETIDPGetSolution(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol)
1044: {
1045:   FETIDPMat_ctx  *mat_ctx;

1049:   MatShellGetContext(fetidp_mat,&mat_ctx);
1050:   PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetSolution_C",(Mat,Vec,Vec),(fetidp_mat,fetidp_flux_sol,standard_sol));
1051:   return(0);
1052: }
1053: /* -------------------------------------------------------------------------- */

1055: extern PetscErrorCode FETIDPMatMult(Mat,Vec,Vec);
1056: extern PetscErrorCode PCBDDCDestroyFETIDPMat(Mat);
1057: extern PetscErrorCode FETIDPPCApply(PC,Vec,Vec);
1058: extern PetscErrorCode PCBDDCDestroyFETIDPPC(PC);
1061: static PetscErrorCode PCBDDCCreateFETIDPOperators_BDDC(PC pc, Mat *fetidp_mat, PC *fetidp_pc)
1062: {
1063:   FETIDPMat_ctx  *fetidpmat_ctx;
1064:   Mat            newmat;
1065:   FETIDPPC_ctx   *fetidppc_ctx;
1066:   PC             newpc;
1067:   MPI_Comm       comm;

1071:   PetscObjectGetComm((PetscObject)pc,&comm);
1072:   /* FETIDP linear matrix */
1073:   PCBDDCCreateFETIDPMatContext(pc, &fetidpmat_ctx);
1074:   PCBDDCSetupFETIDPMatContext(fetidpmat_ctx);
1075:   MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,fetidpmat_ctx->n_lambda,fetidpmat_ctx->n_lambda,fetidpmat_ctx,&newmat);
1076:   MatShellSetOperation(newmat,MATOP_MULT,(void (*)(void))FETIDPMatMult);
1077:   MatShellSetOperation(newmat,MATOP_DESTROY,(void (*)(void))PCBDDCDestroyFETIDPMat);
1078:   MatSetUp(newmat);
1079:   /* FETIDP preconditioner */
1080:   PCBDDCCreateFETIDPPCContext(pc, &fetidppc_ctx);
1081:   PCBDDCSetupFETIDPPCContext(newmat,fetidppc_ctx);
1082:   PCCreate(comm,&newpc);
1083:   PCSetType(newpc,PCSHELL);
1084:   PCShellSetContext(newpc,fetidppc_ctx);
1085:   PCShellSetApply(newpc,FETIDPPCApply);
1086:   PCShellSetDestroy(newpc,PCBDDCDestroyFETIDPPC);
1087:   PCSetOperators(newpc,newmat,newmat,SAME_PRECONDITIONER);
1088:   PCSetUp(newpc);

1090:   /* return pointers for objects created */
1091:   *fetidp_mat = newmat;
1092:   *fetidp_pc  = newpc;
1093:   return(0);
1094: }

1098: /*@
1099:  PCBDDCCreateFETIDPOperators - Create operators for FETIDP.

1101:    Collective

1103:    Input Parameters:
1104: .  pc - the BDDC preconditioning context (PCSetUp() must already have been called)

   Output Parameters:
+  fetidp_mat - shell matrix implementing the action of the FETIDP operator
-  fetidp_pc  - shell preconditioner for the FETIDP linear system

1106:    Level: developer

1108:    Notes:

1110: .seealso: PCBDDC
1111: @*/
1112: PetscErrorCode PCBDDCCreateFETIDPOperators(PC pc, Mat *fetidp_mat, PC *fetidp_pc)
1113: {

1118:   if (pc->setupcalled) {
1119:     PetscTryMethod(pc,"PCBDDCCreateFETIDPOperators_C",(PC,Mat*,PC*),(pc,fetidp_mat,fetidp_pc));
1120:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"You must call PCSetUp() first\n");
1121:   return(0);
1122: }
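A minimal solve sketch using the three FETIDP entry points (pc must already be set up; rhs and x are assumed to be the global system vectors, all other names are local):

  Mat F;
  PC  Fpc;
  KSP fksp;
  Vec lambda,flux_rhs;
  PCBDDCCreateFETIDPOperators(pc,&F,&Fpc);
  MatGetVecs(F,&lambda,&flux_rhs);
  PCBDDCMatFETIDPGetRHS(F,rhs,flux_rhs);
  KSPCreate(PetscObjectComm((PetscObject)F),&fksp);
  KSPSetOperators(fksp,F,F,SAME_PRECONDITIONER);
  KSPSetPC(fksp,Fpc);
  KSPSetFromOptions(fksp);
  KSPSolve(fksp,flux_rhs,lambda);
  PCBDDCMatFETIDPGetSolution(F,lambda,x);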
1123: /* -------------------------------------------------------------------------- */
1124: /*MC
1125:    PCBDDC - Balancing Domain Decomposition by Constraints.

1127:    Options Database Keys:
1128: .    -pcbddc ??? -

1130:    Level: intermediate

1132:    Notes: The matrix used with this preconditioner must be of type MATIS

1134:           Unlike more 'conventional' interface preconditioners, this iterates over ALL the
1135:           degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
1136:           on the subdomains).

1138:           Options for the coarse grid preconditioner can be set with -
1139:           Options for the Dirichlet subproblem can be set with -
1140:           Options for the Neumann subproblem can be set with -

1142:    Contributed by Stefano Zampini

1144: .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,  MATIS
1145: M*/
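A minimal usage sketch of the preconditioner itself (A is an application-provided MATIS matrix, b and x conforming global vectors; all three names are hypothetical):

  KSP ksp;
  PC  pc;
  KSPCreate(PETSC_COMM_WORLD,&ksp);
  KSPSetOperators(ksp,A,A,SAME_NONZERO_PATTERN);
  KSPGetPC(ksp,&pc);
  PCSetType(pc,PCBDDC);
  KSPSetFromOptions(ksp);
  KSPSolve(ksp,b,x);
  KSPDestroy(&ksp);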

1149: PETSC_EXTERN PetscErrorCode PCCreate_BDDC(PC pc)
1150: {
1152:   PC_BDDC        *pcbddc;
1153:   PCBDDCGraph    mat_graph;

1156:   /* Create the private data structure for this preconditioner and attach it to the PC object. */
1157:   PetscNewLog(pc,PC_BDDC,&pcbddc);
1158:   pc->data = (void*)pcbddc;

1160:   /* create PCIS data structure */
1161:   PCISCreate(pc);

1163:   /* BDDC specific */
1164:   pcbddc->CoarseNullSpace                 = 0;
1165:   pcbddc->NullSpace                       = 0;
1166:   pcbddc->temp_solution                   = 0;
1167:   pcbddc->original_rhs                    = 0;
1168:   pcbddc->local_mat                       = 0;
1169:   pcbddc->ChangeOfBasisMatrix             = 0;
1170:   pcbddc->usechangeofbasis                = PETSC_TRUE;
1171:   pcbddc->usechangeonfaces                = PETSC_FALSE;
1172:   pcbddc->coarse_vec                      = 0;
1173:   pcbddc->coarse_rhs                      = 0;
1174:   pcbddc->coarse_ksp                      = 0;
1175:   pcbddc->coarse_phi_B                    = 0;
1176:   pcbddc->coarse_phi_D                    = 0;
1177:   pcbddc->vec1_P                          = 0;
1178:   pcbddc->vec1_R                          = 0;
1179:   pcbddc->vec2_R                          = 0;
1180:   pcbddc->local_auxmat1                   = 0;
1181:   pcbddc->local_auxmat2                   = 0;
1182:   pcbddc->R_to_B                          = 0;
1183:   pcbddc->R_to_D                          = 0;
1184:   pcbddc->ksp_D                           = 0;
1185:   pcbddc->ksp_R                           = 0;
1186:   pcbddc->local_primal_indices            = 0;
1187:   pcbddc->inexact_prec_type               = PETSC_FALSE;
1188:   pcbddc->NeumannBoundaries               = 0;
1189:   pcbddc->ISForDofs                       = 0;
1190:   pcbddc->ISForVertices                   = 0;
1191:   pcbddc->n_ISForFaces                    = 0;
1192:   pcbddc->n_ISForEdges                    = 0;
1193:   pcbddc->ConstraintMatrix                = 0;
1194:   pcbddc->use_nnsp_true                   = PETSC_FALSE;
1195:   pcbddc->local_primal_sizes              = 0;
1196:   pcbddc->local_primal_displacements      = 0;
1197:   pcbddc->replicated_local_primal_indices = 0;
1198:   pcbddc->replicated_local_primal_values  = 0;
1199:   pcbddc->coarse_loc_to_glob              = 0;
1200:   pcbddc->dbg_flag                        = PETSC_FALSE;
1201:   pcbddc->coarsening_ratio                = 8;
1202:   pcbddc->use_exact_dirichlet             = PETSC_TRUE;
1203:   pcbddc->current_level                   = 0;
1204:   pcbddc->max_levels                      = 1;

1206:   /* allocate and initialize needed graph structure */
1207:   PetscMalloc(sizeof(*mat_graph),&pcbddc->mat_graph);
1208:   pcbddc->mat_graph->xadj   = 0;
1209:   pcbddc->mat_graph->adjncy = 0;

1211:   /* function pointers */
1212:   pc->ops->apply               = PCApply_BDDC;
1213:   pc->ops->applytranspose      = 0;
1214:   pc->ops->setup               = PCSetUp_BDDC;
1215:   pc->ops->destroy             = PCDestroy_BDDC;
1216:   pc->ops->setfromoptions      = PCSetFromOptions_BDDC;
1217:   pc->ops->view                = 0;
1218:   pc->ops->applyrichardson     = 0;
1219:   pc->ops->applysymmetricleft  = 0;
1220:   pc->ops->applysymmetricright = 0;
1221:   pc->ops->presolve            = PCPreSolve_BDDC;
1222:   pc->ops->postsolve           = PCPostSolve_BDDC;

1224:   /* composing function */
1225:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetCoarseningRatio_C",PCBDDCSetCoarseningRatio_BDDC);
1226:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetMaxLevels_C",PCBDDCSetMaxLevels_BDDC);
1227:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetNullSpace_C",PCBDDCSetNullSpace_BDDC);
1228:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C",PCBDDCSetDirichletBoundaries_BDDC);
1229:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C",PCBDDCSetNeumannBoundaries_BDDC);
1230:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C",PCBDDCGetDirichletBoundaries_BDDC);
1231:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C",PCBDDCGetNeumannBoundaries_BDDC);
1232:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetCoarseProblemType_C",PCBDDCSetCoarseProblemType_BDDC);
1233:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetDofsSplitting_C",PCBDDCSetDofsSplitting_BDDC);
1234:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C",PCBDDCSetLocalAdjacencyGraph_BDDC);
1235:   PetscObjectComposeFunction((PetscObject)pc,"PCPreSolve_C",PCPreSolve_BDDC);
1236:   PetscObjectComposeFunction((PetscObject)pc,"PCPostSolve_C",PCPostSolve_BDDC);
1237:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C",PCBDDCCreateFETIDPOperators_BDDC);
1238:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C",PCBDDCMatFETIDPGetRHS_BDDC);
1239:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C",PCBDDCMatFETIDPGetSolution_BDDC);
1240:   return(0);
1241: }

1243: /* -------------------------------------------------------------------------- */
1244: /* All static functions from now on                                           */
1245: /* -------------------------------------------------------------------------- */
1248: static PetscErrorCode PCBDDCApplyNullSpaceCorrectionPC(PC pc,Vec x,Vec y)
1249: {
1250:   NullSpaceCorrection_ctx *pc_ctx;
1251:   PetscErrorCode          ierr;

1254:   PCShellGetContext(pc,(void**)&pc_ctx);
1255:   /* E */
1256:   MatMultTranspose(pc_ctx->Lbasis_mat,x,pc_ctx->work_small_2);
1257:   MatMultAdd(pc_ctx->Kbasis_mat,pc_ctx->work_small_2,x,pc_ctx->work_full_1);
1258:   /* P^-1 */
1259:   PCApply(pc_ctx->local_pc,pc_ctx->work_full_1,pc_ctx->work_full_2);
1260:   /* E^T */
1261:   MatMultTranspose(pc_ctx->Kbasis_mat,pc_ctx->work_full_2,pc_ctx->work_small_1);
1262:   VecScale(pc_ctx->work_small_1,-1.0);
1263:   MatMultAdd(pc_ctx->Lbasis_mat,pc_ctx->work_small_1,pc_ctx->work_full_2,pc_ctx->work_full_1);
1264:   /* Sum contributions */
1265:   MatMultAdd(pc_ctx->basis_mat,pc_ctx->work_small_2,pc_ctx->work_full_1,y);
1266:   return(0);
1267: }
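In matrix form (a sketch read off the calls above, with N = basis_mat, L = Lbasis_mat, K = Kbasis_mat as stored, i.e. already sign-flipped in PCBDDCAdaptLocalProblem, and P the original local PC):

  $$y = N L^T x + (I - L K^T)\,P^{-1}\,(I + K L^T)\,x$$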

1271: static PetscErrorCode PCBDDCDestroyNullSpaceCorrectionPC(PC pc)
1272: {
1273:   NullSpaceCorrection_ctx *pc_ctx;
1274:   PetscErrorCode          ierr;

1277:   PCShellGetContext(pc,(void**)&pc_ctx);
1278:   VecDestroy(&pc_ctx->work_small_1);
1279:   VecDestroy(&pc_ctx->work_small_2);
1280:   VecDestroy(&pc_ctx->work_full_1);
1281:   VecDestroy(&pc_ctx->work_full_2);
1282:   MatDestroy(&pc_ctx->basis_mat);
1283:   MatDestroy(&pc_ctx->Lbasis_mat);
1284:   MatDestroy(&pc_ctx->Kbasis_mat);
1285:   PCDestroy(&pc_ctx->local_pc);
1286:   PetscFree(pc_ctx);
1287:   return(0);
1288: }

1292: static PetscErrorCode PCBDDCAdaptLocalProblem(PC pc,IS local_dofs)
1293: {
1294:   extern PetscErrorCode PCBDDCApplyNullSpaceCorrectionPC(PC,Vec,Vec);
1295:   extern PetscErrorCode PCBDDCDestroyNullSpaceCorrectionPC(PC);

1297:   PC_BDDC                 *pcbddc = (PC_BDDC*)pc->data;
1298:   PC_IS                   *pcis   = (PC_IS*)pc->data;
1299:   Mat_IS                  * matis = (Mat_IS*)pc->pmat->data;
1300:   KSP                     *local_ksp;
1301:   PC                      newpc;
1302:   NullSpaceCorrection_ctx *shell_ctx;
1303:   Mat                     local_mat,local_pmat,small_mat,inv_small_mat;
1304:   MatStructure            local_mat_struct;
1305:   Vec                     work1,work2,work3;
1306:   const Vec               *nullvecs;
1307:   VecScatter              scatter_ctx;
1308:   IS                      is_aux;
1309:   MatFactorInfo           matinfo;
1310:   PetscScalar             *basis_mat,*Kbasis_mat,*array,*array_mat;
1311:   PetscScalar             one = 1.0,zero = 0.0, m_one = -1.0;
1312:   PetscInt                basis_dofs,basis_size,nnsp_size,i,k,n_I,n_R;
1313:   PetscBool               nnsp_has_cnst;
1314:   PetscErrorCode          ierr;

1317:   /* Infer the local solver */
1318:   ISGetSize(local_dofs,&basis_dofs);
1319:   VecGetSize(pcis->vec1_D,&n_I);
1320:   VecGetSize(pcbddc->vec1_R,&n_R);
1321:   if (basis_dofs == n_I) {
1322:     /* Dirichlet solver */
1323:     local_ksp = &pcbddc->ksp_D;
1324:   } else if (basis_dofs == n_R) {
1325:     /* Neumann solver */
1326:     local_ksp = &pcbddc->ksp_R;
1327:   } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unknown local IS size %d (n_I=%d, n_R=%d)\n",basis_dofs,n_I,n_R);
1328:   KSPGetOperators(*local_ksp,&local_mat,&local_pmat,&local_mat_struct);

1330:   /* Get null space vecs */
1331:   MatNullSpaceGetVecs(pcbddc->NullSpace,&nnsp_has_cnst,&nnsp_size,&nullvecs);
1332:   basis_size = nnsp_size;
1333:   if (nnsp_has_cnst) basis_size++;

1335:   /* Create shell ctx */
1336:   PetscMalloc(sizeof(*shell_ctx),&shell_ctx);

1338:   /* Create work vectors in shell context */
1339:   VecCreate(PETSC_COMM_SELF,&shell_ctx->work_small_1);
1340:   VecSetSizes(shell_ctx->work_small_1,basis_size,basis_size);
1341:   VecSetType(shell_ctx->work_small_1,VECSEQ);
1342:   VecDuplicate(shell_ctx->work_small_1,&shell_ctx->work_small_2);
1343:   VecCreate(PETSC_COMM_SELF,&shell_ctx->work_full_1);
1344:   VecSetSizes(shell_ctx->work_full_1,basis_dofs,basis_dofs);
1345:   VecSetType(shell_ctx->work_full_1,VECSEQ);
1346:   VecDuplicate(shell_ctx->work_full_1,&shell_ctx->work_full_2);

1348:   /* Allocate workspace */
1349:   MatCreateSeqDense(PETSC_COMM_SELF,basis_dofs,basis_size,NULL,&shell_ctx->basis_mat);
1350:   MatCreateSeqDense(PETSC_COMM_SELF,basis_dofs,basis_size,NULL,&shell_ctx->Kbasis_mat);
1351:   MatDenseGetArray(shell_ctx->basis_mat,&basis_mat);
1352:   MatDenseGetArray(shell_ctx->Kbasis_mat,&Kbasis_mat);

1354:   /* Restrict local null space on selected dofs (Dirichlet or Neumann)
1355:      and compute matrices N and K*N */
1356:   VecDuplicate(shell_ctx->work_full_1,&work1);
1357:   VecDuplicate(shell_ctx->work_full_1,&work2);
1358:   VecScatterCreate(pcis->vec1_N,local_dofs,work1,(IS)0,&scatter_ctx);
1359:   /* fill columns 0..nnsp_size-1 with the restricted null space vectors; k keeps its final value for the optional constant column below */
1360:   for (k=0; k<nnsp_size; k++) {
1361:     VecScatterBegin(matis->ctx,nullvecs[k],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
1362:     VecScatterEnd(matis->ctx,nullvecs[k],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
1363:     VecPlaceArray(work1,(const PetscScalar*)&basis_mat[k*basis_dofs]);
1364:     VecScatterBegin(scatter_ctx,pcis->vec1_N,work1,INSERT_VALUES,SCATTER_FORWARD);
1365:     VecScatterEnd(scatter_ctx,pcis->vec1_N,work1,INSERT_VALUES,SCATTER_FORWARD);
1366:     VecPlaceArray(work2,(const PetscScalar*)&Kbasis_mat[k*basis_dofs]);
1367:     MatMult(local_mat,work1,work2);
1368:     VecResetArray(work1);
1369:     VecResetArray(work2);
1370:   }
1371:   if (nnsp_has_cnst) {
1372:     VecPlaceArray(work1,(const PetscScalar*)&basis_mat[k*basis_dofs]);
1373:     VecSet(work1,one);
1374:     VecPlaceArray(work2,(const PetscScalar*)&Kbasis_mat[k*basis_dofs]);
1375:     MatMult(local_mat,work1,work2);
1376:     VecResetArray(work1);
1377:     VecResetArray(work2);
1378:   }
1379:   VecDestroy(&work1);
1380:   VecDestroy(&work2);
1381:   VecScatterDestroy(&scatter_ctx);
1382:   MatDenseRestoreArray(shell_ctx->basis_mat,&basis_mat);
1383:   MatDenseRestoreArray(shell_ctx->Kbasis_mat,&Kbasis_mat);

1385:   /* Assemble another Mat object in shell context */
1386:   MatTransposeMatMult(shell_ctx->basis_mat,shell_ctx->Kbasis_mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&small_mat);
1387:   MatFactorInfoInitialize(&matinfo);
1388:   ISCreateStride(PETSC_COMM_SELF,basis_size,0,1,&is_aux);
1389:   MatLUFactor(small_mat,is_aux,is_aux,&matinfo);
1390:   ISDestroy(&is_aux);
1391:   PetscMalloc(basis_size*basis_size*sizeof(PetscScalar),&array_mat);
1392:   for (k=0; k<basis_size; k++) {
1393:     VecSet(shell_ctx->work_small_1,zero);
1394:     VecSetValue(shell_ctx->work_small_1,k,one,INSERT_VALUES);
1395:     VecAssemblyBegin(shell_ctx->work_small_1);
1396:     VecAssemblyEnd(shell_ctx->work_small_1);
1397:     MatSolve(small_mat,shell_ctx->work_small_1,shell_ctx->work_small_2);
1398:     VecGetArray(shell_ctx->work_small_2,&array);
1399:     for (i=0; i<basis_size; i++) array_mat[i*basis_size+k]=array[i];
1400:     VecRestoreArray(shell_ctx->work_small_2,&array);
1401:   }
1402:   MatCreateSeqDense(PETSC_COMM_SELF,basis_size,basis_size,array_mat,&inv_small_mat);
1403:   MatMatMult(shell_ctx->basis_mat,inv_small_mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&shell_ctx->Lbasis_mat);
1404:   PetscFree(array_mat);
1405:   MatDestroy(&inv_small_mat);
1406:   MatDestroy(&small_mat);
1407:   MatScale(shell_ctx->Kbasis_mat,m_one);

1409:   /* Rebuild local PC */
1410:   KSPGetPC(*local_ksp,&shell_ctx->local_pc);
1411:   PetscObjectReference((PetscObject)shell_ctx->local_pc);
1412:   PCCreate(PETSC_COMM_SELF,&newpc);
1413:   PCSetOperators(newpc,local_mat,local_mat,SAME_PRECONDITIONER);
1414:   PCSetType(newpc,PCSHELL);
1415:   PCShellSetContext(newpc,shell_ctx);
1416:   PCShellSetApply(newpc,PCBDDCApplyNullSpaceCorrectionPC);
1417:   PCShellSetDestroy(newpc,PCBDDCDestroyNullSpaceCorrectionPC);
1418:   PCSetUp(newpc);
1419:   KSPSetPC(*local_ksp,newpc);
1420:   PCDestroy(&newpc);
1421:   KSPSetUp(*local_ksp);

1423:   /* test */
1424:   if (pcbddc->dbg_flag) {
1425:     PetscReal   test_err;
1426:     KSP         check_ksp;
1427:     PC          check_pc;
1428:     PetscReal   lambda_min,lambda_max;
1429:     Mat         test_mat;
1430:     PetscViewer viewer=pcbddc->dbg_viewer;
1431:     PetscBool   setsym,issym=PETSC_FALSE;

1433:     KSPGetPC(*local_ksp,&check_pc);
1434:     VecDuplicate(shell_ctx->work_full_1,&work1);
1435:     VecDuplicate(shell_ctx->work_full_1,&work2);
1436:     VecDuplicate(shell_ctx->work_full_1,&work3);
1437:     VecSetRandom(shell_ctx->work_small_1,NULL);
1438:     MatMult(shell_ctx->basis_mat,shell_ctx->work_small_1,work1);
1439:     VecCopy(work1,work2);
1440:     MatMult(local_mat,work1,work3);
1441:     PCApply(check_pc,work3,work1);
1442:     VecAXPY(work1,m_one,work2);
1443:     VecNorm(work1,NORM_INFINITY,&test_err);
1444:     PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d error for nullspace correction for ",PetscGlobalRank);
1445:     if (basis_dofs == n_I) {
1446:       PetscViewerASCIISynchronizedPrintf(viewer,"Dirichlet ");
1447:     } else {
1448:       PetscViewerASCIISynchronizedPrintf(viewer,"Neumann ");
1449:     }
1450:     PetscViewerASCIISynchronizedPrintf(viewer,"solver is: %1.14e\n",test_err);

1452:     MatTransposeMatMult(shell_ctx->Lbasis_mat,shell_ctx->Kbasis_mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&test_mat);
1453:     MatShift(test_mat,one);
1454:     MatNorm(test_mat,NORM_INFINITY,&test_err);
1455:     MatDestroy(&test_mat);
1456:     PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d error for nullspace matrices is: %1.14e\n",PetscGlobalRank,test_err);

1458:     /* Create ksp object suitable for extreme eigenvalues' estimation */
1459:     KSPCreate(PETSC_COMM_SELF,&check_ksp);
1460:     KSPSetOperators(check_ksp,local_mat,local_mat,SAME_PRECONDITIONER);
1461:     KSPSetTolerances(check_ksp,1.e-8,1.e-8,PETSC_DEFAULT,basis_dofs);
1462:     KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);
1463:     MatIsSymmetricKnown(pc->pmat,&setsym,&issym);
1464:     if (issym) {
1465:       KSPSetType(check_ksp,KSPCG);
1466:     }
1467:     KSPSetPC(check_ksp,check_pc);
1468:     KSPSetUp(check_ksp);
1469:     VecSetRandom(work1,NULL);
1470:     MatMult(local_mat,work1,work2);
1471:     KSPSolve(check_ksp,work2,work2);
1472:     VecAXPY(work2,m_one,work1);
1473:     VecNorm(work2,NORM_INFINITY,&test_err);
1474:     KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);
1475:     KSPGetIterationNumber(check_ksp,&k);
1476:     PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d error for adapted KSP %1.14e (it %d, eigs %1.6e %1.6e)\n",PetscGlobalRank,test_err,k,lambda_min,lambda_max);
1477:     KSPDestroy(&check_ksp);
1478:     VecDestroy(&work1);
1479:     VecDestroy(&work2);
1480:     VecDestroy(&work3);
1481:   }
1482:   return(0);
1483: }

1487: static PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use)
1488: {
1489:   PC_BDDC *pcbddc = (PC_BDDC*)pc->data;

1492:   pcbddc->use_exact_dirichlet=use;
1493:   return(0);
1494: }

1498: static PetscErrorCode PCBDDCSetLevel(PC pc,PetscInt level)
1499: {
1500:   PC_BDDC *pcbddc = (PC_BDDC*)pc->data;

1503:   pcbddc->current_level=level;
1504:   return(0);
1505: }

1509: static PetscErrorCode PCBDDCAdaptNullSpace(PC pc)
1510: {
1511:   PC_IS          *pcis   = (PC_IS*)  (pc->data);
1512:   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
1513:   KSP            inv_change;
1514:   PC             pc_change;
1515:   const Vec      *nsp_vecs;
1516:   Vec            *new_nsp_vecs;
1517:   PetscInt       i,nsp_size,new_nsp_size,start_new;
1518:   PetscBool      nsp_has_cnst;
1519:   MatNullSpace   new_nsp;

1523:   MatNullSpaceGetVecs(pcbddc->NullSpace,&nsp_has_cnst,&nsp_size,&nsp_vecs);
1524:   KSPCreate(PETSC_COMM_SELF,&inv_change);
1525:   KSPSetOperators(inv_change,pcbddc->ChangeOfBasisMatrix,pcbddc->ChangeOfBasisMatrix,SAME_PRECONDITIONER);
1526:   KSPSetType(inv_change,KSPPREONLY);
1527:   KSPGetPC(inv_change,&pc_change);
1528:   PCSetType(pc_change,PCLU);
1529:   KSPSetUp(inv_change);

1531:   new_nsp_size = nsp_size;
1532:   if (nsp_has_cnst) new_nsp_size++;
1533:   PetscMalloc(new_nsp_size*sizeof(Vec),&new_nsp_vecs);
1534:   for (i=0;i<new_nsp_size;i++) { VecDuplicate(pcis->vec1_global,&new_nsp_vecs[i]); }
1535:   start_new = 0;
1536:   if (nsp_has_cnst) {
1537:     start_new = 1;
1538:     VecSet(new_nsp_vecs[0],1.0);
1539:     VecSet(pcis->vec1_B,1.0);
1540:     KSPSolve(inv_change,pcis->vec1_B,pcis->vec1_B);
1541:     VecScatterBegin(pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[0],INSERT_VALUES,SCATTER_REVERSE);
1542:     VecScatterEnd  (pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[0],INSERT_VALUES,SCATTER_REVERSE);
1543:   }
1544:   for (i=0; i<nsp_size; i++) {
1545:     VecCopy(nsp_vecs[i],new_nsp_vecs[i+start_new]);
1546:     VecScatterBegin(pcis->global_to_B,nsp_vecs[i],pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1547:     VecScatterEnd  (pcis->global_to_B,nsp_vecs[i],pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1548:     KSPSolve(inv_change,pcis->vec1_B,pcis->vec1_B);
1549:     VecScatterBegin(pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[i+start_new],INSERT_VALUES,SCATTER_REVERSE);
1550:     VecScatterEnd  (pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[i+start_new],INSERT_VALUES,SCATTER_REVERSE);
1551:   }
1552:   VecNormalize(new_nsp_vecs[0],NULL);
1553:   /* TODO : Orthonormalize vecs when new_nsp_size > 1! */
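  /* A minimal sketch of the missing orthonormalization (modified Gram-Schmidt
     over the new vectors), under the assumption that none of them is null:

       PetscInt    l,m;
       PetscScalar dot;
       for (l=0; l<new_nsp_size; l++) {
         for (m=0; m<l; m++) {
           VecDot(new_nsp_vecs[l],new_nsp_vecs[m],&dot);
           VecAXPY(new_nsp_vecs[l],-dot,new_nsp_vecs[m]);
         }
         VecNormalize(new_nsp_vecs[l],NULL);
       }
  */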

1555:   KSPDestroy(&inv_change);
1556:   MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_FALSE,new_nsp_size,new_nsp_vecs,&new_nsp);
1557:   PCBDDCSetNullSpace(pc,new_nsp);
1558:   MatNullSpaceDestroy(&new_nsp);
1559:   /*
1560:   MatNullSpaceTest(pcbddc->NullSpace,pc->pmat,&nsp_t);
1561:   printf("New Null Space, mat changed: %d\n",nsp_t);
1562:     temp_mat = matis->A;
1563:     matis->A = pcbddc->local_mat;
1564:     pcbddc->local_mat = temp_mat;
1565:   MatNullSpaceTest(pcbddc->NullSpace,pc->pmat,&nsp_t);
1566:   printf("New Null Space, mat original: %d\n",nsp_t);*/

1568:   for (i=0; i<new_nsp_size; i++) { VecDestroy(&new_nsp_vecs[i]); }
1569:   PetscFree(new_nsp_vecs);
1570:   return(0);
1571: }

1575: static PetscErrorCode PCBDDCCreateFETIDPMatContext(PC pc, FETIDPMat_ctx **fetidpmat_ctx)
1576: {
1577:   FETIDPMat_ctx  *newctx;

1581:   PetscMalloc(sizeof(*newctx),&newctx);

1583:   newctx->lambda_local    = 0;
1584:   newctx->temp_solution_B = 0;
1585:   newctx->temp_solution_D = 0;
1586:   newctx->B_delta         = 0;
1587:   newctx->B_Ddelta        = 0; /* theoretically belongs to the FETIDP preconditioner */
1588:   newctx->l2g_lambda      = 0;

1590:   /* increase the reference count for BDDC preconditioner */
1591:   PetscObjectReference((PetscObject)pc);
1592:   newctx->pc     = pc;
1593:   *fetidpmat_ctx = newctx;
1594:   return(0);
1595: }

1599: static PetscErrorCode PCBDDCCreateFETIDPPCContext(PC pc, FETIDPPC_ctx **fetidppc_ctx)
1600: {
1601:   FETIDPPC_ctx   *newctx;

1605:   PetscMalloc(sizeof(*newctx),&newctx);

1607:   newctx->lambda_local = 0;
1608:   newctx->B_Ddelta     = 0;
1609:   newctx->l2g_lambda   = 0;

1611:   /* increase the reference count for BDDC preconditioner */
1612:   PetscObjectReference((PetscObject)pc);
1613:   newctx->pc    = pc;
1614:   *fetidppc_ctx = newctx;
1615:   return(0);
1616: }

1620: static PetscErrorCode PCBDDCDestroyFETIDPMat(Mat A)
1621: {
1622:   FETIDPMat_ctx  *mat_ctx;

1626:   MatShellGetContext(A,(void**)&mat_ctx);
1627:   VecDestroy(&mat_ctx->lambda_local);
1628:   VecDestroy(&mat_ctx->temp_solution_D);
1629:   VecDestroy(&mat_ctx->temp_solution_B);
1630:   MatDestroy(&mat_ctx->B_delta);
1631:   MatDestroy(&mat_ctx->B_Ddelta);
1632:   VecScatterDestroy(&mat_ctx->l2g_lambda);
1633:   PCDestroy(&mat_ctx->pc); /* actually it does not destroy BDDC, it only decreases its reference count */
1634:   PetscFree(mat_ctx);
1635:   return(0);
1636: }

1640: static PetscErrorCode PCBDDCDestroyFETIDPPC(PC pc)
1641: {
1642:   FETIDPPC_ctx   *pc_ctx;

1646:   PCShellGetContext(pc,(void**)&pc_ctx);
1647:   VecDestroy(&pc_ctx->lambda_local);
1648:   MatDestroy(&pc_ctx->B_Ddelta);
1649:   VecScatterDestroy(&pc_ctx->l2g_lambda);
1650:   PCDestroy(&pc_ctx->pc); /* actually it does not destroy BDDC, it only decreases its reference count */
1651:   PetscFree(pc_ctx);
1652:   return(0);
1653: }

1657: static PetscErrorCode PCBDDCSetupFETIDPMatContext(FETIDPMat_ctx *fetidpmat_ctx)
1658: {
1660:   PC_IS          *pcis    =(PC_IS*)fetidpmat_ctx->pc->data;
1661:   PC_BDDC        *pcbddc  =(PC_BDDC*)fetidpmat_ctx->pc->data;
1662:   PCBDDCGraph    mat_graph=pcbddc->mat_graph;
1663:   Mat_IS         *matis   = (Mat_IS*)fetidpmat_ctx->pc->pmat->data;
1664:   MPI_Comm       comm     = ((PetscObject)(fetidpmat_ctx->pc))->comm;

1666:   Mat ScalingMat;
1667:   Vec lambda_global;
1668:   IS  IS_l2g_lambda;

1670:   PetscBool   skip_node,fully_redundant;
1671:   PetscInt    i,j,k,s,n_boundary_dofs,n_global_lambda,n_vertices,partial_sum;
1672:   PetscInt    n_local_lambda,n_lambda_for_dof,dual_size,n_neg_values,n_pos_values;
1673:   PetscMPIInt rank,nprocs;
1674:   PetscScalar scalar_value;

1676:   PetscInt    *vertex_indices,*temp_indices;
1677:   PetscInt    *dual_dofs_boundary_indices,*aux_local_numbering_1,*aux_global_numbering;
1678:   PetscInt    *aux_sums,*cols_B_delta,*l2g_indices;
1679:   PetscScalar *array,*scaling_factors,*vals_B_delta;
1680:   PetscInt    *aux_local_numbering_2,*dof_sizes,*dof_displs;
1681:   PetscInt    first_index,old_index;
1682:   PetscBool   first_found = PETSC_FALSE;

1684:   /* For communication of scaling factors */
1685:   PetscInt    *ptrs_buffer,neigh_position;
1686:   PetscScalar **all_factors,*send_buffer,*recv_buffer;
1687:   MPI_Request *send_reqs,*recv_reqs;

1689:   /* tests */
1690:   Vec         test_vec;
1691:   PetscBool   test_fetidp;
1692:   PetscViewer viewer;

1695:   MPI_Comm_rank(comm,&rank);
1696:   MPI_Comm_size(comm,&nprocs);

1698:   /* Default type of lagrange multipliers is non-redundant */
1699:   fully_redundant = PETSC_FALSE;
1700:   PetscOptionsGetBool(NULL,"-fetidp_fullyredundant",&fully_redundant,NULL);

1702:   /* Evaluate local and global number of lagrange multipliers */
1703:   VecSet(pcis->vec1_N,0.0);
1704:   n_local_lambda  = 0;
1705:   partial_sum     = 0;
1706:   n_boundary_dofs = 0;
1707:   s               = 0;
1708:   n_vertices      = 0;
1709:   /* Get Vertices used to define the BDDC */
1710:   PetscMalloc(pcbddc->local_primal_size*sizeof(*vertex_indices),&vertex_indices);
1711:   for (i=0; i<pcbddc->local_primal_size; i++) {
1712:     MatGetRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,NULL);
1713:     if (j == 1) {
1714:       vertex_indices[n_vertices]=temp_indices[0];
1715:       n_vertices++;
1716:     }
1717:     MatRestoreRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,NULL);
1718:   }
1719:   dual_size = pcis->n_B-n_vertices;

1721:   PetscSortInt(n_vertices,vertex_indices);
1722:   PetscMalloc(dual_size*sizeof(*dual_dofs_boundary_indices),&dual_dofs_boundary_indices);
1723:   PetscMalloc(dual_size*sizeof(*aux_local_numbering_1),&aux_local_numbering_1);
1724:   PetscMalloc(dual_size*sizeof(*aux_local_numbering_2),&aux_local_numbering_2);

1726:   VecGetArray(pcis->vec1_N,&array);
1727:   for (i=0; i<pcis->n; i++) {
1728:     j = mat_graph->count[i]; /* RECALL: mat_graph->count[i] does not count myself */
1729:     k = 0;
1730:     if (j > 0) k = (mat_graph->neighbours_set[i][0] == -1 ?  1 : 0);
1731:     j = j - k;
1732:     if (j > 0) n_boundary_dofs++;

1734:     skip_node = PETSC_FALSE;
1735:     if (s < n_vertices && vertex_indices[s]==i) { /* it works for a sorted set of vertices */
1736:       skip_node = PETSC_TRUE;
1737:       s++;
1738:     }
1739:     if (j < 1) skip_node = PETSC_TRUE;
1740:     if (!skip_node) {
1741:       if (fully_redundant) {
1742:         /* fully redundant set of lagrange multipliers */
1743:         n_lambda_for_dof = (j*(j+1))/2;
1744:       } else {
1745:         n_lambda_for_dof = j;
1746:       }
1747:       n_local_lambda += j;
1748:       /* needed to evaluate global number of lagrange multipliers */
1749:       array[i]=(1.0*n_lambda_for_dof)/(j+1.0); /* already scaled for the next global sum */
1750:       /* store some data needed */
1751:       dual_dofs_boundary_indices[partial_sum] = n_boundary_dofs-1;
1752:       aux_local_numbering_1[partial_sum]      = i;
1753:       aux_local_numbering_2[partial_sum]      = n_lambda_for_dof;
1754:       partial_sum++;
1755:     }
1756:   }
1757:   VecRestoreArray(pcis->vec1_N,&array);

1759:   VecSet(pcis->vec1_global,0.0);
1760:   VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
1761:   VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
1762:   VecSum(pcis->vec1_global,&scalar_value);

1764:   fetidpmat_ctx->n_lambda = (PetscInt) scalar_value;
1765:   /* printf("I found %d global multipliers (%f)\n",fetidpmat_ctx->n_lambda,scalar_value); */
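  /* Worked example of the count: a dof shared by 3 subdomains sees j = 2
     neighbours from each rank. Non-redundant multipliers form a chain between
     consecutive sharers: j = 2 multipliers for that dof. Fully redundant
     multipliers connect every pair of the j+1 = 3 sharers: j*(j+1)/2 = 3.
     Since each of the j+1 sharers contributed n_lambda_for_dof/(j+1) to the
     global sum above, the sum evaluates exactly to n_lambda. */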

1767:   /* compute global ordering of lagrange multipliers and associate l2g map */
1768:   VecSet(pcis->vec1_global,0.0);
1769:   VecSet(pcis->vec1_N,0.0);
1770:   VecGetArray(pcis->vec1_N,&array);
1771:   for (i=0;i<dual_size;i++) array[aux_local_numbering_1[i]] = aux_local_numbering_2[i];
1772:   VecRestoreArray(pcis->vec1_N,&array);
1773:   VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,INSERT_VALUES,SCATTER_REVERSE);
1774:   VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,INSERT_VALUES,SCATTER_REVERSE);
1775:   VecSum(pcis->vec1_global,&scalar_value);
1776:   if (pcbddc->dbg_flag && (PetscInt)scalar_value != fetidpmat_ctx->n_lambda) {
1777:     SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"Global number of multipliers mismatch! (%d!=%d)\n",(PetscInt)scalar_value,fetidpmat_ctx->n_lambda);
1778:   }

1780:   /* Fill pcis->vec1_global with cumulative function for global numbering */
1781:   VecGetArray(pcis->vec1_global,&array);
1782:   VecGetLocalSize(pcis->vec1_global,&s);
1783:   k           = 0;
1784:   first_index = -1;
1785:   for (i=0; i<s; i++) {
1786:     if (!first_found && array[i] > 0.0) {
1787:       first_found = PETSC_TRUE;
1788:       first_index = i;
1789:     }
1790:     k += (PetscInt)array[i];
1791:   }
1792:   j    = (!rank ? nprocs : 0);
1793:   PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);
1794:   PetscMalloc(j*sizeof(*dof_displs),&dof_displs);
1795:   MPI_Gather(&k,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);
1796:   if (!rank) {
1797:     dof_displs[0]=0;
1798:     for (i=1; i<nprocs; i++) dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
1799:   }
1800:   MPI_Scatter(dof_displs,1,MPIU_INT,&k,1,MPIU_INT,0,comm);
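  /* The Gather/Scatter pair above computes an exclusive prefix sum of the
     per-rank multiplier counts, routed through rank 0. A sketch of the
     equivalent single collective, assuming MPI-2 is available:

       PetscInt k0 = 0;
       MPI_Exscan(&k,&k0,1,MPIU_INT,MPI_SUM,comm);
       k = k0;

     the receive buffer is untouched on rank 0, hence the zero initialization. */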
1801:   if (first_found) {
1802:     array[first_index] += k;

1804:     old_index = first_index;
1805:     for (i=first_index+1; i<s; i++) {
1806:       if (array[i] > 0.0) {
1807:         array[i] += array[old_index];
1808:         old_index = i;
1809:       }
1810:     }
1811:   }
1812:   VecRestoreArray(pcis->vec1_global,&array);
1813:   VecSet(pcis->vec1_N,0.0);
1814:   VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
1815:   VecScatterEnd  (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
1816:   PetscMalloc(dual_size*sizeof(*aux_global_numbering),&aux_global_numbering);
1817:   VecGetArray(pcis->vec1_N,&array);
1818:   for (i=0; i<dual_size; i++) {
1819:     aux_global_numbering[i] = (PetscInt)array[aux_local_numbering_1[i]]-aux_local_numbering_2[i];
1820:   }
1821:   VecRestoreArray(pcis->vec1_N,&array);
1822:   PetscFree(aux_local_numbering_2);
1823:   PetscFree(dof_displs);
1824:   PetscFree(dof_sizes);
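  /* After the scan, array[] holds at each dual dof the cumulative number of
     multipliers up to and including that dof, so subtracting the dof's own
     n_lambda_for_dof (saved in aux_local_numbering_2) yields the global index
     of its first multiplier, as computed above. */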

1826:   /* init data for scaling factors exchange */
1827:   partial_sum = 0;
1828:   j           = 0;

1830:   PetscMalloc(pcis->n_neigh*sizeof(PetscInt),&ptrs_buffer);
1831:   PetscMalloc((pcis->n_neigh-1)*sizeof(MPI_Request),&send_reqs);
1832:   PetscMalloc((pcis->n_neigh-1)*sizeof(MPI_Request),&recv_reqs);
1833:   PetscMalloc(pcis->n*sizeof(PetscScalar*),&all_factors);

1835:   ptrs_buffer[0] = 0;
1836:   for (i=1; i<pcis->n_neigh; i++) {
1837:     partial_sum += pcis->n_shared[i];
1838:     ptrs_buffer[i] = ptrs_buffer[i-1]+pcis->n_shared[i];
1839:   }
1840:   PetscMalloc(partial_sum*sizeof(PetscScalar),&send_buffer);
1841:   PetscMalloc(partial_sum*sizeof(PetscScalar),&recv_buffer);
1842:   PetscMalloc(partial_sum*sizeof(PetscScalar),&all_factors[0]);
1843:   for (i=0; i<pcis->n-1; i++) {
1844:     j = mat_graph->count[i];
1845:     if (j>0) {
1846:       k = (mat_graph->neighbours_set[i][0] == -1 ?  1 : 0);
1847:       j = j - k;
1848:     }
1849:     all_factors[i+1]=all_factors[i]+j;
1850:   }
1851:   /* scatter B scaling to N vec */
1852:   VecScatterBegin(pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
1853:   VecScatterEnd  (pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
1854:   /* communications */
1855:   VecGetArray(pcis->vec1_N,&array);
1856:   for (i=1; i<pcis->n_neigh; i++) {
1857:     for (j=0; j<pcis->n_shared[i]; j++)  send_buffer[ptrs_buffer[i-1]+j] = array[pcis->shared[i][j]];

1859:     j    = ptrs_buffer[i]-ptrs_buffer[i-1];
1860:     MPI_Isend(&send_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&send_reqs[i-1]);
1861:     MPI_Irecv(&recv_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&recv_reqs[i-1]);
1862:   }
1863:   VecRestoreArray(pcis->vec1_N,&array);
1864:   MPI_Waitall((pcis->n_neigh-1),recv_reqs,MPI_STATUSES_IGNORE);
1865:   /* put values in correct places */
1866:   for (i=1; i<pcis->n_neigh; i++) {
1867:     for (j=0; j<pcis->n_shared[i]; j++) {
1868:       k = pcis->shared[i][j];

1870:       neigh_position = 0;
1871:       while (mat_graph->neighbours_set[k][neigh_position] != pcis->neigh[i]) neigh_position++;
1872:       s = (mat_graph->neighbours_set[k][0] == -1 ? 1 : 0);

1874:       neigh_position = neigh_position - s;

1876:       all_factors[k][neigh_position]=recv_buffer[ptrs_buffer[i-1]+j];
1877:     }
1878:   }
1879:   MPI_Waitall((pcis->n_neigh-1),send_reqs,MPI_STATUSES_IGNORE);
1880:   PetscFree(send_reqs);
1881:   PetscFree(recv_reqs);
1882:   PetscFree(send_buffer);
1883:   PetscFree(recv_buffer);
1884:   PetscFree(ptrs_buffer);

1886:   /* Compute B and B_delta (local actions) */
1887:   PetscMalloc(pcis->n_neigh*sizeof(*aux_sums),&aux_sums);
1888:   PetscMalloc(n_local_lambda*sizeof(*l2g_indices),&l2g_indices);
1889:   PetscMalloc(n_local_lambda*sizeof(*vals_B_delta),&vals_B_delta);
1890:   PetscMalloc(n_local_lambda*sizeof(*cols_B_delta),&cols_B_delta);
1891:   PetscMalloc(n_local_lambda*sizeof(*scaling_factors),&scaling_factors);

1893:   n_global_lambda = 0;
1894:   partial_sum     = 0;

1896:   for (i=0;i<dual_size;i++) {
1897:     n_global_lambda = aux_global_numbering[i];
1898:     j               = mat_graph->count[aux_local_numbering_1[i]];
1899:     k               = (mat_graph->neighbours_set[aux_local_numbering_1[i]][0] == -1 ?  1 : 0);
1900:     j               = j - k;
1901:     aux_sums[0]     = 0;
1902:     for (s=1; s<j; s++) aux_sums[s]=aux_sums[s-1]+j-s+1;

1904:     array        = all_factors[aux_local_numbering_1[i]];
1905:     n_neg_values = 0;

1907:     while (n_neg_values < j && mat_graph->neighbours_set[aux_local_numbering_1[i]][n_neg_values+k] < rank) n_neg_values++;
1908:     n_pos_values = j - n_neg_values;

1910:     if (fully_redundant) {
1911:       for (s=0; s<n_neg_values; s++) {
1912:         l2g_indices    [partial_sum+s]=aux_sums[s]+n_neg_values-s-1+n_global_lambda;
1913:         cols_B_delta   [partial_sum+s]=dual_dofs_boundary_indices[i];
1914:         vals_B_delta   [partial_sum+s]=-1.0;
1915:         scaling_factors[partial_sum+s]=array[s];
1916:       }
1917:       for (s=0; s<n_pos_values; s++) {
1918:         l2g_indices    [partial_sum+s+n_neg_values]=aux_sums[n_neg_values]+s+n_global_lambda;
1919:         cols_B_delta   [partial_sum+s+n_neg_values]=dual_dofs_boundary_indices[i];
1920:         vals_B_delta   [partial_sum+s+n_neg_values]=1.0;
1921:         scaling_factors[partial_sum+s+n_neg_values]=array[s+n_neg_values];
1922:       }
1923:       partial_sum += j;
1924:     } else {
1925:       /* l2g_indices and default cols and vals of B_delta */
1926:       for (s=0; s<j; s++) {
1927:         l2g_indices    [partial_sum+s]=n_global_lambda+s;
1928:         cols_B_delta   [partial_sum+s]=dual_dofs_boundary_indices[i];
1929:         vals_B_delta   [partial_sum+s]=0.0;
1930:       }
1931:       /* B_delta */
1932:       if (n_neg_values > 0) vals_B_delta[partial_sum+n_neg_values-1] = -1.0; /* there's a rank next to me to the left */
1933:       if (n_neg_values < j) vals_B_delta[partial_sum+n_neg_values] = 1.0; /* there's a rank next to me to the right */
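      /* Worked example of the sign convention, as we read it: for a dof shared
         by ranks {0,1,2}, the non-redundant multipliers form the chain
         lambda_0 <-> (0,1) and lambda_1 <-> (1,2). On rank 1, j = 2 and
         n_neg_values = 1, so it sets -1 for lambda_0 and +1 for lambda_1;
         rank 0 sets +1 for lambda_0 and rank 2 sets -1 for lambda_1 (zeros are
         dropped at assembly). Each row of the global jump operator B thus has
         exactly one +1 and one -1 and annihilates continuous functions. */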

1935:       /* scaling as in Klawonn and Widlund 1999 */
1936:       for (s=0;s<n_neg_values;s++) {
1937:         scalar_value = 0.0;
1938:         for (k=0;k<s+1;k++) scalar_value += array[k];
1939:         scaling_factors[partial_sum+s] = -scalar_value;
1940:       }
1941:       for (s=0;s<n_pos_values;s++) {
1942:         scalar_value = 0.0;
1943:         for (k=s+n_neg_values;k<j;k++) scalar_value += array[k];
1944:         scaling_factors[partial_sum+s+n_neg_values] = scalar_value;
1945:       }
1946:       partial_sum += j;
1947:     }
1948:   }
1949:   PetscFree(aux_global_numbering);
1950:   PetscFree(aux_sums);
1951:   PetscFree(aux_local_numbering_1);
1952:   PetscFree(dual_dofs_boundary_indices);
1953:   PetscFree(all_factors[0]);
1954:   PetscFree(all_factors);
1955:   /* printf("I found %d local lambda dofs when numbering them (should be %d)\n",partial_sum,n_local_lambda); */

1957:   /* Local to global mapping of fetidpmat */
1958:   VecCreate(PETSC_COMM_SELF,&fetidpmat_ctx->lambda_local);
1959:   VecSetSizes(fetidpmat_ctx->lambda_local,n_local_lambda,n_local_lambda);
1960:   VecSetType(fetidpmat_ctx->lambda_local,VECSEQ);
1961:   VecCreate(comm,&lambda_global);
1962:   VecSetSizes(lambda_global,PETSC_DECIDE,fetidpmat_ctx->n_lambda);
1963:   VecSetType(lambda_global,VECMPI);
1964:   ISCreateGeneral(comm,n_local_lambda,l2g_indices,PETSC_OWN_POINTER,&IS_l2g_lambda);
1965:   VecScatterCreate(fetidpmat_ctx->lambda_local,(IS)0,lambda_global,IS_l2g_lambda,&fetidpmat_ctx->l2g_lambda);
1966:   ISDestroy(&IS_l2g_lambda);

1968:   /* Create local part of B_delta */
1969:   MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_delta);
1970:   MatSetSizes(fetidpmat_ctx->B_delta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);
1971:   MatSetType(fetidpmat_ctx->B_delta,MATSEQAIJ);
1972:   MatSeqAIJSetPreallocation(fetidpmat_ctx->B_delta,1,NULL);
1973:   MatSetOption(fetidpmat_ctx->B_delta,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);
1974:   for (i=0; i<n_local_lambda; i++) {
1975:     MatSetValue(fetidpmat_ctx->B_delta,i,cols_B_delta[i],vals_B_delta[i],INSERT_VALUES);
1976:   }
1977:   PetscFree(vals_B_delta);
1978:   MatAssemblyBegin(fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);
1979:   MatAssemblyEnd  (fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);

1981:   if (fully_redundant) {
1982:     MatCreate(PETSC_COMM_SELF,&ScalingMat);
1983:     MatSetSizes(ScalingMat,n_local_lambda,n_local_lambda,n_local_lambda,n_local_lambda);
1984:     MatSetType(ScalingMat,MATSEQAIJ);
1985:     MatSeqAIJSetPreallocation(ScalingMat,1,NULL);
1986:     for (i=0; i<n_local_lambda; i++) {
1987:       MatSetValue(ScalingMat,i,i,scaling_factors[i],INSERT_VALUES);
1988:     }
1989:     MatAssemblyBegin(ScalingMat,MAT_FINAL_ASSEMBLY);
1990:     MatAssemblyEnd  (ScalingMat,MAT_FINAL_ASSEMBLY);
1991:     MatMatMult(ScalingMat,fetidpmat_ctx->B_delta,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&fetidpmat_ctx->B_Ddelta);
1992:     MatDestroy(&ScalingMat);
1993:   } else {
1994:     MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_Ddelta);
1995:     MatSetSizes(fetidpmat_ctx->B_Ddelta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);
1996:     MatSetType(fetidpmat_ctx->B_Ddelta,MATSEQAIJ);
1997:     MatSeqAIJSetPreallocation(fetidpmat_ctx->B_Ddelta,1,NULL);
1998:     for (i=0; i<n_local_lambda; i++) {
1999:       MatSetValue(fetidpmat_ctx->B_Ddelta,i,cols_B_delta[i],scaling_factors[i],INSERT_VALUES);
2000:     }
2001:     MatAssemblyBegin(fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);
2002:     MatAssemblyEnd  (fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);
2003:   }
2004:   PetscFree(scaling_factors);
2005:   PetscFree(cols_B_delta);

2007:   /* Create some vectors needed by fetidp */
2008:   VecDuplicate(pcis->vec1_B,&fetidpmat_ctx->temp_solution_B);
2009:   VecDuplicate(pcis->vec1_D,&fetidpmat_ctx->temp_solution_D);

2011:   test_fetidp = PETSC_FALSE;

2013:   PetscOptionsGetBool(NULL,"-fetidp_check",&test_fetidp,NULL);

2015:   if (test_fetidp) {

2017:     PetscViewerASCIIGetStdout(((PetscObject)(fetidpmat_ctx->pc))->comm,&viewer);
2018:     PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
2019:     PetscViewerASCIIPrintf(viewer,"----------FETI_DP TESTS--------------\n");
2020:     PetscViewerASCIIPrintf(viewer,"All tests should return zero!\n");
2021:     PetscViewerASCIIPrintf(viewer,"FETIDP MAT context in the ");
2022:     if (fully_redundant) {
2023:       PetscViewerASCIIPrintf(viewer,"fully redundant case for lagrange multipliers.\n");
2024:     } else {
2025:       PetscViewerASCIIPrintf(viewer,"Non-fully redundant case for lagrange multiplier.\n");
2026:     }
2027:     PetscViewerFlush(viewer);

2029:     /* TEST A/B: Test numbering of global lambda dofs             */

2031:     VecDuplicate(fetidpmat_ctx->lambda_local,&test_vec);
2032:     VecSet(lambda_global,1.0);
2033:     VecSet(test_vec,1.0);
2034:     VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);
2035:     VecScatterEnd  (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);
2036:     scalar_value = -1.0;
2037:     VecAXPY(test_vec,scalar_value,fetidpmat_ctx->lambda_local);
2038:     VecNorm(test_vec,NORM_INFINITY,&scalar_value);
2039:     VecDestroy(&test_vec);
2040:     PetscViewerASCIISynchronizedPrintf(viewer,"A[%04d]: CHECK glob to loc: % 1.14e\n",rank,scalar_value);
2041:     PetscViewerFlush(viewer);
2042:     if (fully_redundant) {
2043:       VecSet(lambda_global,0.0);
2044:       VecSet(fetidpmat_ctx->lambda_local,0.5);
2045:       VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);
2046:       VecScatterEnd  (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);
2047:       VecSum(lambda_global,&scalar_value);
2048:       PetscViewerASCIISynchronizedPrintf(viewer,"B[%04d]: CHECK loc to glob: % 1.14e\n",rank,scalar_value-fetidpmat_ctx->n_lambda);
2049:       PetscViewerFlush(viewer);
2050:     }

2052:     /* TEST C: It should hold B_delta*w=0, w\in\widehat{W}            */
2053:     /* This is the meaning of the B matrix                            */

2055:     VecSetRandom(pcis->vec1_N,NULL);
2056:     VecSet(pcis->vec1_global,0.0);
2057:     VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
2058:     VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
2059:     VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
2060:     VecScatterEnd  (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
2061:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
2062:     VecScatterEnd  (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
2063:     /* Action of B_delta */
2064:     MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);
2065:     VecSet(lambda_global,0.0);
2066:     VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);
2067:     VecScatterEnd  (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);
2068:     VecNorm(lambda_global,NORM_INFINITY,&scalar_value);
2069:     PetscViewerASCIIPrintf(viewer,"C[coll]: CHECK infty norm of B_delta*w (w continuous): % 1.14e\n",scalar_value);
2070:     PetscViewerFlush(viewer);

2072:     /* TEST D: It should hold E_D w = w - P_D w, w\in\widetilde{W}    */
2073:     /* E_D = R_D^T R                                                  */
2074:     /* P_D = B_{D,delta}^T B_{delta}                                  */
2075:     /* eq. 44, Mandel, Tezaur and Dohrmann 2005                       */

2077:     /* compute a random vector in \widetilde{W} */
2078:     VecSetRandom(pcis->vec1_N,NULL);

2080:     scalar_value = 0.0; /* set zero at vertices */
2081:     VecGetArray(pcis->vec1_N,&array);
2082:     for (i=0;i<n_vertices;i++) array[vertex_indices[i]] = scalar_value;
2083:     VecRestoreArray(pcis->vec1_N,&array);

2085:     /* store w for final comparison */
2086:     VecDuplicate(pcis->vec1_B,&test_vec);
2087:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);
2088:     VecScatterEnd  (pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);

2090:     /* Jump operator P_D : results stored in pcis->vec1_B */

2092:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
2093:     VecScatterEnd  (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
2094:     /* Action of B_delta */
2095:     MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);
2096:     VecSet(lambda_global,0.0);
2097:     VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);
2098:     VecScatterEnd  (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);
2099:     /* Action of B_Ddelta^T */
2100:     VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);
2101:     VecScatterEnd  (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);
2102:     MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);

2104:     /* Average operator E_D : results stored in pcis->vec2_B */

2106:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
2107:     VecScatterEnd  (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
2108:     VecPointwiseMult(pcis->vec2_B,pcis->D,pcis->vec2_B);
2109:     VecScatterBegin(pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
2110:     VecScatterEnd  (pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
2111:     VecSet(pcis->vec1_global,0.0);
2112:     VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
2113:     VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
2114:     VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
2115:     VecScatterEnd  (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
2116:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
2117:     VecScatterEnd  (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);

2119:     /* test E_D=I-P_D */
2120:     scalar_value = 1.0;
2121:     VecAXPY(pcis->vec1_B,scalar_value,pcis->vec2_B);
2122:     scalar_value = -1.0;
2123:     VecAXPY(pcis->vec1_B,scalar_value,test_vec);
2124:     VecNorm(pcis->vec1_B,NORM_INFINITY,&scalar_value);
2125:     VecDestroy(&test_vec);
2126:     PetscViewerASCIISynchronizedPrintf(viewer,"D[%04d]: CHECK infty norm of E_D + P_D - I: % 1.14e\n",rank,scalar_value);
2127:     PetscViewerFlush(viewer);

2129:     /* TEST E: It should hold R_D^T P_D w = 0, w\in\widetilde{W}      */
2130:     /* eq. 48, Mandel, Tezaur and Dohrmann 2005                       */

2132:     VecSetRandom(pcis->vec1_N,NULL);
2133:     VecGetArray(pcis->vec1_N,&array);

2135:     scalar_value = 0.0; /* set zero at vertices */
2136:     for (i=0;i<n_vertices;i++) array[vertex_indices[i]]=scalar_value;
2137:     VecRestoreArray(pcis->vec1_N,&array);

2139:     /* Jump operator P_D : results stored in pcis->vec1_B */

2141:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
2142:     VecScatterEnd  (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
2143:     /* Action of B_delta */
2144:     MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);
2145:     VecSet(lambda_global,0.0);
2146:     VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);
2147:     VecScatterEnd  (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);
2148:     /* Action of B_Ddelta^T */
2149:     VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);
2150:     VecScatterEnd  (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);
2151:     MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);
2152:     /* diagonal scaling */
2153:     VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);
2154:     /* sum on the interface */
2155:     VecSet(pcis->vec1_N,0.0);
2156:     VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
2157:     VecScatterEnd  (pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
2158:     VecSet(pcis->vec1_global,0.0);
2159:     VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
2160:     VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
2161:     VecNorm(pcis->vec1_global,NORM_INFINITY,&scalar_value);
2162:     PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of R^T_D P_D: % 1.14e\n",scalar_value);
2163:     PetscViewerFlush(viewer);

2165:     if (!fully_redundant) {
2166:       /* TEST F: It should hold B_{delta} B^T_{D,delta} = I             */
2167:       /* Corollary of thm 14, Mandel, Tezaur and Dohrmann 2005           */
2168:       VecDuplicate(lambda_global,&test_vec);
2169:       VecSetRandom(lambda_global,NULL);
2170:       /* Action of B_Ddelta^T */
2171:       VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);
2172:       VecScatterEnd  (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);
2173:       MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);
2174:       /* Action of B_delta */
2175:       MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);
2176:       VecSet(test_vec,0.0);
2177:       VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);
2178:       VecScatterEnd  (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);
2179:       scalar_value = -1.0;
2180:       VecAXPY(lambda_global,scalar_value,test_vec);
2181:       VecNorm(lambda_global,NORM_INFINITY,&scalar_value);
2182:       PetscViewerASCIIPrintf(viewer,"F[coll]: CHECK infty norm of B_delta B^T_{D,delta} - I: % 1.14e\n",scalar_value);
2183:       PetscViewerFlush(viewer);
2185:       VecDestroy(&test_vec);
2186:     }
2187:   }
2188:   /* final cleanup */
2189:   PetscFree(vertex_indices);
2190:   VecDestroy(&lambda_global);
2191:   return(0);
2192: }

2196: static PetscErrorCode PCBDDCSetupFETIDPPCContext(Mat fetimat, FETIDPPC_ctx *fetidppc_ctx)
2197: {
2198:   FETIDPMat_ctx  *mat_ctx;

2202:   MatShellGetContext(fetimat,&mat_ctx);
2203:   /* get references from objects created when setting up feti mat context */
2204:   PetscObjectReference((PetscObject)mat_ctx->lambda_local);

2206:   fetidppc_ctx->lambda_local = mat_ctx->lambda_local;

2208:   PetscObjectReference((PetscObject)mat_ctx->B_Ddelta);

2210:   fetidppc_ctx->B_Ddelta = mat_ctx->B_Ddelta;

2212:   PetscObjectReference((PetscObject)mat_ctx->l2g_lambda);

2214:   fetidppc_ctx->l2g_lambda = mat_ctx->l2g_lambda;
2215:   return(0);
2216: }

2220: static PetscErrorCode FETIDPMatMult(Mat fetimat, Vec x, Vec y)
2221: {
2222:   FETIDPMat_ctx  *mat_ctx;
2223:   PC_IS          *pcis;

2227:   MatShellGetContext(fetimat,&mat_ctx);
2228:   pcis = (PC_IS*)mat_ctx->pc->data;
2229:   /* Application of B_delta^T */
2230:   VecScatterBegin(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);
2231:   VecScatterEnd(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);
2232:   MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);
2233:   /* Application of \widetilde{S}^-1 */
2234:   VecSet(pcis->vec1_D,0.0);
2235:   PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);
2236:   /* Application of B_delta */
2237:   MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);
2238:   VecSet(y,0.0);
2239:   VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);
2240:   VecScatterEnd(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);
2241:   return(0);
2242: }

2246: static PetscErrorCode FETIDPPCApply(PC fetipc, Vec x, Vec y)
2247: {
2248:   FETIDPPC_ctx   *pc_ctx;
2249:   PC_IS          *pcis;

2253:   PCShellGetContext(fetipc,(void**)&pc_ctx);
2254:   pcis = (PC_IS*)pc_ctx->pc->data;
2255:   /* Application of B_Ddelta^T */
2256:   VecScatterBegin(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);
2257:   VecScatterEnd(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);
2258:   VecSet(pcis->vec2_B,0.0);
2259:   MatMultTranspose(pc_ctx->B_Ddelta,pc_ctx->lambda_local,pcis->vec2_B);
2260:   /* Application of S */
2261:   PCISApplySchur(pc_ctx->pc,pcis->vec2_B,pcis->vec1_B,(Vec)0,pcis->vec1_D,pcis->vec2_D);
2262:   /* Application of B_Ddelta */
2263:   MatMult(pc_ctx->B_Ddelta,pcis->vec1_B,pc_ctx->lambda_local);
2264:   VecSet(y,0.0);
2265:   VecScatterBegin(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);
2266:   VecScatterEnd(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);
2267:   return(0);
2268: }
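/* A sketch of how the routines above combine into a FETI-DP solver, assuming
   an already set-up PCBDDC object pc living on communicator comm (names such
   as fetidp_mat are illustrative; the public entry point of this file may
   package these steps differently):

     FETIDPMat_ctx *matctx;
     FETIDPPC_ctx  *pcctx;
     Mat           fetidp_mat;
     KSP           fetidp_ksp;
     PC            fetidp_pc;

     PCBDDCCreateFETIDPMatContext(pc,&matctx);
     PCBDDCSetupFETIDPMatContext(matctx);
     MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,matctx->n_lambda,matctx->n_lambda,matctx,&fetidp_mat);
     MatShellSetOperation(fetidp_mat,MATOP_MULT,(void (*)(void))FETIDPMatMult);
     MatShellSetOperation(fetidp_mat,MATOP_DESTROY,(void (*)(void))PCBDDCDestroyFETIDPMat);

     PCBDDCCreateFETIDPPCContext(pc,&pcctx);
     PCBDDCSetupFETIDPPCContext(fetidp_mat,pcctx);
     KSPCreate(comm,&fetidp_ksp);
     KSPSetOperators(fetidp_ksp,fetidp_mat,fetidp_mat,SAME_PRECONDITIONER);
     KSPGetPC(fetidp_ksp,&fetidp_pc);
     PCSetType(fetidp_pc,PCSHELL);
     PCShellSetContext(fetidp_pc,pcctx);
     PCShellSetApply(fetidp_pc,FETIDPPCApply);
     PCShellSetDestroy(fetidp_pc,PCBDDCDestroyFETIDPPC);
*/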

2272: static PetscErrorCode PCBDDCSetupLocalAdjacencyGraph(PC pc)
2273: {
2274:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
2275:   Mat_IS         *matis  = (Mat_IS*)pc->pmat->data;
2276:   PetscInt       nvtxs;
2277:   const PetscInt *xadj,*adjncy;
2278:   Mat            mat_adj;
2279:   PetscBool      symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE,flg_row=PETSC_TRUE;
2280:   PCBDDCGraph    mat_graph       =pcbddc->mat_graph;

2284:   /* get CSR adjacency from the local matrix if the user has not yet provided the local graph via PCBDDCSetLocalAdjacencyGraph */
2285:   if (!mat_graph->xadj) {
2286:     MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);
2287:     MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);
2288:     if (!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ()\n");
2289:     /* Get adjacency into BDDC workspace */
2290:     PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);
2291:     MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);
2292:     if (!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ()\n");
2293:     MatDestroy(&mat_adj);
2294:   }
2295:   return(0);
2296: }
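/* A sketch of providing the adjacency manually instead, for a 1D chain of 4
   local dofs in CSR format (purely illustrative; PETSC_COPY_VALUES lets the
   arrays live on the stack):

     PetscInt xadj[5]   = {0,1,3,5,6};
     PetscInt adjncy[6] = {1,0,2,1,3,2};
     PCBDDCSetLocalAdjacencyGraph(pc,4,xadj,adjncy,PETSC_COPY_VALUES);
*/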
2297: /* -------------------------------------------------------------------------- */
2300: static PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
2301: {
2302:   PetscErrorCode    ierr;
2303:   PC_BDDC           *pcbddc = (PC_BDDC*)(pc->data);
2304:   PC_IS             *pcis   = (PC_IS*)(pc->data);
2305:   const PetscScalar zero     = 0.0;

2308:   /* Application of PHI^T  */
2309:   MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
2310:   if (pcbddc->inexact_prec_type) { MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }

2312:   /* Scatter data of coarse_rhs */
2313:   if (pcbddc->coarse_rhs) { VecSet(pcbddc->coarse_rhs,zero); }
2314:   PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);

2316:   /* Local solution on R nodes */
2317:   VecSet(pcbddc->vec1_R,zero);
2318:   VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
2319:   VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
2320:   if (pcbddc->inexact_prec_type) {
2321:     VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
2322:     VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
2323:   }
2324:   PCBDDCSolveSaddlePoint(pc);
2325:   VecSet(pcis->vec1_B,zero);
2326:   VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
2327:   VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
2328:   if (pcbddc->inexact_prec_type) {
2329:     VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
2330:     VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
2331:   }

2333:   /* Coarse solution */
2334:   PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);
2335:   if (pcbddc->coarse_rhs) {
2336:     if (pcbddc->CoarseNullSpace) {
2337:       MatNullSpaceRemove(pcbddc->CoarseNullSpace,pcbddc->coarse_rhs,NULL);
2338:     }
2339:     KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);
2340:     if (pcbddc->CoarseNullSpace) {
2341:       MatNullSpaceRemove(pcbddc->CoarseNullSpace,pcbddc->coarse_vec,NULL);
2342:     }
2343:   }
2344:   PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
2345:   PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);

2347:   /* Sum contributions from two levels */
2348:   MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
2349:   if (pcbddc->inexact_prec_type) { MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
2350:   return(0);
2351: }
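/* In matrix terms, the routine above applies the two-level BDDC operator to
   the interface residual r_B (a sketch, with PHI the matrix of coarse basis
   functions and R the restriction to the R nodes):

     M^{-1} r_B = PHI (coarse solve) PHI^T r_B          coarse correction
                + R^T (constrained local solve) R r_B   subdomain correction

   where the constrained local solve is realized by PCBDDCSolveSaddlePoint
   below; the inexact (M_3) variant extends the same algebra to interior dofs. */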
2352: /* -------------------------------------------------------------------------- */
2355: static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
2356: {
2358:   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);

2361:   KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
2362:   if (pcbddc->local_auxmat1) {
2363:     MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);
2364:     MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);
2365:   }
2366:   return(0);
2367: }
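/* The two auxiliary matrices presumably encode the static condensation of the
   local constraint equations: with C the constraint matrix, the routine above
   realizes a solve of the saddle-point form

     [ K_RR  C^T ] [v]   [r]
     [ C     0   ] [m] = [0]

   by first computing K_RR^{-1} r and then adding the correction that restores
   C v = 0 (local_auxmat1 maps to multiplier space, local_auxmat2 maps back). */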
2368: /* -------------------------------------------------------------------------- */
2371: static PetscErrorCode  PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
2372: {
2374:   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);

2377:   switch (pcbddc->coarse_communications_type) {
2378:   case SCATTERS_BDDC:
2379:     VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);
2380:     break;
2381:   case GATHERS_BDDC:
2382:     break;
2383:   }
2384:   return(0);
2385: }
2386: /* -------------------------------------------------------------------------- */
2389: static PetscErrorCode  PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
2390: {
2392:   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2393:   PetscScalar    *array_to;
2394:   PetscScalar    *array_from;
2395:   MPI_Comm       comm;
2396:   PetscInt       i;

2399:   PetscObjectGetComm((PetscObject)pc,&comm);
2400:   switch (pcbddc->coarse_communications_type) {
2401:   case SCATTERS_BDDC:
2402:     VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);
2403:     break;
2404:   case GATHERS_BDDC:
2405:     if (vec_from) VecGetArray(vec_from,&array_from);
2406:     if (vec_to) VecGetArray(vec_to,&array_to);
2407:     switch (pcbddc->coarse_problem_type) {
2408:     case SEQUENTIAL_BDDC:
2409:       if (smode == SCATTER_FORWARD) {
2410:         MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);
2411:         if (vec_to) {
2412:           if (imode == ADD_VALUES) {
2413:             for (i=0;i<pcbddc->replicated_primal_size;i++) {
2414:               array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i];
2415:             }
2416:           } else {
2417:             for (i=0;i<pcbddc->replicated_primal_size;i++) {
2418:               array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i];
2419:             }
2420:           }
2421:         }
2422:       } else {
2423:         if (vec_from) {
2424:           if (imode == ADD_VALUES) {
2425:             SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Scatter mode %d, insert mode %d for case %d not implemented!",(int)smode,(int)imode,(int)pcbddc->coarse_problem_type);
2426:           }
2427:           for (i=0;i<pcbddc->replicated_primal_size;i++) {
2428:             pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]];
2429:           }
2430:         }
2431:         MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);
2432:       }
2433:       break;
2434:     case REPLICATED_BDDC:
2435:       if (smode == SCATTER_FORWARD) {
2436:         MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);
2437:         if (imode == ADD_VALUES) {
2438:           for (i=0;i<pcbddc->replicated_primal_size;i++) {
2439:             array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i];
2440:           }
2441:         } else {
2442:           for (i=0;i<pcbddc->replicated_primal_size;i++) {
2443:             array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i];
2444:           }
2445:         }
2446:       } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */
2447:         if (imode == ADD_VALUES) {
2448:           for (i=0;i<pcbddc->local_primal_size;i++) {
2449:             array_to[i]+=array_from[pcbddc->local_primal_indices[i]];
2450:           }
2451:         } else {
2452:           for (i=0;i<pcbddc->local_primal_size;i++) {
2453:             array_to[i]=array_from[pcbddc->local_primal_indices[i]];
2454:           }
2455:         }
2456:       }
2457:       break;
2458:     case MULTILEVEL_BDDC:
2459:       break;
2460:     case PARALLEL_BDDC:
2461:       break;
2462:     }
2463:     if (vec_from) VecRestoreArray(vec_from,&array_from);
2464:     if (vec_to) VecRestoreArray(vec_to,&array_to);
2465:     break;
2466:   }
2467:   return(0);
2468: }
2469: /* -------------------------------------------------------------------------- */
2472: static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc)
2473: {
2475:   PC_IS          *pcis    = (PC_IS*)(pc->data);
2476:   PC_BDDC        *pcbddc  = (PC_BDDC*)pc->data;
2477:   Mat_IS         *matis   = (Mat_IS*)pc->pmat->data;
2478:   PetscInt       *nnz,*is_indices;
2479:   PetscScalar    *temp_quadrature_constraint;
2480:   PetscInt       *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B,*local_to_B;
2481:   PetscInt       local_primal_size,i,j,k,total_counts,max_size_of_constraint;
2482:   PetscInt       n_constraints,n_vertices,size_of_constraint;
2483:   PetscScalar    quad_value;
2484:   PetscBool      nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true;
2485:   PetscInt       nnsp_size    =0,nnsp_addone=0,temp_constraints,temp_start_ptr;
2486:   IS             *used_IS;
2487:   MatType        impMatType=MATSEQAIJ;
2488:   PetscBLASInt   Bs,Bt,lwork,lierr;
2489:   PetscReal      tol=1.0e-8;
2490:   MatNullSpace   nearnullsp;
2491:   const Vec      *nearnullvecs;
2492:   Vec            *localnearnullsp;
2493:   PetscScalar    *work,*temp_basis,*array_vector,*correlation_mat;
2494:   PetscReal      *rwork,*singular_vals;
2495:   PetscBLASInt   Bone=1,*ipiv;
2496:   Vec            temp_vec;
2497:   Mat            temp_mat;
2498:   KSP            temp_ksp;
2499:   PC             temp_pc;
2500:   PetscInt       s,start_constraint,dual_dofs;
2501:   PetscBool      compute_submatrix,useksp=PETSC_FALSE;
2502:   PetscInt       *aux_primal_permutation,*aux_primal_numbering;
2503:   PetscBool      boolforface,used_vertex,*change_basis;

2505: /* some ugly conditional declarations */
2506: #if defined(PETSC_MISSING_LAPACK_GESVD)
2507:   PetscScalar  dot_result;
2508:   PetscScalar  one=1.0,zero=0.0;
2509:   PetscInt     ii;
2510:   PetscScalar  *singular_vectors;
2511:   PetscBLASInt *iwork,*ifail;
2512:   PetscReal    dummy_real,abs_tol;
2513:   PetscBLASInt eigs_found;
2514: #if defined(PETSC_USE_COMPLEX)
2515:   PetscScalar val1,val2;
2516: #endif
2517: #endif
2518:   PetscBLASInt dummy_int;
2519:   PetscScalar  dummy_scalar;

2522:   /* check if near null space is attached to global mat */
2523:   MatGetNearNullSpace(pc->pmat,&nearnullsp);
2524:   if (nearnullsp) {
2525:     MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
2526:   } else { /* if a near null space is not provided, use constants */
2527:     nnsp_has_cnst = PETSC_TRUE;
2528:     use_nnsp_true = PETSC_TRUE;
2529:   }
2530:   if (nnsp_has_cnst) nnsp_addone = 1;

2532:   /*
2533:        Evaluate maximum storage size needed by the procedure
2534:        - temp_indices will contain the start index of each constraint, stored as follows:
2535:        - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
2536:        - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
2537:        - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
2538:   */

2540:   total_counts  = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges;
2541:   total_counts *= (nnsp_addone+nnsp_size);

2543:   ISGetSize(pcbddc->ISForVertices,&n_vertices);

2545:   total_counts += n_vertices;

2547:   PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);
2548:   PetscMalloc((total_counts+1)*sizeof(PetscBool),&change_basis);

2550:   total_counts           = 0;
2551:   max_size_of_constraint = 0;
2552:   for (i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++) {
2553:     if (i<pcbddc->n_ISForEdges) used_IS = &pcbddc->ISForEdges[i];
2554:     else used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges];
2555:     ISGetSize(*used_IS,&j);
2556:     total_counts += j;
2557:     if (j>max_size_of_constraint) max_size_of_constraint=j;
2558:   }
2559:   total_counts *= (nnsp_addone+nnsp_size);
2560:   total_counts += n_vertices;

2562:   PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);
2563:   PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);
2564:   PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);
2565:   PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);
2566:   ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);

2568:   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
2569:   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
2570:   ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);

2572:   /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */
2573:   rwork           = 0;
2574:   work            = 0;
2575:   singular_vals   = 0;
2576:   temp_basis      = 0;
2577:   correlation_mat = 0;
2578:   if (!pcbddc->use_nnsp_true) {
2579:     PetscScalar temp_work;
2580: #if defined(PETSC_MISSING_LAPACK_GESVD)
2581:     /* POD */
2582:     PetscInt max_n;
2583:     max_n = nnsp_addone+nnsp_size;
2584:     /* using some techniques borrowed from Proper Orthogonal Decomposition */
2585:     PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);
2586:     PetscMalloc(max_n*max_n*sizeof(PetscScalar),&singular_vectors);
2587:     PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);
2588:     PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);
2589: #if defined(PETSC_USE_COMPLEX)
2590:     PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);
2591: #endif
2592:     PetscMalloc(5*max_n*sizeof(PetscBLASInt),&iwork);
2593:     PetscMalloc(max_n*sizeof(PetscBLASInt),&ifail);
2594:     /* now we evaluate the optimal workspace using query with lwork=-1 */
2595:     PetscBLASIntCast(max_n,&Bt);
2596:     lwork =-1;
2597:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2598: #if !defined(PETSC_USE_COMPLEX)
2599:     abs_tol=1.e-8;
2600:     PetscStackCallBLAS("LAPACKsyevx",LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int,&abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,&temp_work,&lwork,iwork,ifail,&lierr));
2601: #else
2602:     SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_LAPACK_GESVD is defined");
2603: #endif
2604:     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEVX Lapack routine %d",(int)lierr);
2605:     PetscFPTrapPop();
2606: #else /* LAPACK GESVD is available */
2607:       /* SVD */
2608:     PetscInt max_n,min_n;
2609:     max_n = max_size_of_constraint;
2610:     min_n = nnsp_addone+nnsp_size;
2611:     if (max_size_of_constraint < (nnsp_addone+nnsp_size)) {
2612:       min_n = max_size_of_constraint;
2613:       max_n = nnsp_addone+nnsp_size;
2614:     }
2615:     PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);
2616: #if defined(PETSC_USE_COMPLEX)
2617:     PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);
2618: #endif
2619:     /* now we evaluate the optimal workspace using query with lwork=-1 */
2620:     lwork     =-1;
2621:     PetscBLASIntCast(max_n,&Bs);
2622:     PetscBLASIntCast(min_n,&Bt);
2623:     dummy_int = Bs;
2624:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2625: #if !defined(PETSC_USE_COMPLEX)
2626:     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2627: #else
2628:     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2629: #endif
2630:     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr);
2631:     PetscFPTrapPop();
2632: #endif
2633:     /* Allocate optimal workspace */
2634:     PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);
2635:     total_counts = (PetscInt)lwork;
2636:     PetscMalloc(total_counts*sizeof(PetscScalar),&work);
2637:   }
2638:   /* get local part of global near null space vectors */
2639:   PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);
2640:   for (k=0; k<nnsp_size; k++) {
2641:     VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
2642:     VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
2643:     VecScatterEnd  (matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
2644:   }
2645:   /* Now we can loop on constraining sets */
2646:   total_counts   =0;
2647:   temp_indices[0]=0;
2648:   /* vertices */
2650:   ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);
2651:   if (nnsp_has_cnst) { /* consider all vertices */
2652:     for (i=0; i<n_vertices; i++) {
2653:       temp_indices_to_constraint[temp_indices[total_counts]]  = is_indices[i];
2654:       temp_indices_to_constraint_B[temp_indices[total_counts]]= local_to_B[is_indices[i]];
2655:       temp_quadrature_constraint[temp_indices[total_counts]]  = 1.0;
2656:       temp_indices[total_counts+1]                            = temp_indices[total_counts]+1;
2657:       change_basis[total_counts]                              = PETSC_FALSE;
2658:       total_counts++;
2659:     }
2660:   } else { /* consider vertices at which at least one local near null space vector is nonzero */
2661:     for (i=0; i<n_vertices; i++) {
2662:       used_vertex = PETSC_FALSE;
2663:       k           = 0;
2664:       while (!used_vertex && k<nnsp_size) {
2665:         VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);
2666:         if (PetscAbsScalar(array_vector[is_indices[i]])>0.0) {
2667:           temp_indices_to_constraint[temp_indices[total_counts]]  =is_indices[i];
2668:           temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
2669:           temp_quadrature_constraint[temp_indices[total_counts]]  =1.0;
2670:           temp_indices[total_counts+1]                            =temp_indices[total_counts]+1;
2671:           change_basis[total_counts]                              =PETSC_FALSE;
2672:           total_counts++;
2673:           used_vertex=PETSC_TRUE;
2674:         }
2675:         VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);
2676:         k++;
2677:       }
2678:     }
2679:   }
2680:   ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);
2681:   n_vertices = total_counts;

2683:   /* edges and faces */
2684:   for (i=0; i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces; i++) {
2685:     if (i<pcbddc->n_ISForEdges) {
2686:       used_IS     = &pcbddc->ISForEdges[i];
2687:       boolforface = pcbddc->usechangeofbasis;
2688:     } else {
2689:       used_IS     = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges];
2690:       boolforface = pcbddc->usechangeonfaces;
2691:     }
2692:     temp_constraints = 0;          /* zero the number of constraints I have on this connected component */
2693:     temp_start_ptr   = total_counts; /* need to know the starting index of constraints stored */
2694:     ISGetSize(*used_IS,&size_of_constraint);
2695:     ISGetIndices(*used_IS,(const PetscInt**)&is_indices);
2696:     if (nnsp_has_cnst) {
2697:       temp_constraints++;
2698:       quad_value = (PetscScalar) (1.0/PetscSqrtReal((PetscReal)size_of_constraint));
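        /* the constant vector gives the standard average constraint; scaling by
           1/sqrt(size_of_constraint) makes the constraint row have unit 2-norm */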
2699:       for (j=0; j<size_of_constraint; j++) {
2700:         temp_indices_to_constraint[temp_indices[total_counts]+j]  =is_indices[j];
2701:         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
2702:         temp_quadrature_constraint[temp_indices[total_counts]+j]  =quad_value;
2703:       }
2704:       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2705:       change_basis[total_counts]  =boolforface;
2706:       total_counts++;
2707:     }
2708:     for (k=0; k<nnsp_size; k++) {
2709:       VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);
2710:       for (j=0; j<size_of_constraint; j++) {
2711:         temp_indices_to_constraint[temp_indices[total_counts]+j]  =is_indices[j];
2712:         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
2713:         temp_quadrature_constraint[temp_indices[total_counts]+j]  =array_vector[is_indices[j]];
2714:       }
2715:       VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);
2716:       quad_value = 1.0;
2717:       if (use_nnsp_true) { /* check if array is null on the connected component in case use_nnsp_true has been requested */
2718:         PetscBLASIntCast(size_of_constraint,&Bs);
2719:         PetscStackCallBLAS("BLASasum",quad_value = BLASasum_(&Bs,&temp_quadrature_constraint[temp_indices[total_counts]],&Bone));
2720:       }
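      /* BLASasum returns sum_i |x_i|: quad_value stays 1.0 when the SVD path is taken, and is
         zero exactly when the near null space vector vanishes on this connected component, in
         which case the candidate row is discarded */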
2721:       if (quad_value > 0.0) { /* keep indices and values */
2722:         temp_constraints++;
2723:         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2724:         change_basis[total_counts]  =boolforface;
2725:         total_counts++;
2726:       }
2727:     }
2728:     ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);
2729:     /* perform SVD on the constraints if use_nnsp_true has not been requested by the user */
2730:     if (!use_nnsp_true) {
2731:       PetscBLASIntCast(size_of_constraint,&Bs);
2732:       PetscBLASIntCast(temp_constraints,&Bt);

2734: #if defined(PETSC_MISSING_LAPACK_GESVD)
2735:       PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));
2736:       /* Store upper triangular part of correlation matrix */
2737:       for (j=0; j<temp_constraints; j++) {
2738:         for (k=0; k<j+1; k++) {
2739: #if defined(PETSC_USE_COMPLEX)
2740:           /* hand made complex dot product -> replace */
2741:           dot_result = 0.0;
2742:           for (ii=0; ii<size_of_constraint; ii++) {
2743:             val1        = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii];
2744:             val2        = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii];
2745:             dot_result += val1*PetscConj(val2);
2746:           }
2747: #else
2748:           PetscStackCallBLAS("BLASdot",dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone));
2749: #endif
2750:           correlation_mat[j*temp_constraints+k]=dot_result;
2751:         }
2752:       }
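      /* POD fallback when LAPACK GESVD is unavailable: correlation_mat holds the Gram matrix
         B^T B of the candidate constraints B; if (lambda_i,v_i) are its eigenpairs, then
         B v_i / sqrt(lambda_i) are orthonormal left singular vectors of B, recovered below
         by a GEMM followed by the scaled copy back into temp_quadrature_constraint */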
2753:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2754: #if !defined(PETSC_USE_COMPLEX)
2755:       PetscStackCallBLAS("LAPACKsyevx",LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int,&abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,work,&lwork,iwork,ifail,&lierr));
2756: #else
2757:       SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_LAPACK_GESVD is defined");
2758: #endif
2759:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEVX Lapack routine %d",(int)lierr);
2760:       PetscFPTrapPop();
2761:       /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */
2762:       j=0;
2763:       while (j < Bt && singular_vals[j] < tol) j++;
2764:       total_counts=total_counts-j;
2765:       if (j<temp_constraints) {
2766:         for (k=j;k<Bt;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2767:         PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2768:         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vectors,&Bt,&zero,temp_basis,&Bs)); /* SYEVX returns the eigenvectors in singular_vectors; the input matrix is destroyed */
2769:         PetscFPTrapPop();
2770:         /* copy POD basis into used quadrature memory */
2771:         for (k=0;k<Bt-j;k++) {
2772:           for (ii=0;ii<size_of_constraint;ii++) {
2773:             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii];
2774:           }
2775:         }
2776:       }

2778: #else  /* PETSC_MISSING_LAPACK_GESVD not defined: use LAPACK GESVD directly */
2779:       PetscInt min_n = temp_constraints;
2780:       if (min_n > size_of_constraint) min_n = size_of_constraint;
2781:       dummy_int = Bs;
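      /* jobu="O": the left singular vectors overwrite the input matrix in place, directly
         replacing the candidate rows in temp_quadrature_constraint; jobvt="N": right
         singular vectors are not computed */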
2782:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2783: #if !defined(PETSC_USE_COMPLEX)
2784:       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2785: #else
2786:       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2787: #endif
2788:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr);
2789:       PetscFPTrapPop();
2790:       /* retain singular values greater than tol: note that LAPACK SVD returns them in descending order */
2791:       j=0;
2792:       while (j < min_n && singular_vals[min_n-j-1] < tol) j++;
2793:       total_counts = total_counts-(PetscInt)Bt+(min_n-j);
2794: #endif
2795:     }
2796:   }

2798:   n_constraints     =total_counts-n_vertices;
2799:   local_primal_size = total_counts;
2800:   /* set quantities in pcbddc data structure */
2801:   pcbddc->n_vertices        = n_vertices;
2802:   pcbddc->n_constraints     = n_constraints;
2803:   pcbddc->local_primal_size = local_primal_size;

2805:   /* Create constraint matrix */
2806:   /* The constraint matrix is used to compute the l2g map of primal dofs */
2807:   /* so we need to set it up properly either with or without change of basis */
2808:   MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
2809:   MatSetType(pcbddc->ConstraintMatrix,impMatType);
2810:   MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);

2812:   /* compute a local numbering of constraints : vertices first then constraints */
2813:   VecSet(pcis->vec1_N,0.0);
2814:   VecGetArray(pcis->vec1_N,&array_vector);
2815:   PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_numbering);
2816:   PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_permutation);

2818:   total_counts=0;

2820:   /* find vertices: subdomain corners plus dofs with basis changed */
2821:   for (i=0; i<local_primal_size; i++) {
2822:     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2823:     if (change_basis[i] || size_of_constraint == 1) {
2824:       k=0;
2825:       while (k < size_of_constraint && array_vector[temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]] != 0.0) {
2826:         k=k+1;
2827:       }
2828:       j = temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1];

2830:       array_vector[j]                      = 1.0;
2831:       aux_primal_numbering[total_counts]   = j;
2832:       aux_primal_permutation[total_counts] = total_counts;
2833:       total_counts++;
2834:     }
2835:   }
2836:   VecRestoreArray(pcis->vec1_N,&array_vector);
2837:   /* permute indices in order to have a sorted set of vertices */
2838:   PetscSortIntWithPermutation(total_counts,aux_primal_numbering,aux_primal_permutation);
2839:   /* nonzero structure */
2840:   PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);
2841:   for (i=0;i<total_counts;i++) nnz[i]=1;

2843:   j=total_counts;
2844:   for (i=n_vertices; i<local_primal_size; i++) {
2845:     if (!change_basis[i]) {
2846:       nnz[j]=temp_indices[i+1]-temp_indices[i];
2847:       j++;
2848:     }
2849:   }
2850:   MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
2851:   PetscFree(nnz);
2852:   /* set values in constraint matrix */
2853:   for (i=0; i<total_counts; i++) {
2854:     j    = aux_primal_permutation[i];
2855:     k    = aux_primal_numbering[j];
2856:     MatSetValue(pcbddc->ConstraintMatrix,i,k,1.0,INSERT_VALUES);
2857:   }
2858:   for (i=n_vertices; i<local_primal_size; i++) {
2859:     if (!change_basis[i]) {
2860:       size_of_constraint = temp_indices[i+1]-temp_indices[i];
2861:       MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);
2862:       total_counts++;
2863:     }
2864:   }
2865:   PetscFree(aux_primal_numbering);
2866:   PetscFree(aux_primal_permutation);
2867:   /* assembling */
2868:   MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
2869:   MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);

2871:   /* Create matrix for change of basis. We don't need it in case pcbddc->usechangeofbasis is FALSE */
2872:   if (pcbddc->usechangeofbasis) {
2873:     MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);
2874:     MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);
2875:     MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);
2876:     /* work arrays */
2877:     /* we need to reuse these arrays, so we free them */
2878:     PetscFree(temp_basis);
2879:     PetscFree(work);
2880:     PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);
2881:     PetscMalloc((nnsp_addone+nnsp_size)*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);
2882:     PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscScalar),&work);
2883:     PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscBLASInt),&ipiv);
2884:     for (i=0;i<pcis->n_B;i++) nnz[i]=1;

2886:     /* Overestimated nonzeros per row */
2887:     k=1;
2888:     for (i=pcbddc->n_vertices;i<local_primal_size;i++) {
2889:       if (change_basis[i]) {
2890:         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2891:         if (k < size_of_constraint) k = size_of_constraint;

2893:         for (j=0;j<size_of_constraint;j++) {
2894:           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
2895:         }
2896:       }
2897:     }
2898:     MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);
2899:     PetscFree(nnz);
2900:     /* Temporary array to store indices */
2901:     PetscMalloc(k*sizeof(PetscInt),&is_indices);
2902:     /* Set initial identity in the matrix */
2903:     for (i=0; i<pcis->n_B; i++) {
2904:       MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);
2905:     }
2906:     /* Now we loop on the constraints which need a change of basis */
2907:     /* Change of basis matrix is evaluated as the FIRST APPROACH in */
2908:     /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (6.2.1) */
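    /* Note: for each connected component the new basis is built so that every constraint row
       acts on a single dof: the first size_of_constraint-temp_constraints columns (dual dofs)
       are chosen to annihilate the constraints, while the last temp_constraints columns
       (new primal dofs) reproduce the constraint values themselves */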
2909:     temp_constraints = 0;
2910:     if (pcbddc->n_vertices < local_primal_size) {
2911:       temp_start_ptr = temp_indices_to_constraint_B[temp_indices[pcbddc->n_vertices]];
2912:     }
2913:     for (i=pcbddc->n_vertices; i<local_primal_size; i++) {
2914:       if (change_basis[i]) {
2915:         compute_submatrix = PETSC_FALSE;
2916:         useksp            = PETSC_FALSE;
2917:         if (temp_start_ptr == temp_indices_to_constraint_B[temp_indices[i]]) {
2918:           temp_constraints++;
2919:           if (i == local_primal_size -1 ||  temp_start_ptr != temp_indices_to_constraint_B[temp_indices[i+1]]) {
2920:             compute_submatrix = PETSC_TRUE;
2921:           }
2922:         }
2923:         if (compute_submatrix) {
2924:           if (temp_constraints > 1 || pcbddc->use_nnsp_true) useksp = PETSC_TRUE;
2925:           size_of_constraint = temp_indices[i+1]-temp_indices[i];
2926:           if (useksp) { /* experimental */
2927:             MatCreate(PETSC_COMM_SELF,&temp_mat);
2928:             MatSetType(temp_mat,impMatType);
2929:             MatSetSizes(temp_mat,size_of_constraint,size_of_constraint,size_of_constraint,size_of_constraint);
2930:             MatSeqAIJSetPreallocation(temp_mat,size_of_constraint,NULL);
2931:           }
2932:           /* First _size_of_constraint-temp_constraints_ columns */
2933:           dual_dofs        = size_of_constraint-temp_constraints;
2934:           start_constraint = i+1-temp_constraints;
2935:           for (s=0; s<dual_dofs; s++) {
2936:             is_indices[0] = s;
2937:             for (j=0;j<temp_constraints;j++) {
2938:               for (k=0;k<temp_constraints;k++) {
2939:                 temp_basis[j*temp_constraints+k]=temp_quadrature_constraint[temp_indices[start_constraint+k]+s+j+1];
2940:               }
2941:               work[j]         = -temp_quadrature_constraint[temp_indices[start_constraint+j]+s];
2942:               is_indices[j+1] = s+j+1;
2943:             }
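            /* solve the temp_constraints x temp_constraints dense system with LAPACK GESV:
               work holds the right hand side on entry and, on exit, the coefficients that
               make all constraints vanish on the s-th dual column of the new basis */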
2944:             Bt   = temp_constraints;
2945:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2946:             PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&Bt,&Bone,temp_basis,&Bt,ipiv,work,&Bt,&lierr));
2947:             if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESV Lapack routine %d",(int)lierr);
2948:             PetscFPTrapPop();
2949:             j    = temp_indices_to_constraint_B[temp_indices[start_constraint]+s];
2950:             MatSetValues(pcbddc->ChangeOfBasisMatrix,temp_constraints,&temp_indices_to_constraint_B[temp_indices[start_constraint]+s+1],1,&j,work,INSERT_VALUES);
2951:             if (useksp) {
2952:               /* temp mat with transposed rows and columns */
2953:               MatSetValues(temp_mat,1,&s,temp_constraints,&is_indices[1],work,INSERT_VALUES);
2954:               MatSetValue(temp_mat,is_indices[0],is_indices[0],1.0,INSERT_VALUES);
2955:             }
2956:           }
2957:           if (useksp) {
2958:             /* last rows of temp_mat */
2959:             for (j=0;j<size_of_constraint;j++) is_indices[j] = j;

2961:             for (s=0;s<temp_constraints;s++) {
2962:               k = s + dual_dofs;
2963:               MatSetValues(temp_mat,1,&k,size_of_constraint,is_indices,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);
2964:             }
2965:             MatAssemblyBegin(temp_mat,MAT_FINAL_ASSEMBLY);
2966:             MatAssemblyEnd(temp_mat,MAT_FINAL_ASSEMBLY);
2967:             MatGetVecs(temp_mat,&temp_vec,NULL);
2968:             KSPCreate(PETSC_COMM_SELF,&temp_ksp);
2969:             KSPSetOperators(temp_ksp,temp_mat,temp_mat,SAME_PRECONDITIONER);
2970:             KSPSetType(temp_ksp,KSPPREONLY);
2971:             KSPGetPC(temp_ksp,&temp_pc);
2972:             PCSetType(temp_pc,PCLU);
2973:             KSPSetUp(temp_ksp);
2974:             for (s=0; s<temp_constraints; s++) {
2975:               VecSet(temp_vec,0.0);
2976:               VecSetValue(temp_vec,s+dual_dofs,1.0,INSERT_VALUES);
2977:               VecAssemblyBegin(temp_vec);
2978:               VecAssemblyEnd(temp_vec);
2979:               KSPSolve(temp_ksp,temp_vec,temp_vec);
2980:               VecGetArray(temp_vec,&array_vector);
2981:               j    = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1];
2982:               /* last columns of change of basis matrix associated to new primal dofs */
2983:               MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,array_vector,INSERT_VALUES);
2984:               VecRestoreArray(temp_vec,&array_vector);
2985:             }
2986:             MatDestroy(&temp_mat);
2987:             KSPDestroy(&temp_ksp);
2988:             VecDestroy(&temp_vec);
2989:           } else {
2990:             /* last columns of change of basis matrix associated to new primal dofs */
2991:             for (s=0; s<temp_constraints; s++) {
2992:               j    = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1];
2993:               MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);
2994:             }
2995:           }
2996:           /* prepare for the next cycle */
2997:           temp_constraints = 0;
2998:           if (i != local_primal_size -1) temp_start_ptr = temp_indices_to_constraint_B[temp_indices[i+1]];
2999:         }
3000:       }
3001:     }
3002:     /* assembling */
3003:     MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
3004:     MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
3005:     PetscFree(ipiv);
3006:     PetscFree(is_indices);
3007:   }
3008:   /* free workspace no longer needed */
3009:   PetscFree(rwork);
3010:   PetscFree(work);
3011:   PetscFree(temp_basis);
3012:   PetscFree(singular_vals);
3013:   PetscFree(correlation_mat);
3014:   PetscFree(temp_indices);
3015:   PetscFree(change_basis);
3016:   PetscFree(temp_indices_to_constraint);
3017:   PetscFree(temp_indices_to_constraint_B);
3018:   PetscFree(local_to_B);
3019:   PetscFree(temp_quadrature_constraint);
3020: #if defined(PETSC_MISSING_LAPACK_GESVD)
3021:   PetscFree(iwork);
3022:   PetscFree(ifail);
3023:   PetscFree(singular_vectors);
3024: #endif
3025:   for (k=0; k<nnsp_size; k++) {
3026:     VecDestroy(&localnearnullsp[k]);
3027:   }
3028:   PetscFree(localnearnullsp);
3029:   return(0);
3030: }
3031: /* -------------------------------------------------------------------------- */
3034: static PetscErrorCode PCBDDCCoarseSetUp(PC pc)
3035: {
3037:   PC_IS          *pcis    = (PC_IS*)(pc->data);
3038:   PC_BDDC        *pcbddc  = (PC_BDDC*)pc->data;
3039:   Mat_IS         *matis   = (Mat_IS*)pc->pmat->data;
3040:   Mat            change_mat_all;
3041:   IS             is_R_local;
3042:   IS             is_V_local;
3043:   IS             is_C_local;
3044:   IS             is_aux1;
3045:   IS             is_aux2;
3046:   VecType        impVecType;
3047:   MatType        impMatType;
3048:   PetscInt       n_R  =0;
3049:   PetscInt       n_D  =0;
3050:   PetscInt       n_B  =0;
3051:   PetscScalar    zero =0.0;
3052:   PetscScalar    one  =1.0;
3053:   PetscScalar    m_one=-1.0;
3054:   PetscScalar    * array;
3055:   PetscScalar    *coarse_submat_vals;
3056:   PetscInt       *idx_R_local;
3057:   PetscInt       *idx_V_B;
3058:   PetscScalar    *coarsefunctions_errors;
3059:   PetscScalar    *constraints_errors;

3061:   /* auxiliary indices */
3062:   PetscInt i,j,k;

3064:   /* for verbose output of bddc */
3065:   PetscViewer viewer  =pcbddc->dbg_viewer;
3066:   PetscBool   dbg_flag=pcbddc->dbg_flag;

3068:   /* for counting coarse dofs */
3069:   PetscInt    n_vertices,n_constraints;
3070:   PetscInt    size_of_constraint;
3071:   PetscInt    *row_cmat_indices;
3072:   PetscScalar *row_cmat_values;
3073:   PetscInt    *vertices,*nnz,*is_indices,*temp_indices;

3076:   /* Set Non-overlapping dimensions */
3077:   n_B = pcis->n_B; n_D = pcis->n - n_B;
3078:   /* Set types for local objects needed by BDDC preconditioner */
3079:   impMatType = MATSEQDENSE;
3080:   impVecType = VECSEQ;

3082:   /* get vertex indices from constraint matrix */
3083:   PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&vertices);
3084:   n_vertices=0;
3085:   for (i=0; i<pcbddc->local_primal_size; i++) {
3086:     MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);
3087:     if (size_of_constraint == 1) {
3088:       vertices[n_vertices]=row_cmat_indices[0];
3089:       n_vertices++;
3090:     }
3091:     MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);
3092:   }
3093:   /* Set number of constraints */
3094:   n_constraints = pcbddc->local_primal_size-n_vertices;

3096:   /* vertices in boundary numbering */
3097:   if (n_vertices) {
3098:     VecSet(pcis->vec1_N,m_one);
3099:     VecGetArray(pcis->vec1_N,&array);
3100:     for (i=0; i<n_vertices; i++) array[vertices[i]] = i;
3101:     VecRestoreArray(pcis->vec1_N,&array);
3102:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
3103:     VecScatterEnd  (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
3104:     PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);
3105:     VecGetArray(pcis->vec1_B,&array);
3106:     for (i=0; i<n_vertices; i++) {
3107:       j=0;
3108:       while (array[j] != i) j++;
3109:       idx_V_B[i]=j;
3110:     }
3111:     VecRestoreArray(pcis->vec1_B,&array);
3112:   }

3114:   /* transform local matrices if needed */
3115:   if (pcbddc->usechangeofbasis) {
3116:     PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);
3117:     ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
3118:     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
3119:     ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
3120:     ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
3121:     k=1;
3122:     for (i=0;i<n_B;i++) {
3123:       MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);
3124:       nnz[is_indices[i]]=j;
3125:       if (k < j) k = j;
3126:       MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);
3127:     }
3128:     ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
3129:     /* assemble change of basis matrix on the whole set of local dofs */
3130:     PetscMalloc(k*sizeof(PetscInt),&temp_indices);
3131:     MatCreate(PETSC_COMM_SELF,&change_mat_all);
3132:     MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);
3133:     MatSetType(change_mat_all,MATSEQAIJ);
3134:     MatSeqAIJSetPreallocation(change_mat_all,0,nnz);
3135:     ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
3136:     for (i=0; i<n_D; i++) {
3137:       MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);
3138:     }
3139:     ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
3140:     ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
3141:     for (i=0; i<n_B; i++) {
3142:       MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);
3143:       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
3144:       MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);
3145:       MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);
3146:     }
3147:     MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);
3148:     MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);
3149:     MatPtAP(matis->A,change_mat_all,MAT_INITIAL_MATRIX,1.0,&pcbddc->local_mat);
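    /* MatPtAP computes P^T A P with P = change_mat_all, i.e. the local subdomain matrix
       expressed in the new basis */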
3150:     MatDestroy(&pcis->A_IB);
3151:     MatDestroy(&pcis->A_BI);
3152:     MatDestroy(&pcis->A_BB);
3153:     MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_IB);
3154:     MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&pcis->A_BI);
3155:     MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_BB);
3156:     MatDestroy(&change_mat_all);
3157:     PetscFree(nnz);
3158:     PetscFree(temp_indices);
3159:   } else {
3160:     /* without change of basis, the local matrix is unchanged */
3161:     PetscObjectReference((PetscObject)matis->A);

3163:     pcbddc->local_mat = matis->A;
3164:   }
3165:   /* Change global null space passed in by the user if change of basis has been performed */
3166:   if (pcbddc->NullSpace && pcbddc->usechangeofbasis) {
3167:     PCBDDCAdaptNullSpace(pc);
3168:   }

3170:   /* Dohrmann's notation: dofs split into R (Remaining: all dofs but the vertices) and V (Vertices) */
3171:   VecSet(pcis->vec1_N,one);
3172:   VecGetArray(pcis->vec1_N,&array);
3173:   for (i=0;i<n_vertices;i++) array[vertices[i]] = zero;
3174:   PetscMalloc((pcis->n - n_vertices)*sizeof(PetscInt),&idx_R_local);
3175:   for (i=0, n_R=0; i<pcis->n; i++) {
3176:     if (array[i] == one) {
3177:       idx_R_local[n_R] = i;
3178:       n_R++;
3179:     }
3180:   }
3181:   VecRestoreArray(pcis->vec1_N,&array);
3182:   if (dbg_flag) {
3183:     PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");
3184:     PetscViewerFlush(viewer);
3185:     PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
3186:     PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);
3187:     PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);
3188:     PetscViewerASCIISynchronizedPrintf(viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);
3189:     PetscViewerFlush(viewer);
3190:   }

3192:   /* Allocate needed vectors */
3193:   VecDuplicate(pcis->vec1_global,&pcbddc->original_rhs);
3194:   VecDuplicate(pcis->vec1_global,&pcbddc->temp_solution);
3195:   VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);
3196:   VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);
3197:   VecSetSizes(pcbddc->vec1_R,n_R,n_R);
3198:   VecSetType(pcbddc->vec1_R,impVecType);
3199:   VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
3200:   VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);
3201:   VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);
3202:   VecSetType(pcbddc->vec1_P,impVecType);

3204:   /* Creating some index sets needed  */
3205:   /* For submatrices */
3206:   ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&is_R_local);
3207:   if (n_vertices) {
3208:     ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_OWN_POINTER,&is_V_local);
3209:   }
3210:   if (n_constraints) {
3211:     ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);
3212:   }

3214:   /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
3215:   {
3216:     PetscInt *aux_array1;
3217:     PetscInt *aux_array2;
3218:     PetscInt *idx_I_local;

3220:     PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);
3221:     PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);

3223:     ISGetIndices(pcis->is_I_local,(const PetscInt**)&idx_I_local);
3224:     VecGetArray(pcis->vec1_N,&array);
3225:     for (i=0; i<n_D; i++) array[idx_I_local[i]] = 0;
3226:     ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idx_I_local);
3227:     for (i=0, j=0; i<n_R; i++) {
3228:       if (array[idx_R_local[i]] == one) {
3229:         aux_array1[j] = i;
3230:         j++;
3231:       }
3232:     }
3233:     VecRestoreArray(pcis->vec1_N,&array);
3234:     ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);
3235:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
3236:     VecScatterEnd  (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
3237:     VecGetArray(pcis->vec1_B,&array);
3238:     for (i=0, j=0; i<n_B; i++) {
3239:       if (array[i] == one) {
3240:         aux_array2[j] = i; j++;
3241:       }
3242:     }
3243:     VecRestoreArray(pcis->vec1_B,&array);
3244:     ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_COPY_VALUES,&is_aux2);
3245:     VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
3246:     PetscFree(aux_array1);
3247:     PetscFree(aux_array2);
3248:     ISDestroy(&is_aux1);
3249:     ISDestroy(&is_aux2);

3251:     if (pcbddc->inexact_prec_type || dbg_flag) {
3252:       PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);
3253:       VecGetArray(pcis->vec1_N,&array);
3254:       for (i=0, j=0; i<n_R; i++) {
3255:         if (array[idx_R_local[i]] == zero) {
3256:           aux_array1[j] = i;
3257:           j++;
3258:         }
3259:       }
3260:       VecRestoreArray(pcis->vec1_N,&array);
3261:       ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);
3262:       VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
3263:       PetscFree(aux_array1);
3264:       ISDestroy(&is_aux1);
3265:     }
3266:   }

3268:   /* Creating PC contexts for local Dirichlet and Neumann problems */
3269:   {
3270:     Mat A_RR;
3271:     PC  pc_temp;

3273:     /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */
3274:     KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
3275:     PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
3276:     KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);
3277:     KSPSetType(pcbddc->ksp_D,KSPPREONLY);
3278:     KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");

3280:     /* default */
3281:     KSPGetPC(pcbddc->ksp_D,&pc_temp);
3282:     PCSetType(pc_temp,PCLU);

3284:     /* Allow user's customization */
3285:     KSPSetFromOptions(pcbddc->ksp_D);

3287:     /* umfpack interface has a bug when matrix dimension is zero */
3288:     if (!n_D) {
3289:       PCSetType(pc_temp,PCNONE);
3290:     }

3292:     /* Set Up KSP for Dirichlet problem of BDDC */
3293:     KSPSetUp(pcbddc->ksp_D);

3295:     /* set ksp_D into pcis data */
3296:     KSPDestroy(&pcis->ksp_D);
3297:     PetscObjectReference((PetscObject)pcbddc->ksp_D);
3298:     pcis->ksp_D = pcbddc->ksp_D;

3300:     /* Matrix for Neumann problem is A_RR -> we need to create it */
3301:     MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);
3302:     KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
3303:     PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
3304:     KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);
3305:     KSPSetType(pcbddc->ksp_R,KSPPREONLY);
3306:     KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");

3308:     /* default */
3309:     KSPGetPC(pcbddc->ksp_R,&pc_temp);
3310:     PCSetType(pc_temp,PCLU);

3312:     /* Allow user's customization */
3313:     KSPSetFromOptions(pcbddc->ksp_R);

3315:     /* umfpack interface has a bug when matrix dimension is zero */
3316:     if (!pcis->n) {
3317:       PCSetType(pc_temp,PCNONE);
3318:     }

3320:     /* Set Up KSP for Neumann problem of BDDC */
3321:     KSPSetUp(pcbddc->ksp_R);

3323:     /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
3324:     {
3325:       Vec         temp_vec;
3326:       PetscReal   value;
3327:       PetscMPIInt use_exact,use_exact_reduced;

3329:       VecDuplicate(pcis->vec1_D,&temp_vec);
3330:       VecSetRandom(pcis->vec1_D,NULL);
3331:       MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
3332:       KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);
3333:       VecAXPY(temp_vec,m_one,pcis->vec1_D);
3334:       VecNorm(temp_vec,NORM_INFINITY,&value);
3335:       VecDestroy(&temp_vec);
3336:       use_exact = 1;
3337:       if (PetscAbsReal(value) > 1.e-4) use_exact = 0;

3339:       MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));
3340:       pcbddc->use_exact_dirichlet = (PetscBool) use_exact_reduced;
3341:       if (dbg_flag) {
3342:         PetscViewerFlush(viewer);
3343:         PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");
3344:         PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");
3345:         PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);
3346:       }
3347:       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->inexact_prec_type) {
3348:         PCBDDCAdaptLocalProblem(pc,pcis->is_I_local);
3349:       }
3350:       VecDuplicate(pcbddc->vec1_R,&temp_vec);
3351:       VecSetRandom(pcbddc->vec1_R,NULL);
3352:       MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
3353:       KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);
3354:       VecAXPY(temp_vec,m_one,pcbddc->vec1_R);
3355:       VecNorm(temp_vec,NORM_INFINITY,&value);
3356:       VecDestroy(&temp_vec);

3358:       use_exact = 1;
3359:       if (PetscAbsReal(value) > 1.e-4) use_exact = 0;
3360:       MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));
3361:       if (dbg_flag) {
3362:         PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for  Neumann  solve = % 1.14e \n",PetscGlobalRank,value);
3363:         PetscViewerFlush(viewer);
3364:       }
3365:       if (pcbddc->NullSpace && !use_exact_reduced) {
3366:         PCBDDCAdaptLocalProblem(pc,is_R_local);
3367:       }
3368:     }
3369:     /* free Neumann problem's matrix */
3370:     MatDestroy(&A_RR);
3371:   }

3373:   /* Assemble all remaining stuff needed to apply BDDC  */
3374:   {
3375:     Mat           A_RV,A_VR,A_VV;
3376:     Mat           M1;
3377:     Mat           C_CR;
3378:     Mat           AUXMAT;
3379:     Vec           vec1_C;
3380:     Vec           vec2_C;
3381:     Vec           vec1_V;
3382:     Vec           vec2_V;
3383:     PetscInt      *nnz;
3384:     PetscInt      *auxindices;
3385:     PetscInt      index;
3386:     PetscScalar   *array2;
3387:     MatFactorInfo matinfo;

3389:     /* Allocating some extra storage just to be safe */
3390:     PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);
3391:     PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);
3392:     for (i=0;i<pcis->n;i++) auxindices[i]=i;

3394:     /* some work vectors on vertices and/or constraints */
3395:     if (n_vertices) {
3396:       VecCreate(PETSC_COMM_SELF,&vec1_V);
3397:       VecSetSizes(vec1_V,n_vertices,n_vertices);
3398:       VecSetType(vec1_V,impVecType);
3399:       VecDuplicate(vec1_V,&vec2_V);
3400:     }
3401:     if (n_constraints) {
3402:       VecCreate(PETSC_COMM_SELF,&vec1_C);
3403:       VecSetSizes(vec1_C,n_constraints,n_constraints);
3404:       VecSetType(vec1_C,impVecType);
3405:       VecDuplicate(vec1_C,&vec2_C);
3406:       VecDuplicate(vec1_C,&pcbddc->vec1_C);
3407:     }
3408:     /* Precompute quantities needed for preprocessing and application of BDDC */
3409:     if (n_constraints) {
3410:       MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);
3411:       MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);
3412:       MatSetType(pcbddc->local_auxmat2,impMatType);
3413:       MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,NULL);

3415:       /* Create Constraint matrix on R nodes: C_{CR}  */
3416:       MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);
3417:       ISDestroy(&is_C_local);

3419:       /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
3420:       for (i=0; i<n_constraints; i++) {
3421:         VecSet(pcbddc->vec1_R,zero);

3423:         /* Get row of constraint matrix in R numbering */
3424:         VecGetArray(pcbddc->vec1_R,&array);
3425:         MatGetRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);
3426:         for (j=0;j<size_of_constraint;j++) array[row_cmat_indices[j]] = -row_cmat_values[j];
3427:         MatRestoreRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);
3428:         VecRestoreArray(pcbddc->vec1_R,&array);

3430:         /* Solve for row of constraint matrix in R numbering */
3431:         KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);

3433:         /* Set values */
3434:         VecGetArray(pcbddc->vec2_R,&array);
3435:         MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);
3436:         VecRestoreArray(pcbddc->vec2_R,&array);
3437:       }
3438:       MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);
3439:       MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);

3441:       /* Assemble AUXMAT = (LUFactor)(-C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */
3442:       MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);
3443:       MatFactorInfoInitialize(&matinfo);
3444:       ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);
3445:       MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);
3446:       ISDestroy(&is_aux1);

3448:       /* Assemble explicitly M1 = (C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} needed in preproc  */
3449:       MatCreate(PETSC_COMM_SELF,&M1);
3450:       MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);
3451:       MatSetType(M1,impMatType);
3452:       MatSeqDenseSetPreallocation(M1,NULL);
3453:       for (i=0; i<n_constraints; i++) {
3454:         VecSet(vec1_C,zero);
3455:         VecSetValue(vec1_C,i,one,INSERT_VALUES);
3456:         VecAssemblyBegin(vec1_C);
3457:         VecAssemblyEnd(vec1_C);
3458:         MatSolve(AUXMAT,vec1_C,vec2_C);
3459:         VecScale(vec2_C,m_one);
3460:         VecGetArray(vec2_C,&array);
3461:         MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);
3462:         VecRestoreArray(vec2_C,&array);
3463:       }
3464:       MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);
3465:       MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);
3466:       MatDestroy(&AUXMAT);
3467:       /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
3468:       MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);

3470:     }

3472:     /* Get submatrices from subdomain matrix */
3473:     if (n_vertices) {
3474:       MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);
3475:       MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);
3476:       MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);
3477:     }

3479:     /* Matrix of coarse basis functions (local) */
3480:     MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);
3481:     MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);
3482:     MatSetType(pcbddc->coarse_phi_B,impMatType);
3483:     MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,NULL);
3484:     if (pcbddc->inexact_prec_type || dbg_flag) {
3485:       MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);
3486:       MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);
3487:       MatSetType(pcbddc->coarse_phi_D,impMatType);
3488:       MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,NULL);
3489:     }

3491:     if (dbg_flag) {
3492:       PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);
3493:       PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);
3494:     }
3495:     /* Subdomain contribution (Non-overlapping) to coarse matrix  */
3496:     PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);

3498:     /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
3499:     for (i=0; i<n_vertices; i++) {
3500:       VecSet(vec1_V,zero);
3501:       VecSetValue(vec1_V,i,one,INSERT_VALUES);
3502:       VecAssemblyBegin(vec1_V);
3503:       VecAssemblyEnd(vec1_V);
3504:       /* solution of saddle point problem */
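      /* i.e. solve, with the precomputed Schur complement tools (local_auxmat1, local_auxmat2),
         the local saddle point problem
            | A_RR  C_CR^T | | v  |   | -A_RV e_i |
            | C_CR    0    | | mu | = |     0     |
         whose solution v is the restriction of the i-th coarse basis function to the R dofs */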
3505:       MatMult(A_RV,vec1_V,pcbddc->vec1_R);
3506:       KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
3507:       VecScale(pcbddc->vec1_R,m_one);
3508:       if (n_constraints) {
3509:         MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);
3510:         MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
3511:         VecScale(vec1_C,m_one);
3512:       }
3513:       MatMult(A_VR,pcbddc->vec1_R,vec2_V);
3514:       MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);

3516:       /* Set values in coarse basis function and subdomain part of coarse_mat */
3517:       /* coarse basis functions */
3518:       VecSet(pcis->vec1_B,zero);
3519:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
3520:       VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
3521:       VecGetArray(pcis->vec1_B,&array);
3522:       MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);
3523:       VecRestoreArray(pcis->vec1_B,&array);
3524:       MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);
3525:       if (pcbddc->inexact_prec_type || dbg_flag) {
3526:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
3527:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
3528:         VecGetArray(pcis->vec1_D,&array);
3529:         MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);
3530:         VecRestoreArray(pcis->vec1_D,&array);
3531:       }
3532:       /* subdomain contribution to coarse matrix */
3533:       VecGetArray(vec2_V,&array);
3534:       for (j=0; j<n_vertices; j++) coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j];   /* WARNING -> column major ordering */
3535:       VecRestoreArray(vec2_V,&array);
3536:       if (n_constraints) {
3537:         VecGetArray(vec1_C,&array);
3538:         for (j=0; j<n_constraints; j++) coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j];   /* WARNING -> column major ordering */
3539:         VecRestoreArray(vec1_C,&array);
3540:       }

3542:       if (dbg_flag) {
3543:         /* assemble subdomain vector on nodes */
3544:         VecSet(pcis->vec1_N,zero);
3545:         VecGetArray(pcis->vec1_N,&array);
3546:         VecGetArray(pcbddc->vec1_R,&array2);
3547:         for (j=0;j<n_R;j++) array[idx_R_local[j]] = array2[j];
3548:         array[vertices[i]] = one;
3549:         VecRestoreArray(pcbddc->vec1_R,&array2);
3550:         VecRestoreArray(pcis->vec1_N,&array);

3552:         /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
3553:         VecSet(pcbddc->vec1_P,zero);
3554:         VecGetArray(pcbddc->vec1_P,&array2);
3555:         VecGetArray(vec2_V,&array);
3556:         for (j=0;j<n_vertices;j++) array2[j]=array[j];
3557:         VecRestoreArray(vec2_V,&array);
3558:         if (n_constraints) {
3559:           VecGetArray(vec1_C,&array);
3560:           for (j=0;j<n_constraints;j++) array2[j+n_vertices]=array[j];
3561:           VecRestoreArray(vec1_C,&array);
3562:         }
3563:         VecRestoreArray(pcbddc->vec1_P,&array2);
3564:         VecScale(pcbddc->vec1_P,m_one);

3566:         /* check saddle point solution */
3567:         MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);
3568:         MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);
3569:         VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);
3570:         MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);
3571:         VecGetArray(pcbddc->vec1_P,&array);
3572:         array[i]=array[i]+m_one;  /* shift by the identity matrix */
3573:         VecRestoreArray(pcbddc->vec1_P,&array);
3574:         VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);
3575:       }
3576:     }

3578:     for (i=0; i<n_constraints; i++) {
3579:       VecSet(vec2_C,zero);
3580:       VecSetValue(vec2_C,i,m_one,INSERT_VALUES);
3581:       VecAssemblyBegin(vec2_C);
3582:       VecAssemblyEnd(vec2_C);

3584:       /* solution of saddle point problem */
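      /* same saddle point problem as in the vertex loop, now with zero right hand side in the
         R block and -e_i in the constraint block; M1 applies the inverse of the Schur
         complement C_CR A_RR^{-1} C_CR^T */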
3585:       MatMult(M1,vec2_C,vec1_C);
3586:       MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);
3587:       VecScale(vec1_C,m_one);
3588:       if (n_vertices) { MatMult(A_VR,pcbddc->vec1_R,vec2_V); }

3590:       /* Set values in coarse basis function and subdomain part of coarse_mat */
3591:       /* coarse basis functions */
3592:       index=i+n_vertices;
3593:       VecSet(pcis->vec1_B,zero);
3594:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
3595:       VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
3596:       VecGetArray(pcis->vec1_B,&array);
3597:       MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);
3598:       VecRestoreArray(pcis->vec1_B,&array);
3599:       if (pcbddc->inexact_prec_type || dbg_flag) {
3600:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
3601:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
3602:         VecGetArray(pcis->vec1_D,&array);
3603:         MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);
3604:         VecRestoreArray(pcis->vec1_D,&array);
3605:       }

3607:       /* subdomain contribution to coarse matrix */
3608:       if (n_vertices) {
3609:         VecGetArray(vec2_V,&array);
3610:         for (j=0; j<n_vertices; j++) coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j]; /* WARNING -> column major ordering */
3611:         VecRestoreArray(vec2_V,&array);
3612:       }
3613:       VecGetArray(vec1_C,&array);
3614:       for (j=0; j<n_constraints; j++) coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j]; /* WARNING -> column major ordering */
3615:       VecRestoreArray(vec1_C,&array);

3617:       if (dbg_flag) {
3618:         /* assemble subdomain vector on nodes */
3619:         VecSet(pcis->vec1_N,zero);
3620:         VecGetArray(pcis->vec1_N,&array);
3621:         VecGetArray(pcbddc->vec1_R,&array2);
3622:         for (j=0;j<n_R;j++) array[idx_R_local[j]] = array2[j];
3623:         VecRestoreArray(pcbddc->vec1_R,&array2);
3624:         VecRestoreArray(pcis->vec1_N,&array);

3626:         /* assemble subdomain vector of lagrange multipliers */
3627:         VecSet(pcbddc->vec1_P,zero);
3628:         VecGetArray(pcbddc->vec1_P,&array2);
3629:         if (n_vertices) {
3630:           VecGetArray(vec2_V,&array);
3631:           for (j=0;j<n_vertices;j++) array2[j]=-array[j];
3632:           VecRestoreArray(vec2_V,&array);
3633:         }
3634:         VecGetArray(vec1_C,&array);
3635:         for (j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];}
3636:         VecRestoreArray(vec1_C,&array);
3637:         VecRestoreArray(pcbddc->vec1_P,&array2);

3639:         /* check saddle point solution */
3640:         MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);
3641:         MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);
3642:         VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);
3643:         MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);
3644:         VecGetArray(pcbddc->vec1_P,&array);
3645:         array[index] = array[index]+m_one; /* shift by the identity matrix */
3646:         VecRestoreArray(pcbddc->vec1_P,&array);
3647:         VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);
3648:       }
3649:     }
3650:     MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);
3651:     MatAssemblyEnd  (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);
3652:     if (pcbddc->inexact_prec_type || dbg_flag) {
3653:       MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);
3654:       MatAssemblyEnd  (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);
3655:     }
3656:     /* Checking coarse_sub_mat and coarse basis functions */
3657:     /* It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)} = coarse_sub_mat */
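    /* expanded in interior (D) and boundary (B) blocks:
         \Phi^T A \Phi = \Phi_D^T A_II \Phi_D + \Phi_B^T A_BB \Phi_B
                       + \Phi_D^T A_IB \Phi_B + \Phi_B^T A_BI \Phi_D,
       which are exactly the terms TM1, TM2, TM3, TM4 assembled below */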
3658:     if (dbg_flag) {
3659:       Mat         coarse_sub_mat;
3660:       Mat         TM1,TM2,TM3,TM4;
3661:       Mat         coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI;
3662:       MatType     checkmattype=MATSEQAIJ;
3663:       PetscScalar value;

3665:       MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
3666:       MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
3667:       MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
3668:       MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
3669:       MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
3670:       MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
3671:       MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);
3672:       MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);

3674:       PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");
3675:       PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");
3676:       PetscViewerFlush(viewer);
3677:       MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
3678:       MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
3679:       MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
3680:       MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
3681:       MatDestroy(&AUXMAT);
3682:       MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
3683:       MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
3684:       MatDestroy(&AUXMAT);
3685:       MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
3686:       MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
3687:       MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
3688:       MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);
3689:       MatNorm(TM1,NORM_INFINITY,&value);
3690:       PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");
3691:       PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);
3692:       PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);
3693:       PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");
3694:       for (i=0; i<pcbddc->local_primal_size; i++) { PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]); }
3695:       PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");
3696:       for (i=0; i<pcbddc->local_primal_size; i++) { PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]); }
3697:       PetscViewerFlush(viewer);
3698:       MatDestroy(&A_II);
3699:       MatDestroy(&A_BB);
3700:       MatDestroy(&A_IB);
3701:       MatDestroy(&A_BI);
3702:       MatDestroy(&TM1);
3703:       MatDestroy(&TM2);
3704:       MatDestroy(&TM3);
3705:       MatDestroy(&TM4);
3706:       MatDestroy(&coarse_phi_D);
3707:       MatDestroy(&coarse_sub_mat);
3708:       MatDestroy(&coarse_phi_B);
3709:       PetscFree(coarsefunctions_errors);
3710:       PetscFree(constraints_errors);
3711:     }
3712:     /* free memory */
3713:     if (n_vertices) {
3714:       VecDestroy(&vec1_V);
3715:       VecDestroy(&vec2_V);
3716:       MatDestroy(&A_RV);
3717:       MatDestroy(&A_VR);
3718:       MatDestroy(&A_VV);
3719:     }
3720:     if (n_constraints) {
3721:       VecDestroy(&vec1_C);
3722:       VecDestroy(&vec2_C);
3723:       MatDestroy(&M1);
3724:       MatDestroy(&C_CR);
3725:     }
3726:     PetscFree(auxindices);
3727:     PetscFree(nnz);
3728:     /* create coarse matrix and data structures for message passing associated with the actual choice of coarse problem type */
3729:     PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);
3730:     PetscFree(coarse_submat_vals);
3731:   }
3732:   /* free memory */
3733:   if (n_vertices) {
3734:     PetscFree(idx_V_B);
3735:     ISDestroy(&is_V_local);
3736:   }
3737:   ISDestroy(&is_R_local);
3738:   return(0);
3739: }

3741: /* -------------------------------------------------------------------------- */

3745: static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals)
3746: {
3747:   Mat_IS   *matis    = (Mat_IS*)pc->pmat->data;
3748:   PC_BDDC  *pcbddc   = (PC_BDDC*)pc->data;
3749:   PC_IS    *pcis     = (PC_IS*)pc->data;
3750:   MPI_Comm prec_comm;
3751:   MPI_Comm coarse_comm;

3753:   /* common to all choices */
3754:   PetscScalar *temp_coarse_mat_vals;
3755:   PetscScalar *ins_coarse_mat_vals;
3756:   PetscInt    *ins_local_primal_indices;
3757:   PetscMPIInt *localsizes2,*localdispl2;
3758:   PetscMPIInt size_prec_comm;
3759:   PetscMPIInt rank_prec_comm;
3760:   PetscMPIInt active_rank=MPI_PROC_NULL;
3761:   PetscMPIInt master_proc=0;
3762:   PetscInt    ins_local_primal_size;

3764:   /* specific to MULTILEVEL_BDDC */
3765:   PetscMPIInt            *ranks_recv;
3766:   PetscMPIInt            count_recv=0;
3767:   PetscMPIInt            rank_coarse_proc_send_to;
3768:   PetscMPIInt            coarse_color = MPI_UNDEFINED;
3769:   ISLocalToGlobalMapping coarse_ISLG;

3771:   /* some other variables */
3773:   MatType        coarse_mat_type;
3774:   PCType         coarse_pc_type;
3775:   KSPType        coarse_ksp_type;
3776:   PC             pc_temp;
3777:   PetscInt       i,j,k;
3778:   PetscInt       max_it_coarse_ksp=1;          /* don't increase this value */

3780:   /* verbose output viewer */
3781:   PetscViewer viewer   = pcbddc->dbg_viewer;
3782:   PetscBool   dbg_flag = pcbddc->dbg_flag;

3784:   PetscInt    offset,offset2;
3785:   PetscMPIInt im_active,active_procs;
3786:   PetscInt    *dnz,*onz;

3788:   PetscBool setsym,issym=PETSC_FALSE;

3791:   PetscObjectGetComm((PetscObject)pc,&prec_comm);
3792:   ins_local_primal_indices = 0;
3793:   ins_coarse_mat_vals      = 0;
3794:   localsizes2              = 0;
3795:   localdispl2              = 0;
3796:   temp_coarse_mat_vals     = 0;
3797:   coarse_ISLG              = 0;

3799:   MPI_Comm_size(prec_comm,&size_prec_comm);
3800:   MPI_Comm_rank(prec_comm,&rank_prec_comm);
3801:   MatIsSymmetricKnown(pc->pmat,&setsym,&issym);

3803:   /* Assign global numbering to coarse dofs */
3804:   {
3805:     PetscInt    *auxlocal_primal;
3806:     PetscInt    *row_cmat_indices;
3807:     PetscInt    *aux_ordering;
3808:     PetscInt    *row_cmat_global_indices;
3809:     PetscInt    *dof_sizes,*dof_displs;
3810:     PetscInt    size_of_constraint;
3811:     PetscBool   *array_bool;
3812:     PetscBool   first_found;
3813:     PetscInt    first_index,old_index,s;
3814:     PetscMPIInt mpi_local_primal_size;
3815:     PetscScalar coarsesum,*array;

3817:     mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size;

3819:     /* Construct needed data structures for message passing */
3820:     PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->local_primal_indices);
3821:     j = 0;
3822:     if (rank_prec_comm == 0 || pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3823:       j = size_prec_comm;
3824:     }
3825:     PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);
3826:     PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);
3827:     /* Gather local_primal_size information for all processes  */
3828:     if (pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3829:       MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);
3830:     } else {
3831:       MPI_Gather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,0,prec_comm);
3832:     }
3833:     pcbddc->replicated_primal_size = 0;
3834:     for (i=0; i<j; i++) {
3835:       pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size;
3836:       pcbddc->replicated_primal_size       += pcbddc->local_primal_sizes[i];
3837:     }
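    /* The displacements are the exclusive prefix sum of the gathered sizes:
       e.g. for local_primal_sizes = {3,2,4} this yields displacements {0,3,5}
       and replicated_primal_size = 9. These offsets are reused later as the
       displacement arguments of MPI_Gatherv/MPI_Allgatherv. */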

3839:     /* First let's count coarse dofs.
3840:        This code fragment assumes that the number of local constraints per connected component
3841:        is not greater than the number of nodes defined for the connected component
3842:        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
3843:     PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);
3844:     j    = 0;
3845:     for (i=0; i<pcbddc->local_primal_size; i++) {
3846:       MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);
3847:       if (j < size_of_constraint) j = size_of_constraint;
3848:       MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);
3849:     }
3850:     PetscMalloc(j*sizeof(PetscInt),&aux_ordering);
3851:     PetscMalloc(j*sizeof(PetscInt),&row_cmat_global_indices);
3852:     PetscMalloc(pcis->n*sizeof(PetscBool),&array_bool);
3853:     for (i=0;i<pcis->n;i++) array_bool[i] = PETSC_FALSE;

3855:     for (i=0;i<pcbddc->local_primal_size;i++) {
3856:       MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);
3857:       for (j=0; j<size_of_constraint; j++) aux_ordering[j] = j;

3859:       ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,row_cmat_indices,row_cmat_global_indices);
3860:       PetscSortIntWithPermutation(size_of_constraint,row_cmat_global_indices,aux_ordering);
3861:       for (j=0; j<size_of_constraint; j++) {
3862:         k = row_cmat_indices[aux_ordering[j]];
3863:         if (!array_bool[k]) {
3864:           array_bool[k]      = PETSC_TRUE;
3865:           auxlocal_primal[i] = k;
3866:           break;
3867:         }
3868:       }
3869:       MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);
3870:     }
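    /* Each constraint row is now represented by a single local node stored in
       auxlocal_primal[i]: candidates are scanned in increasing global index,
       which makes the choice independent of the local ordering, so subdomains
       sharing a constraint should end up picking the same node; array_bool
       prevents one node from representing two constraints. */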
3871:     PetscFree(aux_ordering);
3872:     PetscFree(array_bool);
3873:     PetscFree(row_cmat_global_indices);

3875:     /* Compute number of coarse dofs */
3876:     VecSet(pcis->vec1_N,0.0);
3877:     VecGetArray(pcis->vec1_N,&array);
3878:     for (i=0;i<pcbddc->local_primal_size;i++) array[auxlocal_primal[i]] = 1.0;
3879:     VecRestoreArray(pcis->vec1_N,&array);
3880:     VecSet(pcis->vec1_global,0.0);
3881:     VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,INSERT_VALUES,SCATTER_REVERSE);
3882:     VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,INSERT_VALUES,SCATTER_REVERSE);
3883:     VecSum(pcis->vec1_global,&coarsesum);
3884:     pcbddc->coarse_size = (PetscInt)coarsesum;

3886:     /* Fill pcis->vec1_global with cumulative function for global numbering */
3887:     VecGetArray(pcis->vec1_global,&array);
3888:     VecGetLocalSize(pcis->vec1_global,&s);
3889:     k           = 0;
3890:     first_index = -1;
3891:     first_found = PETSC_FALSE;
3892:     for (i=0; i<s; i++) {
3893:       if (!first_found && array[i] > 0.0) {
3894:         first_found = PETSC_TRUE;
3895:         first_index = i;
3896:       }
3897:       k += (PetscInt)array[i];
3898:     }
3899:     j    = (!rank_prec_comm ? size_prec_comm : 0);
3900:     PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);
3901:     PetscMalloc(j*sizeof(*dof_displs),&dof_displs);
3902:     MPI_Gather(&k,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,prec_comm);
3903:     if (!rank_prec_comm) {
3904:       dof_displs[0]=0;
3905:       for (i=1;i<size_prec_comm;i++) dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3906:     }
3907:     MPI_Scatter(dof_displs,1,MPIU_INT,&k,1,MPIU_INT,0,prec_comm);
3908:     if (first_found) {
3909:       array[first_index] += k;
3910:       old_index = first_index;
3911:       for (i=first_index+1;i<s;i++) {
3912:         if (array[i] > 0.0) {
3913:           array[i] += array[old_index];
3914:           old_index = i;
3915:         }
3916:       }
3917:     }
3918:     VecRestoreArray(pcis->vec1_global,&array);
3919:     VecSet(pcis->vec1_N,0.0);
3920:     VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
3921:     VecScatterEnd  (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
3922:     VecGetArray(pcis->vec1_N,&array);
3923:     for (i=0;i<pcbddc->local_primal_size;i++) pcbddc->local_primal_indices[i] = (PetscInt)array[auxlocal_primal[i]]-1;
3924:     VecRestoreArray(pcis->vec1_N,&array);
3925:     PetscFree(dof_displs);
3926:     PetscFree(dof_sizes);
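    /* A small sketch of the numbering just computed: with two processes owning
       k=2 and k=3 marked entries, MPI_Gather collects {2,3}, rank 0 builds the
       displacements {0,2}, and MPI_Scatter returns to each process its starting
       offset; the local sweep then turns the positive entries into the running
       values 1,2 (rank 0) and 3,4,5 (rank 1), and subtracting 1 after the
       forward scatter gives the 0-based global indices of the primal dofs. */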

3928:     if (dbg_flag) {
3929:       PetscViewerFlush(viewer);
3930:       PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");
3931:       PetscViewerASCIIPrintf(viewer,"Check coarse indices\n");
3932:       VecSet(pcis->vec1_N,0.0);
3933:       VecGetArray(pcis->vec1_N,&array);
3934:       for (i=0;i<pcbddc->local_primal_size;i++) array[auxlocal_primal[i]]=1.0;
3935:       VecRestoreArray(pcis->vec1_N,&array);
3936:       VecSet(pcis->vec1_global,0.0);
3937:       VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
3938:       VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
3939:       VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
3940:       VecScatterEnd  (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
3941:       VecGetArray(pcis->vec1_N,&array);
3942:       for (i=0;i<pcis->n;i++) {
3943:         if (array[i] == 1.0) {
3944:           ISLocalToGlobalMappingApply(matis->mapping,1,&i,&j);
3945:           PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d: WRONG COARSE INDEX %d (local %d)\n",PetscGlobalRank,j,i);
3946:         }
3947:       }
3948:       PetscViewerFlush(viewer);
3949:       for (i=0;i<pcis->n;i++) {
3950:         if (array[i] > 0.0) array[i] = 1.0/array[i];
3951:       }
3952:       VecRestoreArray(pcis->vec1_N,&array);
3953:       VecSet(pcis->vec1_global,0.0);
3954:       VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
3955:       VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
3956:       VecSum(pcis->vec1_global,&coarsesum);
3957:       PetscViewerASCIIPrintf(viewer,"Size of coarse problem SHOULD be %lf\n",coarsesum);
3958:       PetscViewerFlush(viewer);
3959:     }
3960:     PetscFree(auxlocal_primal);
3961:   }

3963:   if (dbg_flag) {
3964:     PetscViewerASCIIPrintf(viewer,"Size of coarse problem is %d\n",pcbddc->coarse_size);
3965:     PetscViewerFlush(viewer);
3966:   }

3968:   im_active = 0;
3969:   if (pcis->n) im_active = 1;
3970:   MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,prec_comm);

3972:   /* adapt coarse problem type */
3973:   if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3974:     if (pcbddc->current_level < pcbddc->max_levels) {
3975:       if ((active_procs/pcbddc->coarsening_ratio) < 2) {
3976:         if (dbg_flag) {
3977:           PetscViewerASCIIPrintf(viewer,"Not enough active processes on level %d (active %d,ratio %d). Parallel direct solve for coarse problem\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);
3978:           PetscViewerFlush(viewer);
3979:         }
3980:         pcbddc->coarse_problem_type = PARALLEL_BDDC;
3981:       }
3982:     } else {
3983:       if (dbg_flag) {
3984:         PetscViewerASCIIPrintf(viewer,"Max number of levels reached. Using parallel direct solve for coarse problem\n",pcbddc->max_levels,active_procs,pcbddc->coarsening_ratio);
3985:         PetscViewerFlush(viewer);
3986:       }
3987:       pcbddc->coarse_problem_type = PARALLEL_BDDC;
3988:     }
3989:   }
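  /* For example, with 64 active processes and coarsening_ratio 8 the coarse
     problem of the first level lives on 8 processes; at the next level
     8/8 = 1 < 2, so the recursion stops and the last coarse problem is solved
     with PARALLEL_BDDC (i.e. KSPPREONLY + PCREDUNDANT, see below). */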

3991:   switch (pcbddc->coarse_problem_type) {

3993:   case (MULTILEVEL_BDDC):    /* we define a coarse mesh where subdomains are elements */
3994:   {
3995:     /* we need additional variables */
3996:     MetisInt    n_subdomains,n_parts,objval,ncon,faces_nvtxs;
3997:     MetisInt    *metis_coarse_subdivision;
3998:     MetisInt    options[METIS_NOPTIONS];
3999:     PetscMPIInt size_coarse_comm,rank_coarse_comm;
4000:     PetscMPIInt procs_jumps_coarse_comm;
4001:     PetscMPIInt *coarse_subdivision;
4002:     PetscMPIInt *total_count_recv;
4003:     PetscMPIInt *total_ranks_recv;
4004:     PetscMPIInt *displacements_recv;
4005:     PetscMPIInt *my_faces_connectivity;
4006:     PetscMPIInt *petsc_faces_adjncy;
4007:     MetisInt    *faces_adjncy;
4008:     MetisInt    *faces_xadj;
4009:     PetscMPIInt *number_of_faces;
4010:     PetscMPIInt *faces_displacements;
4011:     PetscInt    *array_int;
4012:     PetscMPIInt my_faces   =0;
4013:     PetscMPIInt total_faces=0;
4014:     PetscInt    ranks_stretching_ratio;

4016:     /* define some quantities */
4017:     pcbddc->coarse_communications_type = SCATTERS_BDDC;
4018:     coarse_mat_type                    = MATIS;
4019:     coarse_pc_type                     = PCBDDC;
4020:     coarse_ksp_type                    = KSPRICHARDSON;

4022:     /* details of coarse decomposition */
4023:     n_subdomains            = active_procs;
4024:     n_parts                 = n_subdomains/pcbddc->coarsening_ratio;
4025:     ranks_stretching_ratio  = size_prec_comm/active_procs;
4026:     procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio;

4028: #if 0
4029:     PetscMPIInt     *old_ranks;
4030:     PetscInt        *new_ranks,*jj,*ii;
4031:     MatPartitioning mat_part;
4032:     IS              coarse_new_decomposition,is_numbering;
4033:     PetscViewer     viewer_test;
4034:     MPI_Comm        test_coarse_comm;
4035:     PetscMPIInt     test_coarse_color;
4036:     Mat             mat_adj;
4037:     /* Create new communicator for coarse problem splitting the old one */
4038:     /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards)
4039:        key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */
4040:     test_coarse_color = (im_active ? 0 : MPI_UNDEFINED);
4041:     test_coarse_comm  = MPI_COMM_NULL;
4042:     MPI_Comm_split(prec_comm,test_coarse_color,rank_prec_comm,&test_coarse_comm);
4043:     if (im_active) {
4044:       PetscMalloc(n_subdomains*sizeof(PetscMPIInt),&old_ranks);
4045:       PetscMalloc(size_prec_comm*sizeof(PetscInt),&new_ranks);
4046:       MPI_Comm_rank(test_coarse_comm,&rank_coarse_comm);
4047:       MPI_Comm_size(test_coarse_comm,&j);
4048:       MPI_Allgather(&rank_prec_comm,1,MPIU_INT,old_ranks,1,MPIU_INT,test_coarse_comm);
4049:       for (i=0; i<size_prec_comm; i++)
4050:         new_ranks[i] = -1;
4051:       for (i=0; i<n_subdomains; i++)
4052:         new_ranks[old_ranks[i]] = i;
4053:       PetscViewerASCIIOpen(test_coarse_comm,"test_mat_part.out",&viewer_test);
4054:       k    = pcis->n_neigh-1;
4055:       PetscMalloc(2*sizeof(PetscInt),&ii);
4056:       ii[0]=0;
4057:       ii[1]=k;
4058:       PetscMalloc(k*sizeof(PetscInt),&jj);
4059:       for (i=0; i<k; i++)
4060:         jj[i]=new_ranks[pcis->neigh[i+1]];
4061:       PetscSortInt(k,jj);
4062:       MatCreateMPIAdj(test_coarse_comm,1,n_subdomains,ii,jj,NULL,&mat_adj);
4063:       MatView(mat_adj,viewer_test);
4064:       MatPartitioningCreate(test_coarse_comm,&mat_part);
4065:       MatPartitioningSetAdjacency(mat_part,mat_adj);
4066:       MatPartitioningSetFromOptions(mat_part);
4067:       printf("Setting Nparts %d\n",n_parts);
4068:       MatPartitioningSetNParts(mat_part,n_parts);
4069:       MatPartitioningView(mat_part,viewer_test);
4070:       MatPartitioningApply(mat_part,&coarse_new_decomposition);
4071:       ISView(coarse_new_decomposition,viewer_test);
4072:       ISPartitioningToNumbering(coarse_new_decomposition,&is_numbering);
4073:       ISView(is_numbering,viewer_test);
4074:       PetscViewerDestroy(&viewer_test);
4075:       ISDestroy(&coarse_new_decomposition);
4076:       ISDestroy(&is_numbering);
4077:       MatPartitioningDestroy(&mat_part);
4078:       PetscFree(old_ranks);
4079:       PetscFree(new_ranks);
4080:       MPI_Comm_free(&test_coarse_comm);
4081:     }
4082: #endif

4084:     /* build CSR graph of subdomains' connectivity */
4085:     PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);
4086:     PetscMemzero(array_int,pcis->n*sizeof(PetscInt));
4087:     for (i=1;i<pcis->n_neigh;i++) {/* start at i=1 to skip the subdomain itself -> face nodes get count 1 */
4088:       for (j=0;j<pcis->n_shared[i];j++) {
4089:         array_int[pcis->shared[i][j]]+=1;
4090:       }
4091:     }
4092:     for (i=1;i<pcis->n_neigh;i++) {
4093:       for (j=0;j<pcis->n_shared[i];j++) {
4094:         if (array_int[pcis->shared[i][j]] > 0) {
4095:           my_faces++;
4096:           break;
4097:         }
4098:       }
4099:     }
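    /* A neighbour contributes one "face" of the coarse mesh as soon as one of
       the nodes shared with it lies on the interface (array_int > 0), so
       my_faces counts the subdomains this one is connected to; the analogous
       loop below records the corresponding neighbour ranks in
       my_faces_connectivity. */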

4101:     MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);
4102:     PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);
4103:     my_faces=0;
4104:     for (i=1;i<pcis->n_neigh;i++) {
4105:       for (j=0;j<pcis->n_shared[i];j++) {
4106:         if (array_int[pcis->shared[i][j]] > 0) {
4107:           my_faces_connectivity[my_faces]=pcis->neigh[i];
4108:           my_faces++;
4109:           break;
4110:         }
4111:       }
4112:     }
4113:     if (rank_prec_comm == master_proc) {
4114:       PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);
4115:       PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);
4116:       PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);
4117:       PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);
4118:       PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);
4119:     }
4120:     MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);
4121:     if (rank_prec_comm == master_proc) {
4122:       faces_xadj[0]          = 0;
4123:       faces_displacements[0] = 0;

4125:       j=0;
4126:       for (i=1;i<size_prec_comm+1;i++) {
4127:         faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1];
4128:         if (number_of_faces[i-1]) {
4129:           j++;
4130:           faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1];
4131:         }
4132:       }
4133:     }
4134:     MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);
4135:     PetscFree(my_faces_connectivity);
4136:     PetscFree(array_int);
4137:     if (rank_prec_comm == master_proc) {
4138:       for (i=0; i<total_faces; i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */
4139:       PetscFree(faces_displacements);
4140:       PetscFree(number_of_faces);
4141:       PetscFree(petsc_faces_adjncy);
4142:     }

4144:     if (rank_prec_comm == master_proc) {
4145:       PetscInt heuristic_for_metis=3;
4146:       ncon       =1;
4147:       faces_nvtxs=n_subdomains;
4148:       /* partition graph induced by face connectivity */
4149:       PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);
4150:       METIS_SetDefaultOptions(options);
4151:       /* we need a contiguous partition of the coarse mesh */
4152:       options[METIS_OPTION_CONTIG]=1;
4153:       options[METIS_OPTION_NITER] =30;
4154:       if (pcbddc->coarsening_ratio > 1) {
4155:         if (n_subdomains>n_parts*heuristic_for_metis) {
4156:           options[METIS_OPTION_IPTYPE] =METIS_IPTYPE_EDGE;
4157:           options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT;

4159:           ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
4160:           if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr);
4161:         } else {
4162:           ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
4163:           if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphRecursive (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr);
4164:         }
4165:       } else {
4166:         for (i=0;i<n_subdomains;i++) metis_coarse_subdivision[i]=i;
4167:       }
4168:       PetscFree(faces_xadj);
4169:       PetscFree(faces_adjncy);
4170:       PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&coarse_subdivision);

4172:       /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */
4173:       for (i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL;
4174:       for (i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]);
4175:       PetscFree(metis_coarse_subdivision);
4176:     }

4178:     /* Create new communicator for coarse problem splitting the old one */
4179:     if (!(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts) {
4180:       coarse_color=0;                /* for communicator splitting */
4181:       active_rank =rank_prec_comm;   /* for insertion of matrix values */
4182:     }
4183:     /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards)
4184:        key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */
4185:     MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);

4187:     if (coarse_color == 0) {
4188:       MPI_Comm_size(coarse_comm,&size_coarse_comm);
4189:       MPI_Comm_rank(coarse_comm,&rank_coarse_comm);
4190:     } else {
4191:       rank_coarse_comm = MPI_PROC_NULL;
4192:     }
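    /* Only ranks that are multiples of procs_jumps_coarse_comm (and smaller
       than procs_jumps_coarse_comm*n_parts) get coarse_color 0 and join
       coarse_comm: e.g. with 16 ranks, coarsening_ratio 4 and stretching
       ratio 1, ranks 0,4,8,12 host the coarse problem while all the others
       end up with coarse_comm = MPI_COMM_NULL. */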

4194:     /* master proc takes care of arranging and distributing coarse information */
4195:     if (rank_coarse_comm == master_proc) {
4196:       PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);
4197:       PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);
4198:       PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);
4199:       /* some initializations */
4200:       displacements_recv[0]=0;
4201:       PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));
4202:       /* count from how many processes the j-th process of the coarse decomposition will receive data */
4203:       for (j=0; j<size_coarse_comm; j++) {
4204:         for (i=0; i<size_prec_comm; i++) {
4205:           if (coarse_subdivision[i]==j) total_count_recv[j]++;
4206:         }
4207:       }
4208:       /* displacements needed for scatterv of total_ranks_recv */
4209:       for (i=1; i<size_coarse_comm; i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1];

4211:       /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */
4212:       PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));
4213:       for (j=0; j<size_coarse_comm; j++) {
4214:         for (i=0; i<size_prec_comm; i++) {
4215:           if (coarse_subdivision[i]==j) {
4216:             total_ranks_recv[displacements_recv[j]+total_count_recv[j]] = i;

4218:             total_count_recv[j] += 1;
4219:           }
4220:         }
4221:       }
4222:       /* for (j=0;j<size_coarse_comm;j++) {
4223:         printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]);
4224:         for (i=0;i<total_count_recv[j];i++) {
4225:           printf("%d ",total_ranks_recv[displacements_recv[j]+i]);
4226:         }
4227:         printf("\n");
4228:       } */

4230:       /* identify new decomposition in terms of ranks in the old communicator */
4231:       for (i=0; i<n_subdomains; i++) {
4232:         coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm;
4233:       }
4234:       /* printf("coarse_subdivision in old end new ranks\n");
4235:       for (i=0;i<size_prec_comm;i++) {
4236:         if (coarse_subdivision[i]!=MPI_PROC_NULL) {
4237:           printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm);
4238:         } else {
4239:           printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]);
4240:         }
4241:       }
4242:       printf("\n"); */
4243:     }

4245:     /* Scatter new decomposition for send details */
4246:     MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);
4247:     /* Scatter receiving details to members of coarse decomposition */
4248:     if (coarse_color == 0) {
4249:       MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);
4250:       PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);
4251:       MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);
4252:     }

4254:     /* printf("I will send my matrix data to proc  %d\n",rank_coarse_proc_send_to);
4255:     if (coarse_color == 0) {
4256:       printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv);
4257:       for (i=0;i<count_recv;i++)
4258:         printf("%d ",ranks_recv[i]);
4259:       printf("\n");
4260:     } */

4262:     if (rank_prec_comm == master_proc) {
4263:       PetscFree(coarse_subdivision);
4264:       PetscFree(total_count_recv);
4265:       PetscFree(total_ranks_recv);
4266:       PetscFree(displacements_recv);
4267:     }
4268:     break;
4269:   }

4271:   case (REPLICATED_BDDC):

4273:     pcbddc->coarse_communications_type = GATHERS_BDDC;
4274:     coarse_mat_type                    = MATSEQAIJ;
4275:     coarse_pc_type                     = PCLU;
4276:     coarse_ksp_type                    = KSPPREONLY;
4277:     coarse_comm                        = PETSC_COMM_SELF;
4278:     active_rank                        = rank_prec_comm;
4279:     break;

4281:   case (PARALLEL_BDDC):

4283:     pcbddc->coarse_communications_type = SCATTERS_BDDC;
4284:     coarse_mat_type                    = MATMPIAIJ;
4285:     coarse_pc_type                     = PCREDUNDANT;
4286:     coarse_ksp_type                    = KSPPREONLY;
4287:     coarse_comm                        = prec_comm;
4288:     active_rank                        = rank_prec_comm;
4289:     break;

4291:   case (SEQUENTIAL_BDDC):
4292:     pcbddc->coarse_communications_type = GATHERS_BDDC;
4293:     coarse_mat_type                    = MATSEQAIJ;
4294:     coarse_pc_type                     = PCLU;
4295:     coarse_ksp_type                    = KSPPREONLY;
4296:     coarse_comm                        = PETSC_COMM_SELF;
4297:     active_rank                        = master_proc;
4298:     break;
4299:   }

4301:   switch (pcbddc->coarse_communications_type) {

4303:   case(SCATTERS_BDDC):
4304:   {
4305:     if (pcbddc->coarse_problem_type==MULTILEVEL_BDDC) {

4307:       IS coarse_IS;

4309:       if (pcbddc->coarsening_ratio == 1) {
4310:         ins_local_primal_size = pcbddc->local_primal_size;
4311:         ins_local_primal_indices = pcbddc->local_primal_indices;
4312:         if (coarse_color == 0) { PetscFree(ranks_recv); }
4313:         /* nonzeros */
4314:         PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);
4315:         PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));
4316:         for (i=0;i<ins_local_primal_size;i++) {
4317:           dnz[i] = ins_local_primal_size;
4318:         }
4319:       } else {
4320:         PetscMPIInt send_size;
4321:         PetscMPIInt *send_buffer;
4322:         PetscInt    *aux_ins_indices;
4323:         PetscInt    ii,jj;
4324:         MPI_Request *requests;

4326:         PetscMalloc(count_recv*sizeof(PetscMPIInt),&localdispl2);
4327:         /* reusing pcbddc->local_primal_displacements and pcbddc->replicated_primal_size */
4328:         PetscFree(pcbddc->local_primal_displacements);
4329:         PetscMalloc((count_recv+1)*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);
4330:         pcbddc->replicated_primal_size = count_recv;
4331:         j = 0;
4332:         for (i=0;i<count_recv;i++) {
4333:           pcbddc->local_primal_displacements[i] = j;
4334:           j += pcbddc->local_primal_sizes[ranks_recv[i]];
4335:         }
4336:         pcbddc->local_primal_displacements[count_recv] = j;
4337:         PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);
4338:         /* allocate auxiliary space */
4339:         PetscMalloc(count_recv*sizeof(PetscMPIInt),&localsizes2);
4340:         PetscMalloc(pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);
4341:         PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));
4342:         /* allocate buffers for message passing */
4343:         PetscMalloc((count_recv+1)*sizeof(MPI_Request),&requests);
4344:         for (i=0;i<count_recv+1;i++) { requests[i]=MPI_REQUEST_NULL; }
4345:         /* send indices to be inserted */
4346:         for (i=0;i<count_recv;i++) {
4347:           send_size = pcbddc->local_primal_sizes[ranks_recv[i]];
4348:           MPI_Irecv(&pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]],send_size,MPIU_INT,ranks_recv[i],999,prec_comm,&requests[i]);
4349:         }
4350:         if (rank_coarse_proc_send_to != MPI_PROC_NULL) {
4351:           send_size = pcbddc->local_primal_size;
4352:           PetscMalloc(send_size*sizeof(PetscMPIInt),&send_buffer);
4353:           for (i=0;i<send_size;i++) {
4354:             send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i];
4355:           }
4356:           MPI_Isend(send_buffer,send_size,MPIU_INT,rank_coarse_proc_send_to,999,prec_comm,&requests[count_recv]);
4357:         }
4358:         MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);
4359:         if (rank_coarse_proc_send_to != MPI_PROC_NULL) {
4360:           PetscFree(send_buffer);
4361:         }
4362:         j = 0;
4363:         for (i=0;i<count_recv;i++) {
4364:           ii = pcbddc->local_primal_displacements[i+1]-pcbddc->local_primal_displacements[i];
4365:           localsizes2[i] = ii*ii;
4366:           localdispl2[i] = j;
4367:           j += localsizes2[i];
4368:           jj = pcbddc->local_primal_displacements[i];
4369:           /* count the coarse subdomains sharing each coarse node */
4370:           for (k=0;k<ii;k++) {
4371:             aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]] += 1;
4372:           }
4373:         }
4374:         /* temp_coarse_mat_vals used to store matrix values to be received */
4375:         PetscMalloc(j*sizeof(PetscScalar),&temp_coarse_mat_vals);
4376:         /* evaluate how many values I will insert in coarse mat */
4377:         ins_local_primal_size = 0;
4378:         for (i=0;i<pcbddc->coarse_size;i++) {
4379:           if (aux_ins_indices[i]) {
4380:             ins_local_primal_size++;
4381:           }
4382:         }
4383:         /* evaluate indices I will insert in coarse mat */
4384:         PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);
4385:         j = 0;
4386:         for (i=0;i<pcbddc->coarse_size;i++) {
4387:           if (aux_ins_indices[i]) {
4388:             ins_local_primal_indices[j] = i;
4389:             j++;
4390:           }
4391:         }
4392:         /* processes participating in the coarse problem receive matrix data from their friends */
4393:         for (i=0;i<count_recv;i++) {
4394:           MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);
4395:         }
4396:         if (rank_coarse_proc_send_to != MPI_PROC_NULL) {
4397:           send_size = pcbddc->local_primal_size*pcbddc->local_primal_size;
4398:           MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);
4399:         }
4400:         MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);
4401:         /* nonzeros */
4402:         PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);
4403:         PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));
4404:         /* use aux_ins_indices to realize a global to local mapping */
4405:         j=0;
4406:         for (i=0;i<pcbddc->coarse_size;i++) {
4407:           if (aux_ins_indices[i]==0) {
4408:             aux_ins_indices[i]=-1;
4409:           } else {
4410:             aux_ins_indices[i]=j;
4411:             j++;
4412:           }
4413:         }
4414:         for (i=0;i<count_recv;i++) {
4415:           j = pcbddc->local_primal_sizes[ranks_recv[i]];
4416:           for (k=0;k<j;k++) {
4417:             dnz[aux_ins_indices[pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]+k]]] += j;
4418:           }
4419:         }
4420:         /* check */
4421:         for (i=0;i<ins_local_primal_size;i++) {
4422:           if (dnz[i] > ins_local_primal_size) {
4423:             dnz[i] = ins_local_primal_size;
4424:           }
4425:         }
4426:         PetscFree(requests);
4427:         PetscFree(aux_ins_indices);
4428:         if (coarse_color == 0) { PetscFree(ranks_recv); }
4429:       }
4430:       /* create local to global mapping needed by coarse MATIS */
4431:       if (coarse_comm != MPI_COMM_NULL) {MPI_Comm_free(&coarse_comm);}
4432:       coarse_comm = prec_comm;
4433:       active_rank = rank_prec_comm;
4434:       ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);
4435:       ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);
4436:       ISDestroy(&coarse_IS);
4437:     } else if (pcbddc->coarse_problem_type==PARALLEL_BDDC) {
4438:       /* arrays for values insertion */
4439:       ins_local_primal_size = pcbddc->local_primal_size;
4440:       PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);
4441:       PetscMalloc(ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);
4442:       for (j=0;j<ins_local_primal_size;j++) {
4443:         ins_local_primal_indices[j]=pcbddc->local_primal_indices[j];
4444:         for (i=0;i<ins_local_primal_size;i++) {
4445:           ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i];
4446:         }
4447:       }
4448:     }
4449:     break;
4450:   }
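  /* Summary of the point-to-point pattern used above when coarsening_ratio > 1:
     every fine process with a coarse destination Isends its primal indices
     (tag 999) and its dense local_primal_size^2 submatrix (tag 666) to
     rank_coarse_proc_send_to, while every coarse process posts matching Irecvs
     from the ranks in ranks_recv; localdispl2/localsizes2 record where each
     contribution lands inside temp_coarse_mat_vals so it can be inserted with
     MatSetValues later. */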

4452:   case (GATHERS_BDDC):
4453:   {
4454:     PetscMPIInt mysize,mysize2;
4455:     PetscMPIInt *send_buffer;

4457:     if (rank_prec_comm==active_rank) {
4458:       PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);
4459:       PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscScalar),&pcbddc->replicated_local_primal_values);
4460:       PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&localsizes2);
4461:       PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&localdispl2);
4462:       /* arrays for values insertion */
4463:       for (i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i];
4464:       localdispl2[0]=0;
4465:       for (i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1];
4466:       j = 0;
4467:       for (i=0;i<size_prec_comm;i++) j+=localsizes2[i];
4468:       PetscMalloc (j*sizeof(PetscScalar),&temp_coarse_mat_vals);
4469:     }

4471:     mysize =pcbddc->local_primal_size;
4472:     mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size;
4473:     PetscMalloc(mysize*sizeof(PetscMPIInt),&send_buffer);
4474:     for (i=0; i<mysize; i++) send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i];

4476:     if (pcbddc->coarse_problem_type == SEQUENTIAL_BDDC) {
4477:       MPI_Gatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);
4478:       MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);
4479:     } else {
4480:       MPI_Allgatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);
4481:       MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);
4482:     }
4483:     PetscFree(send_buffer);
4484:     break;
4485:   }/* switch on coarse problem type and associated communications finished */
4486:   }

4488:   /* Now create and fill up coarse matrix */
4489:   if (rank_prec_comm == active_rank) {

4491:     Mat matis_coarse_local_mat;

4493:     if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) {
4494:       MatCreate(coarse_comm,&pcbddc->coarse_mat);
4495:       MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);
4496:       MatSetType(pcbddc->coarse_mat,coarse_mat_type);
4497:       MatSetUp(pcbddc->coarse_mat);
4498:       MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE); /* local values stored in column major */
4499:       MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);
4500:     } else {
4501:       MatCreateIS(coarse_comm,1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);
4502:       MatSetUp(pcbddc->coarse_mat);
4503:       MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);
4504:       MatSetUp(matis_coarse_local_mat);
4505:       MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE); /* local values stored in column major */
4506:       MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);
4507:     }
4508:     /* preallocation */
4509:     if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) {

4511:       PetscInt lrows,lcols;

4513:       MatGetLocalSize(pcbddc->coarse_mat,&lrows,&lcols);
4514:       MatPreallocateInitialize(coarse_comm,lrows,lcols,dnz,onz);

4516:       if (pcbddc->coarse_problem_type == PARALLEL_BDDC) {

4518:         Vec         vec_dnz,vec_onz;
4519:         PetscScalar *my_dnz,*my_onz,*array;
4520:         PetscInt    *mat_ranges,*row_ownership;
4521:         PetscInt    coarse_index_row,coarse_index_col,owner;

4523:         VecCreate(prec_comm,&vec_dnz);
4524:         VecSetSizes(vec_dnz,PETSC_DECIDE,pcbddc->coarse_size);
4525:         VecSetType(vec_dnz,VECMPI);
4526:         VecDuplicate(vec_dnz,&vec_onz);

4528:         PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_dnz);
4529:         PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_onz);
4530:         PetscMemzero(my_dnz,pcbddc->local_primal_size*sizeof(PetscScalar));
4531:         PetscMemzero(my_onz,pcbddc->local_primal_size*sizeof(PetscScalar));

4533:         PetscMalloc(pcbddc->coarse_size*sizeof(PetscInt),&row_ownership);
4534:         MatGetOwnershipRanges(pcbddc->coarse_mat,(const PetscInt**)&mat_ranges);
4535:         for (i=0; i<size_prec_comm; i++) {
4536:           for (j=mat_ranges[i]; j<mat_ranges[i+1]; j++) {
4537:             row_ownership[j]=i;
4538:           }
4539:         }

4541:         for (i=0; i<pcbddc->local_primal_size; i++) {
4542:           coarse_index_row = pcbddc->local_primal_indices[i];
4543:           owner = row_ownership[coarse_index_row];
4544:           for (j=i; j<pcbddc->local_primal_size; j++) {
4545:             owner = row_ownership[coarse_index_row];
4546:             coarse_index_col = pcbddc->local_primal_indices[j];
4547:             if (coarse_index_col > mat_ranges[owner]-1 && coarse_index_col < mat_ranges[owner+1]) {
4548:               my_dnz[i] += 1.0;
4549:             } else {
4550:               my_onz[i] += 1.0;
4551:             }
4552:             if (i != j) {
4553:               owner = row_ownership[coarse_index_col];
4554:               if (coarse_index_row > mat_ranges[owner]-1 && coarse_index_row < mat_ranges[owner+1]) {
4555:                 my_dnz[j] += 1.0;
4556:               } else {
4557:                 my_onz[j] += 1.0;
4558:               }
4559:             }
4560:           }
4561:         }
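        /* Each pair (i,j) of local primal indices contributes one nonzero to
           row i and, by symmetry, one to row j when i != j; it is counted as
           diagonal or off-diagonal depending on whether the column index falls
           inside [mat_ranges[owner],mat_ranges[owner+1]). The ADD_VALUES
           assembly below sums these counts over all subdomains, giving upper
           bounds for dnz/onz of the owned rows. */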
4562:         VecSet(vec_dnz,0.0);
4563:         VecSet(vec_onz,0.0);
4564:         if (pcbddc->local_primal_size) {
4565:           VecSetValues(vec_dnz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_dnz,ADD_VALUES);
4566:           VecSetValues(vec_onz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_onz,ADD_VALUES);
4567:         }
4568:         VecAssemblyBegin(vec_dnz);
4569:         VecAssemblyBegin(vec_onz);
4570:         VecAssemblyEnd(vec_dnz);
4571:         VecAssemblyEnd(vec_onz);
4572:         j    = mat_ranges[rank_prec_comm+1]-mat_ranges[rank_prec_comm];
4573:         VecGetArray(vec_dnz,&array);
4574:         for (i=0; i<j; i++) dnz[i] = (PetscInt)array[i];

4576:         VecRestoreArray(vec_dnz,&array);
4577:         VecGetArray(vec_onz,&array);
4578:         for (i=0;i<j;i++) onz[i] = (PetscInt)array[i];

4580:         VecRestoreArray(vec_onz,&array);
4581:         PetscFree(my_dnz);
4582:         PetscFree(my_onz);
4583:         PetscFree(row_ownership);
4584:         VecDestroy(&vec_dnz);
4585:         VecDestroy(&vec_onz);
4586:       } else {
4587:         for (k=0;k<size_prec_comm;k++) {
4588:           offset=pcbddc->local_primal_displacements[k];
4589:           offset2=localdispl2[k];
4590:           ins_local_primal_size = pcbddc->local_primal_sizes[k];
4591:           PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);
4592:           for (j=0;j<ins_local_primal_size;j++) {
4593:             ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j];
4594:           }
4595:           for (j=0;j<ins_local_primal_size;j++) {
4596:             MatPreallocateSet(ins_local_primal_indices[j],ins_local_primal_size,ins_local_primal_indices,dnz,onz);
4597:           }
4598:           PetscFree(ins_local_primal_indices);
4599:         }
4600:       }

4602:       /* check */
4603:       for (i=0;i<lrows;i++) {
4604:         if (dnz[i]>lcols) dnz[i]=lcols;
4605:         if (onz[i]>pcbddc->coarse_size-lcols) onz[i]=pcbddc->coarse_size-lcols;
4606:       }
4607:       MatSeqAIJSetPreallocation(pcbddc->coarse_mat,0,dnz);
4608:       MatMPIAIJSetPreallocation(pcbddc->coarse_mat,0,dnz,0,onz);
4609:       MatPreallocateFinalize(dnz,onz);
4610:     } else {
4611:       MatSeqAIJSetPreallocation(matis_coarse_local_mat,0,dnz);
4612:       PetscFree(dnz);
4613:     }

4615:     /* insert values */
4616:     if (pcbddc->coarse_problem_type == PARALLEL_BDDC) {
4617:       MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);
4618:     } else if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
4619:       if (pcbddc->coarsening_ratio == 1) {
4620:         ins_coarse_mat_vals = coarse_submat_vals;
4621:         MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,INSERT_VALUES);
4622:       } else {
4623:         PetscFree(ins_local_primal_indices);
4624:         for (k=0; k<pcbddc->replicated_primal_size; k++) {
4625:           offset                = pcbddc->local_primal_displacements[k];
4626:           offset2               = localdispl2[k];
4627:           ins_local_primal_size = pcbddc->local_primal_displacements[k+1]-pcbddc->local_primal_displacements[k];
4628:           PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);
4629:           for (j=0; j<ins_local_primal_size; j++) {
4630:             ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j];
4631:           }
4632:           ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2];
4633:           MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);
4634:           PetscFree(ins_local_primal_indices);
4635:         }
4636:       }
4637:       ins_local_primal_indices = 0;
4638:       ins_coarse_mat_vals      = 0;
4639:     } else {
4640:       for (k=0; k<size_prec_comm; k++) {
4641:         offset                =pcbddc->local_primal_displacements[k];
4642:         offset2               =localdispl2[k];
4643:         ins_local_primal_size = pcbddc->local_primal_sizes[k];
4644:         PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);
4645:         for (j=0; j<ins_local_primal_size; j++) {
4646:           ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j];
4647:         }
4648:         ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2];
4649:         MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);
4650:         PetscFree(ins_local_primal_indices);
4651:       }
4652:       ins_local_primal_indices = 0;
4653:       ins_coarse_mat_vals      = 0;
4654:     }
4655:     MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);
4656:     MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);
4657:     /* symmetry of coarse matrix */
4658:     if (issym) {
4659:       MatSetOption(pcbddc->coarse_mat,MAT_SYMMETRIC,PETSC_TRUE);
4660:     }
4661:     MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);
4662:   }

4664:   /* create loc to glob scatters if needed */
4665:   if (pcbddc->coarse_communications_type == SCATTERS_BDDC) {
4666:     IS local_IS,global_IS;
4667:     ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);
4668:     ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);
4669:     VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);
4670:     ISDestroy(&local_IS);
4671:     ISDestroy(&global_IS);
4672:   }
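  /* coarse_loc_to_glob maps the local primal ordering 0..local_primal_size-1
     (vec1_P) onto the global coarse indices stored in local_primal_indices
     (coarse_vec); it is the scatter used by PCBDDCScatterCoarseDataBegin/End
     below. */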

4674:   /* free memory no longer needed */
4675:   if (coarse_ISLG)              { ISLocalToGlobalMappingDestroy(&coarse_ISLG); }
4676:   if (ins_local_primal_indices) { PetscFree(ins_local_primal_indices); }
4677:   if (ins_coarse_mat_vals)      { PetscFree(ins_coarse_mat_vals); }
4678:   if (localsizes2)              { PetscFree(localsizes2); }
4679:   if (localdispl2)              { PetscFree(localdispl2); }
4680:   if (temp_coarse_mat_vals)     { PetscFree(temp_coarse_mat_vals); }

4682:   /* Evaluate coarse null space */
4683:   if (pcbddc->NullSpace) {
4684:     const Vec *nsp_vecs;
4685:     PetscInt  nsp_size,coarse_nsp_size;
4686:     PetscBool nsp_has_cnst;
4687:     PetscReal test_null;
4688:     Vec       *coarse_nsp_vecs;

4690:     coarse_nsp_size = 0;
4691:     coarse_nsp_vecs = 0;
4692:     MatNullSpaceGetVecs(pcbddc->NullSpace,&nsp_has_cnst,&nsp_size,&nsp_vecs);
4693:     if (rank_prec_comm == active_rank) {
4694:       PetscMalloc((nsp_size+1)*sizeof(Vec),&coarse_nsp_vecs);
4695:       for (i=0; i<nsp_size+1; i++) {
4696:         VecDuplicate(pcbddc->coarse_vec,&coarse_nsp_vecs[i]);
4697:       }
4698:     }
4699:     if (nsp_has_cnst) {
4700:       VecSet(pcis->vec1_N,1.0);
4701:       MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);
4702:       PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
4703:       PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
4704:       if (rank_prec_comm == active_rank) {
4705:         MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);
4706:         VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&test_null);
4707:         if (test_null > 1.0e-12 && pcbddc->dbg_flag) {
4708:           PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Constant coarse null space error % 1.14e\n",test_null);
4709:         }
4710:         VecCopy(pcbddc->coarse_vec,coarse_nsp_vecs[coarse_nsp_size]);
4711:         coarse_nsp_size++;
4712:       }
4713:     }
4714:     for (i=0; i<nsp_size; i++)  {
4715:       VecScatterBegin(matis->ctx,nsp_vecs[i],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
4716:       VecScatterEnd  (matis->ctx,nsp_vecs[i],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
4717:       MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);
4718:       PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
4719:       PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
4720:       if (rank_prec_comm == active_rank) {
4721:         MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);
4722:         VecNorm(pcbddc->coarse_rhs,NORM_2,&test_null);
4723:         if (test_null > 1.0e-12 && pcbddc->dbg_flag) {
4724:           PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Vec %d coarse null space error % 1.14e\n",i,test_null);
4725:         }
4726:         VecCopy(pcbddc->coarse_vec,coarse_nsp_vecs[coarse_nsp_size]);
4727:         coarse_nsp_size++;
4728:       }
4729:     }
4730:     if (coarse_nsp_size > 0) {
4731:       /* TODO orthonormalize vecs */
4732:       VecNormalize(coarse_nsp_vecs[0],NULL);
4733:       MatNullSpaceCreate(coarse_comm,PETSC_FALSE,coarse_nsp_size,coarse_nsp_vecs,&pcbddc->CoarseNullSpace);
4734:       for (i=0; i<nsp_size+1; i++) {
4735:         VecDestroy(&coarse_nsp_vecs[i]);
4736:       }
4737:     }
4738:     PetscFree(coarse_nsp_vecs);
4739:   }
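  /* The candidate coarse null vectors are built by applying the constraint
     matrix to the fine-level null vectors (and to the constant vector when the
     null space contains it) and scattering the result to the coarse layout;
     the norm checks above verify that coarse_mat annihilates each candidate up
     to the 1.0e-12 tolerance before the MatNullSpace is created. */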

4741:   /* KSP for coarse problem */
4742:   if (rank_prec_comm == active_rank) {
4743:     PetscBool isbddc=PETSC_FALSE;

4745:     KSPCreate(coarse_comm,&pcbddc->coarse_ksp);
4746:     PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
4747:     KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);
4748:     KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);
4749:     KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
4750:     KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
4751:     PCSetType(pc_temp,coarse_pc_type);
4752:     /* Allow user's customization */
4753:     KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");
4754:     /* Set Up PC for coarse problem BDDC */
4755:     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
4756:       i    = pcbddc->current_level+1;
4757:       PCBDDCSetLevel(pc_temp,i);
4758:       PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
4759:       PCBDDCSetMaxLevels(pc_temp,pcbddc->max_levels);
4760:       PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);
4761:       if (pcbddc->CoarseNullSpace) { PCBDDCSetNullSpace(pc_temp,pcbddc->CoarseNullSpace); }
4762:       if (dbg_flag) {
4763:         PetscViewerASCIIPrintf(viewer,"----------------Level %d: Setting up level %d---------------\n",pcbddc->current_level,i);
4764:         PetscViewerFlush(viewer);
4765:       }
4766:     }
4767:     KSPSetFromOptions(pcbddc->coarse_ksp);
4768:     KSPSetUp(pcbddc->coarse_ksp);

4770:     KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&j);
4771:     KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
4772:     PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
4773:     if (j == 1) {
4774:       KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);
4775:       if (isbddc) {
4776:         PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);
4777:       }
4778:     }
4779:   }
4780:   /* Check coarse problem if requested */
4781:   if (dbg_flag && rank_prec_comm == active_rank) {
4782:     KSP       check_ksp;
4783:     PC        check_pc;
4784:     Vec       check_vec;
4785:     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
4786:     KSPType   check_ksp_type;

4788:     /* Create KSP object suitable for estimating extreme eigenvalues */
4789:     KSPCreate(coarse_comm,&check_ksp);
4790:     KSPSetOperators(check_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);
4791:     KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);
4792:     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
4793:       if (issym) check_ksp_type = KSPCG;
4794:       else check_ksp_type = KSPGMRES;
4795:       KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);
4796:     } else {
4797:       check_ksp_type = KSPPREONLY;
4798:     }
4799:     KSPSetType(check_ksp,check_ksp_type);
4800:     KSPGetPC(pcbddc->coarse_ksp,&check_pc);
4801:     KSPSetPC(check_ksp,check_pc);
4802:     KSPSetUp(check_ksp);
4803:     /* create random vec */
4804:     VecDuplicate(pcbddc->coarse_vec,&check_vec);
4805:     VecSetRandom(check_vec,NULL);
4806:     if (pcbddc->CoarseNullSpace) { MatNullSpaceRemove(pcbddc->CoarseNullSpace,check_vec,NULL); }
4807:     MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);
4808:     /* solve coarse problem */
4809:     KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);
4810:     if (pcbddc->CoarseNullSpace) { MatNullSpaceRemove(pcbddc->CoarseNullSpace,pcbddc->coarse_vec,NULL); }
4811:     /* check coarse problem residual error */
4812:     VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);
4813:     VecNorm(check_vec,NORM_INFINITY,&infty_error);
4814:     MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);
4815:     VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);
4816:     VecDestroy(&check_vec);
4817:     /* get eigenvalue estimation if inexact */
4818:     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
4819:       KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);
4820:       KSPGetIterationNumber(check_ksp,&k);
4821:       PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues estimated with %d iterations of %s.\n",k,check_ksp_type);
4822:       PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);
4823:     }
4824:     PetscViewerASCIIPrintf(viewer,"Coarse problem exact infty_error   : %1.14e\n",infty_error);
4825:     PetscViewerASCIIPrintf(viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);
4826:     KSPDestroy(&check_ksp);
4827:   }
4828:   if (dbg_flag) { PetscViewerFlush(viewer); }
4829:   return(0);
4830: }

4834: static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc)
4835: {
4836:   PC_BDDC        *pcbddc  = (PC_BDDC*)pc->data;
4837:   PC_IS          *pcis    = (PC_IS*)pc->data;
4838:   Mat_IS         *matis   = (Mat_IS*)pc->pmat->data;
4839:   PCBDDCGraph    mat_graph=pcbddc->mat_graph;
4840:   PetscInt       *is_indices,*auxis;
4841:   PetscInt       bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize;
4842:   PetscInt       total_counts,nodes_touched,where_values=1,vertex_size;
4843:   PetscMPIInt    adapt_interface=0,adapt_interface_reduced=0,NEUMANNCNT=0;
4844:   PetscBool      same_set;
4845:   MPI_Comm       interface_comm;
4846:   PetscBool      use_faces = PETSC_FALSE,use_edges = PETSC_FALSE;
4847:   const PetscInt *neumann_nodes;
4848:   const PetscInt *dirichlet_nodes;
4849:   IS             used_IS,*custom_ISForDofs;
4850:   PetscScalar    *array;
4851:   PetscScalar    *array2;
4852:   PetscViewer    viewer=pcbddc->dbg_viewer;
4853:   PetscInt       *queue_in_global_numbering;

4856:   PetscObjectGetComm((PetscObject)pc,&interface_comm);
4857:   /* Setup local adjacency graph */
4858:   mat_graph->nvtxs=pcis->n;
4859:   if (!mat_graph->xadj) NEUMANNCNT = 1;
4860:   PCBDDCSetupLocalAdjacencyGraph(pc);

4862:   i    = mat_graph->nvtxs;
4863:   PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);
4864:   PetscMalloc2(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched);
4865:   PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));
4866:   PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));
4867:   PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));
4868:   PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));
4869:   PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));

4871:   /* Set dofs splitting in mat_graph->which_dof:
4872:      use information about dofs' splitting if provided by the user,
4873:      otherwise assume a constant block size */
4874:   vertex_size=0;
4875:   if (!pcbddc->n_ISForDofs) {
4876:     MatGetBlockSize(matis->A,&bs);
4877:     PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);
4878:     for (i=0; i<bs; i++) {
4879:       ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);
4880:     }
4881:     PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);
4882:     vertex_size=1;
4883:     /* remove my references to IS objects */
4884:     for (i=0; i<bs; i++) {
4885:       ISDestroy(&custom_ISForDofs[i]);
4886:     }
4887:     PetscFree(custom_ISForDofs);
4888:   }
4889:   for (i=0; i<pcbddc->n_ISForDofs; i++) {
4890:     ISGetSize(pcbddc->ISForDofs[i],&k);
4891:     ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);
4892:     for (j=0; j<k; j++) mat_graph->which_dof[is_indices[j]]=i;
4893:     ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);
4894:   }
4895:   /* use mat block size as vertex size if it has not been set yet */
4896:   if (!vertex_size) {
4897:     MatGetBlockSize(matis->A,&vertex_size);
4898:   }
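  /* When no splitting is provided, the default above groups dofs by stride:
     e.g. for bs = 3 the three IS objects contain {0,3,6,...}, {1,4,7,...} and
     {2,5,8,...}, one per interleaved component, and vertex_size is forced to 1
     (otherwise the block size of the local matrix is used). */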

4900:   /* count number of neigh per node */
4901:   total_counts=0;
4902:   for (i=1; i<pcis->n_neigh; i++) {
4903:     s             = pcis->n_shared[i];
4904:     total_counts += s;
4905:     for (j=0;j<s;j++) mat_graph->count[pcis->shared[i][j]] += 1;
4906:   }

4908:   /* Take into account Neumann data -> it increments number of sharing subdomains for nodes lying on the interface */
4909:   PCBDDCGetNeumannBoundaries(pc,&used_IS);
4910:   VecSet(pcis->vec1_N,0.0);
4911:   VecGetArray(pcis->vec1_N,&array);
4912:   if (used_IS) {
4913:     ISGetSize(used_IS,&neumann_bsize);
4914:     ISGetIndices(used_IS,&neumann_nodes);
4915:     for (i=0; i<neumann_bsize; i++) {
4916:       iindex = neumann_nodes[i];
4917:       if (mat_graph->count[iindex] > NEUMANNCNT && array[iindex]==0.0) {
4918:         mat_graph->count[iindex]+=1;
4919:         total_counts++;
4920:         array[iindex]=array[iindex]+1.0;
4921:       } else if (array[iindex]>0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Error for neumann nodes provided to BDDC! They must be uniquely listed! Found duplicate node %d\n",iindex);
4922:     }
4923:   }
4924:   VecRestoreArray(pcis->vec1_N,&array);
4925:   /* allocate space for storing the set of neighbours for each node */
4926:   PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&mat_graph->neighbours_set);
4927:   if (mat_graph->nvtxs) { PetscMalloc(total_counts*sizeof(PetscInt),&mat_graph->neighbours_set[0]); }
4928:   for (i=1; i<mat_graph->nvtxs; i++) mat_graph->neighbours_set[i]=mat_graph->neighbours_set[i-1]+mat_graph->count[i-1];
4929:   PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));
4930:   for (i=1; i<pcis->n_neigh; i++) {
4931:     s=pcis->n_shared[i];
4932:     for (j=0; j<s; j++) {
4933:       k=pcis->shared[i][j];

4935:       mat_graph->neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i];

4937:       mat_graph->count[k]+=1;
4938:     }
4939:   }
4940:   /* Check consistency of Neumann nodes */
4941:   VecSet(pcis->vec1_global,0.0);
4942:   VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
4943:   VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
4944:   VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
4945:   VecScatterEnd  (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
4946:   VecGetArray(pcis->vec1_N,&array);
4947:   /* set -1 fake neighbour to mimic Neumann boundary */
4948:   if (used_IS) {
4949:     for (i=0; i<neumann_bsize; i++) {
4950:       iindex = neumann_nodes[i];
4951:       if (mat_graph->count[iindex] > NEUMANNCNT) {
4952:         if (mat_graph->count[iindex]+1 != (PetscInt)array[iindex]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Neumann nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,mat_graph->count[iindex]+1,(PetscInt)array[iindex]);
4953:         mat_graph->neighbours_set[iindex][mat_graph->count[iindex]] = -1;

4955:         mat_graph->count[iindex]+=1;
4956:       }
4957:     }
4958:     ISRestoreIndices(used_IS,&neumann_nodes);
4959:   }
4960:   VecRestoreArray(pcis->vec1_N,&array);
4961:   /* sort set of sharing subdomains */
4962:   for (i=0;i<mat_graph->nvtxs;i++) {
4963:     PetscSortInt(mat_graph->count[i],mat_graph->neighbours_set[i]);
4964:   }
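  /* Illustrative note (not in the original source): sorting each set of
     sharing subdomains lets the code below test set equality with a plain
     elementwise comparison. E.g. two nodes with neighbour sets {5,2} and
     {2,5} both become {2,5} and are then recognized as sharing the same
     subdomains. */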

4966:   /* remove interior nodes and Dirichlet boundary nodes from the next search into the graph */
4967:   for (i=0;i<mat_graph->nvtxs;i++) mat_graph->touched[i]=PETSC_FALSE;
4968:   nodes_touched=0;

4970:   PCBDDCGetDirichletBoundaries(pc,&used_IS);
4971:   VecSet(pcis->vec2_N,0.0);
4972:   VecGetArray(pcis->vec1_N,&array);
4973:   VecGetArray(pcis->vec2_N,&array2);
4974:   if (used_IS) {
4975:     ISGetSize(used_IS,&dirichlet_bsize);
4976:     if (dirichlet_bsize && matis->pure_neumann) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet boundaries are intended for matrices with zeroed rows, not for pure Neumann matrices!\n");
4977:     ISGetIndices(used_IS,&dirichlet_nodes);
4978:     for (i=0; i<dirichlet_bsize; i++) {
4979:       iindex = dirichlet_nodes[i];
4980:       if (mat_graph->count[iindex] && !mat_graph->touched[iindex]) {
4981:         if (array[iindex]>0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"BDDC cannot have nodes which are marked as Neumann and Dirichlet at the same time! Wrong node %d\n",iindex);
4982:         mat_graph->touched[iindex] = PETSC_TRUE;
4983:         mat_graph->where[iindex]   = 0;
4984:         nodes_touched++;
4985:         array2[iindex] = array2[iindex]+1.0;
4986:       }
4987:     }
4988:     ISRestoreIndices(used_IS,&dirichlet_nodes);
4989:   }
4990:   VecRestoreArray(pcis->vec1_N,&array);
4991:   VecRestoreArray(pcis->vec2_N,&array2);

4993:   /* Check consistency of Dirichlet nodes */
4994:   VecSet(pcis->vec1_N,1.0);
4995:   VecSet(pcis->vec1_global,0.0);
4996:   VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
4997:   VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
4998:   VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
4999:   VecScatterEnd  (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
5000:   VecSet(pcis->vec1_global,0.0);
5001:   VecScatterBegin(matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
5002:   VecScatterEnd  (matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
5003:   VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
5004:   VecScatterEnd  (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
5005:   VecGetArray(pcis->vec1_N,&array);
5006:   VecGetArray(pcis->vec2_N,&array2);
5007:   if (used_IS) {
5008:     ISGetSize(used_IS,&dirichlet_bsize);
5009:     ISGetIndices(used_IS,&dirichlet_nodes);
5010:     for (i=0; i<dirichlet_bsize; i++) {
5011:       iindex=dirichlet_nodes[i];
5012:       if (array[iindex]>1.0 && array[iindex]!=array2[iindex]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a Dirichlet node %d\n",iindex,(PetscInt)array[iindex],(PetscInt)array2[iindex]);
5013:     }
5014:     ISRestoreIndices(used_IS,&dirichlet_nodes);
5015:   }
5016:   VecRestoreArray(pcis->vec1_N,&array);
5017:   VecRestoreArray(pcis->vec2_N,&array2);

5019:   for (i=0; i<mat_graph->nvtxs; i++) {
5020:     if (!mat_graph->count[i]) {  /* interior nodes */
5021:       mat_graph->touched[i] = PETSC_TRUE;
5022:       mat_graph->where[i]   = 0;
5023:       nodes_touched++;
5024:     }
5025:   }
5026:   mat_graph->ncmps = 0;

5028:   i=0;
5029:   while (nodes_touched<mat_graph->nvtxs) {
5030:     /*  find first untouched node in local ordering */
5031:     while (mat_graph->touched[i]) i++;
5032:     mat_graph->touched[i]=PETSC_TRUE;
5033:     mat_graph->where[i]  =where_values;
5034:     nodes_touched++;
5035:     /* now find all other nodes having the same set of sharing subdomains */
5036:     for (j=i+1; j<mat_graph->nvtxs; j++) {
5037:       /* check for same number of sharing subdomains and dof number */
5038:       if (!mat_graph->touched[j] && mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j]) {
5039:         /* check for same set of sharing subdomains */
5040:         same_set=PETSC_TRUE;
5041:         for (k=0; k<mat_graph->count[j]; k++) {
5042:           if (mat_graph->neighbours_set[i][k] != mat_graph->neighbours_set[j][k]) {
5043:             same_set=PETSC_FALSE;
5044:           }
5045:         }
5046:         /* I found a friend of mine */
5047:         if (same_set) {
5048:           mat_graph->where[j]   = where_values;
5049:           mat_graph->touched[j] = PETSC_TRUE;
5050:           nodes_touched++;
5051:         }
5052:       }
5053:     }
5054:     where_values++;
5055:   }
5056:   where_values--; if (where_values<0) where_values=0;
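  /* Illustrative note (not in the original source): at this point every
     interface node has been assigned a class in mat_graph->where according
     to the triple (count, which_dof, sorted neighbour set). E.g. nodes
     shared by subdomains {1,3} and belonging to dof field 0 all get one
     where value, while nodes shared by {1,3} but in field 1, or shared by
     {1,2,3}, get different values. */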
5057:   PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);
5058:   /* Find connected components defined on the shared interface */
5059:   if (where_values) {
5060:     PCBDDCFindConnectedComponents(mat_graph, where_values);
5061:   }
5062:   PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&queue_in_global_numbering);
5063:   /* check consistency of connected components among neighbouring subdomains -> adapt them if needed */
5064:   for (i=0;i<where_values;i++) {
5065:     /* We cannot be sure that, on a given subset of the local interface,
5066:        the connected components will be the same among sharing subdomains */
5067:     if (mat_graph->where_ncmps[i]>1) {
5068:       adapt_interface=1;
5069:       break;
5070:     }
5071:   }

5073:   MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);
5074:   if (pcbddc->dbg_flag && adapt_interface_reduced) {
5075:     PetscViewerASCIIPrintf(viewer,"Adapting interface\n");
5076:     PetscViewerFlush(viewer);
5077:   }
5078:   if (where_values && adapt_interface_reduced) {
5079:     PetscInt    sum_requests=0,my_rank;
5080:     PetscInt    buffer_size,start_of_recv,size_of_recv,start_of_send;
5081:     PetscInt    temp_buffer_size,ins_val,global_where_counter;
5082:     PetscInt    *cum_recv_counts;
5083:     PetscInt    *where_to_nodes_indices;
5084:     PetscInt    *petsc_buffer;
5085:     PetscMPIInt *recv_buffer;
5086:     PetscMPIInt *recv_buffer_where;
5087:     PetscMPIInt *send_buffer;
5088:     PetscMPIInt size_of_send;
5089:     PetscInt    *sizes_of_sends;
5090:     MPI_Request *send_requests;
5091:     MPI_Request *recv_requests;
5092:     PetscInt    *where_cc_adapt;
5093:     PetscInt    **temp_buffer;
5094:     PetscInt    *nodes_to_temp_buffer_indices;
5095:     PetscInt    *add_to_where;
5096:     PetscInt    *aux_new_xadj,*new_xadj,*new_adjncy;

5098:     /* Restrict adjacency graph using information from connected components */
5099:     PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&aux_new_xadj);
5100:     for (i=0; i<mat_graph->nvtxs; i++) aux_new_xadj[i]=1;
5101:     for (i=0;i<mat_graph->ncmps;i++) {
5102:       k = mat_graph->cptr[i+1]-mat_graph->cptr[i];
5103:       for (j=0;j<k;j++) aux_new_xadj[mat_graph->queue[mat_graph->cptr[i]+j]]=k;
5104:     }
5105:     j = 0;
5106:     for (i=0;i<mat_graph->nvtxs;i++) j += aux_new_xadj[i];

5108:     PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&new_xadj);
5109:     PetscMalloc(j*sizeof(PetscInt),&new_adjncy);
5110:     new_xadj[0]=0;
5111:     for (i=0;i<mat_graph->nvtxs;i++) {
5112:       new_xadj[i+1]=new_xadj[i]+aux_new_xadj[i];
5113:       if (aux_new_xadj[i]==1) new_adjncy[new_xadj[i]]=i;
5114:     }
5115:     PetscFree(aux_new_xadj);
5116:     for (i=0; i<mat_graph->ncmps; i++) {
5117:       k = mat_graph->cptr[i+1]-mat_graph->cptr[i];
5118:       for (j=0; j<k; j++) {
5119:         PetscMemcpy(&new_adjncy[new_xadj[mat_graph->queue[mat_graph->cptr[i]+j]]],&mat_graph->queue[mat_graph->cptr[i]],k*sizeof(PetscInt));
5120:       }
5121:     }
5122:     PCBDDCSetLocalAdjacencyGraph(pc,mat_graph->nvtxs,new_xadj,new_adjncy,PETSC_OWN_POINTER);
5123:     /* For consistency among neighbouring procs, I need to sort (by global ordering) each connected component */
5124:     for (i=0; i<mat_graph->ncmps; i++) {
5125:       k    = mat_graph->cptr[i+1]-mat_graph->cptr[i];
5126:       ISLocalToGlobalMappingApply(matis->mapping,k,&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);
5127:       PetscSortIntWithArray(k,&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);
5128:     }
5129:     /* allocate some space */
5130:     MPI_Comm_rank(interface_comm,&my_rank);
5131:     PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);
5132:     PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));
5133:     PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);
5134:     /* first count how many neighbours per connected component I will receive from */
5135:     cum_recv_counts[0]=0;
5136:     for (i=1; i<where_values+1; i++) {
5137:       j=0;
5138:       while (mat_graph->where[j] != i) j++;
5139:       where_to_nodes_indices[i-1]=j;
5140:       if (mat_graph->neighbours_set[j][0]!=-1) cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; /* we never send to or receive from the fake -1 Neumann neighbour, so it is not counted below */
5141:       else cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1;
5142:     }
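    /* Illustrative note (not in the original source): cum_recv_counts is a
       prefix sum of the expected receives per class. E.g. with
       where_values = 2, a first class shared by 3 real neighbours and a
       second one with neighbour set {-1, 7} (count = 2, one fake), we get
       cum_recv_counts = {0, 3, 4}. */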
5143:     PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);
5144:     PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);
5145:     PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);
5146:     for (i=0; i<cum_recv_counts[where_values]; i++) {
5147:       send_requests[i]=MPI_REQUEST_NULL;
5148:       recv_requests[i]=MPI_REQUEST_NULL;
5149:     }
5150:     /* exchange with my neighbours the number of my connected components on the shared interface */
5151:     for (i=0; i<where_values; i++) {
5152:       j = where_to_nodes_indices[i];
5153:       k = (mat_graph->neighbours_set[j][0] == -1 ?  1 : 0);
5154:       for (; k<mat_graph->count[j]; k++) {
5155:         MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);
5156:         MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);
5157:         sum_requests++;
5158:       }
5159:     }
5160:     MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);
5161:     MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);
5162:     /* determine the connected components I need to adapt */
5163:     PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);
5164:     PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));
5165:     for (i=0; i<where_values; i++) {
5166:       for (j=cum_recv_counts[i]; j<cum_recv_counts[i+1]; j++) {
5167:         /* The first condition is natural (i.e. someone has a different number of ccs than me), the second one is just to be safe */
5168:         if (mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1) {
5169:           where_cc_adapt[i]=PETSC_TRUE;
5170:           break;
5171:         }
5172:       }
5173:     }
5174:     buffer_size = 0;
5175:     for (i=0; i<where_values; i++) {
5176:       if (where_cc_adapt[i]) {
5177:         for (j=i; j<mat_graph->ncmps; j++) {
5178:           if (mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING: where values go from 1 to where_values inclusive */
5179:             buffer_size += 1 + mat_graph->cptr[j+1]-mat_graph->cptr[j];
5180:           }
5181:         }
5182:       }
5183:     }
5184:     PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);
5185:     /* now get from neighbours their ccs (in global numbering) and adapt them if needed */
5186:     /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */
5187:     PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);
5188:     PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));

5190:     sum_requests  = 0;
5191:     start_of_send = 0;
5192:     start_of_recv = cum_recv_counts[where_values];
5193:     for (i=0; i<where_values; i++) {
5194:       if (where_cc_adapt[i]) {
5195:         size_of_send=0;
5196:         for (j=i; j<mat_graph->ncmps; j++) {
5197:           if (mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING: where values go from 1 to where_values inclusive */
5198:             send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j];
5199:             size_of_send += 1;
5200:             for (k=0; k<mat_graph->cptr[j+1]-mat_graph->cptr[j]; k++) {
5201:               send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k];
5202:             }
5203:             size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j];
5204:           }
5205:         }
5206:         j = where_to_nodes_indices[i];
5207:         k = (mat_graph->neighbours_set[j][0] == -1 ?  1 : 0);
5208:         sizes_of_sends[i]=size_of_send;
5209:         for (; k<mat_graph->count[j]; k++) {
5210:           MPI_Isend(&sizes_of_sends[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);
5211:           MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);
5212:           sum_requests++;
5213:         }
5214:         start_of_send+=size_of_send;
5215:       }
5216:     }
5217:     MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);
5218:     MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);
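    /* Illustrative note (not in the original source): for each class to be
       adapted, send_buffer is packed as a sequence of packets
       [len_0, g_0 ... g_{len_0-1}, len_1, g_0 ...], where each len is the
       size of a connected component and the g's are its node indices in
       global numbering. */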

5220:     buffer_size=0;

5222:     for (k=0;k<sum_requests;k++) buffer_size += recv_buffer_where[start_of_recv+k];
5223:     PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);
5224:     /* now exchange the data */
5225:     start_of_recv = 0;
5226:     start_of_send = 0;
5227:     sum_requests  = 0;
5228:     for (i=0; i<where_values; i++) {
5229:       if (where_cc_adapt[i]) {
5230:         size_of_send = sizes_of_sends[i];

5232:         j = where_to_nodes_indices[i];
5233:         k = (mat_graph->neighbours_set[j][0] == -1 ?  1 : 0);
5234:         for (; k<mat_graph->count[j]; k++) {
5235:           MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);
5236:           size_of_recv  = recv_buffer_where[cum_recv_counts[where_values]+sum_requests];
5237:           MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);
5238:           start_of_recv+=size_of_recv;
5239:           sum_requests++;
5240:         }
5241:         start_of_send+=size_of_send;
5242:       }
5243:     }
5244:     MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);
5245:     MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);
5246:     PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);
5247:     for (k=0;k<start_of_recv;k++) petsc_buffer[k]=(PetscInt)recv_buffer[k];
5248:     for (j=0;j<buffer_size;) {
5249:       ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);
5250:       k    = petsc_buffer[j]+1;
5251:       j   += k;
5252:     }
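    /* Illustrative note (not in the original source): each received packet
       [m, g_0 ... g_{m-1}] is converted in place to local numbering; with
       IS_GTOLM_MASK the output keeps length m and any global index not
       owned by this subdomain is mapped to -1. */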
5253:     sum_requests  = cum_recv_counts[where_values];
5254:     start_of_recv = 0;

5256:     PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);
5257:     global_where_counter=0;
5258:     for (i=0; i<where_values; i++) {
5259:       if (where_cc_adapt[i]) {
5260:         temp_buffer_size=0;
5261:         /* find nodes on the shared interface we need to adapt */
5262:         for (j=0; j<mat_graph->nvtxs; j++) {
5263:           if (mat_graph->where[j]==i+1) {
5264:             nodes_to_temp_buffer_indices[j]=temp_buffer_size;
5265:             temp_buffer_size++;
5266:           } else {
5267:             nodes_to_temp_buffer_indices[j]=-1;
5268:           }
5269:         }

5271:         /* allocate some temporary space */
5272:         PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);
5273:         PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);
5274:         PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));
5275:         for (j=1; j<temp_buffer_size; j++) {
5276:           temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i];
5277:         }
5278:         /* analyze contributions from neighbouring subdomains for i-th conn comp
5279:            temp buffer structure:
5280:            supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4)
5281:            3 neighbouring procs with the following connected components:
5282:              neigh 0: [0 1 4], [2 3];  (2 connected components)
5283:              neigh 1: [0 1], [2 3 4];  (2 connected components)
5284:              neigh 2: [0 4], [1], [2 3]; (3 connected components)
5285:            temp_buffer (row-oriented) should be filled as:
5286:              [ 0, 0, 0;
5287:                0, 0, 1;
5288:                1, 1, 2;
5289:                1, 1, 2;
5290:                0, 1, 0; ];
5291:            This way we can simply recover the resulting structure, accounting for possible intersections of the ccs among neighs.
5292:            The mat_graph->where array will be modified to reproduce the following 4 connected components: [0], [1], [2 3], [4].
5293:         */
5294:         for (j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) {
5295:           ins_val=0;
5296:           size_of_recv=recv_buffer_where[sum_requests];  /* total size of recv from neighs */
5297:           for (buffer_size=0;buffer_size<size_of_recv;) {  /* loop until all data from neighs has been taken into account */
5298:             for (k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* properly fill temp_buffer using data from a single recv */
5299:               temp_buffer[nodes_to_temp_buffer_indices[petsc_buffer[start_of_recv+buffer_size+k]]][j] = ins_val;
5300:             }
5301:             buffer_size+=k;
5302:             ins_val++;
5303:           }
5304:           start_of_recv+=size_of_recv;
5305:           sum_requests++;
5306:         }
5307:         PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);
5308:         PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));
5309:         for (j=0; j<temp_buffer_size; j++) {
5310:           if (!add_to_where[j]) { /* found a new cc  */
5311:             global_where_counter++;
5312:             add_to_where[j]=global_where_counter;
5313:             for (k=j+1; k<temp_buffer_size; k++) { /* check for other nodes in new cc */
5314:               same_set=PETSC_TRUE;
5315:               for (s=0; s<cum_recv_counts[i+1]-cum_recv_counts[i]; s++) {
5316:                 if (temp_buffer[j][s]!=temp_buffer[k][s]) {
5317:                   same_set=PETSC_FALSE;
5318:                   break;
5319:                 }
5320:               }
5321:               if (same_set) add_to_where[k] = global_where_counter;
5322:             }
5323:           }
5324:         }
5325:         /* insert new data in where array */
5326:         temp_buffer_size=0;
5327:         for (j=0;j<mat_graph->nvtxs;j++) {
5328:           if (mat_graph->where[j]==i+1) {
5329:             mat_graph->where[j]=where_values+add_to_where[temp_buffer_size];
5330:             temp_buffer_size++;
5331:           }
5332:         }
5333:         PetscFree(temp_buffer[0]);
5334:         PetscFree(temp_buffer);
5335:         PetscFree(add_to_where);
5336:       }
5337:     }
5338:     PetscFree(nodes_to_temp_buffer_indices);
5339:     PetscFree(sizes_of_sends);
5340:     PetscFree(send_requests);
5341:     PetscFree(recv_requests);
5342:     PetscFree(petsc_buffer);
5343:     PetscFree(recv_buffer);
5344:     PetscFree(recv_buffer_where);
5345:     PetscFree(send_buffer);
5346:     PetscFree(cum_recv_counts);
5347:     PetscFree(where_to_nodes_indices);
5348:     PetscFree(where_cc_adapt);

5350:     /* We are ready to evaluate consistent connected components on each part of the shared interface */
5351:     if (global_where_counter) {
5352:       for (i=0;i<mat_graph->nvtxs;i++) mat_graph->touched[i]=PETSC_FALSE;
5353:       global_where_counter=0;
5354:       for (i=0;i<mat_graph->nvtxs;i++) {
5355:         if (mat_graph->where[i] && !mat_graph->touched[i]) {
5356:           global_where_counter++;
5357:           for (j=i+1;j<mat_graph->nvtxs;j++) {
5358:             if (!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) {
5359:               mat_graph->where[j]   = global_where_counter;
5360:               mat_graph->touched[j] = PETSC_TRUE;
5361:             }
5362:           }
5363:           mat_graph->where[i]   = global_where_counter;
5364:           mat_graph->touched[i] = PETSC_TRUE;
5365:         }
5366:       }
5367:       where_values=global_where_counter;
5368:     }
5369:     if (global_where_counter) {
5370:       PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));
5371:       PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));
5372:       PetscFree(mat_graph->where_ncmps);
5373:       PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);
5374:       PCBDDCFindConnectedComponents(mat_graph, where_values);
5375:     }
5376:   } /* Finished adapting interface */

5378:   /* For consistency among neighbouring procs, I need to sort (by global ordering) each connected component */
5379:   for (i=0; i<mat_graph->ncmps; i++) {
5380:     k    = mat_graph->cptr[i+1]-mat_graph->cptr[i];
5381:     ISLocalToGlobalMappingApply(matis->mapping,k,&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);
5382:     PetscSortIntWithArray(k,&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);
5383:   }

5385:   PetscInt  nfc         = 0;
5386:   PetscInt  nec         = 0;
5387:   PetscInt  nvc         = 0;
5388:   PetscBool twodim_flag = PETSC_FALSE;
5389:   for (i=0; i<mat_graph->ncmps; i++) {
5390:     if (mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size) {
5391:       if (mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1) nfc++; /* exactly one neighbour (the fake -1 Neumann neighbour counts here): a face */
5392:       else nec++; /* note that nec will be zero in 2d */
5393:     } else {
5394:       nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i];
5395:     }
5396:   }
5397:   if (!nec) { /* we are in a 2d case -> no faces, only edges */
5398:     nec         = nfc;
5399:     nfc         = 0;
5400:     twodim_flag = PETSC_TRUE;
5401:   }
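  /* Illustrative note (not in the original source): with vertex_size = 1,
     a component of 6 nodes whose nodes have count 1 is counted as a face,
     a component of 4 nodes with count 3 as an edge, and a singleton node
     as a vertex. In 2D no large component is shared by more than two
     subdomains, so nec comes out zero and the "faces" are reclassified as
     edges above. */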
5402:   /* allocate IS arrays for faces, edges. Vertices need a single index set. */
5403:   k=0;
5404:   for (i=0; i<mat_graph->ncmps; i++) {
5405:     j=mat_graph->cptr[i+1]-mat_graph->cptr[i];
5406:     if (j > k) k=j;

5408:     if (j<=vertex_size) k+=vertex_size;
5409:   }
5410:   PetscMalloc(k*sizeof(PetscInt),&auxis);
5411:   if (!pcbddc->vertices_flag && !pcbddc->edges_flag) {
5412:     PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);
5413:     use_faces = PETSC_TRUE;
5414:   }
5415:   if (!pcbddc->vertices_flag && !pcbddc->faces_flag) {
5416:     PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);
5417:     use_edges = PETSC_TRUE;
5418:   }
5419:   nfc=0;
5420:   nec=0;
5421:   for (i=0; i<mat_graph->ncmps; i++) {
5422:     if (mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size) {
5423:       for (j=0; j<mat_graph->cptr[i+1]-mat_graph->cptr[i]; j++) {
5424:         auxis[j]=mat_graph->queue[mat_graph->cptr[i]+j];
5425:       }
5426:       if (mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1) {
5427:         if (twodim_flag) {
5428:           if (use_edges) {
5429:             ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);
5430:             nec++;
5431:           }
5432:         } else {
5433:           if (use_faces) {
5434:             ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);
5435:             nfc++;
5436:           }
5437:         }
5438:       } else {
5439:         if (use_edges) {
5440:           ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);
5441:           nec++;
5442:         }
5443:       }
5444:     }
5445:   }
5446:   pcbddc->n_ISForFaces = nfc;
5447:   pcbddc->n_ISForEdges = nec;

5449:   nvc = 0;
5450:   if (!pcbddc->constraints_flag) {
5451:     for (i=0; i<mat_graph->ncmps; i++) {
5452:       if (mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size) {
5453:         for (j = mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++) {
5454:           auxis[nvc]=mat_graph->queue[j];
5455:           nvc++;
5456:         }
5457:       }
5458:     }
5459:   }

5461:   /* sort vertex set (by local ordering) */
5462:   PetscSortInt(nvc,auxis);
5463:   ISCreateGeneral(PETSC_COMM_SELF,nvc,auxis,PETSC_COPY_VALUES,&pcbddc->ISForVertices);
5464:   if (pcbddc->dbg_flag) {
5465:     PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");
5466:     PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);
5467:     PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components", mat_graph->ncmps);
5468:     for (i=0; i<mat_graph->ncmps; i++) {
5469:       PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n",
5470:                                                 i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);
5471:       PetscViewerASCIISynchronizedPrintf(viewer,"subdomains: ");
5472:       for (j=0; j<mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]; j++) {
5473:         PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->neighbours_set[mat_graph->queue[mat_graph->cptr[i]]][j]);
5474:       }
5475:       PetscViewerASCIISynchronizedPrintf(viewer,"\n");
5476:       for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++) {
5477:         PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",mat_graph->queue[j],queue_in_global_numbering[j]);
5478:       }
5479:     }
5480:     PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");
5481:     PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);
5482:     PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);
5483:     PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);
5484:     PetscViewerFlush(viewer);
5485:   }
5486:   PetscFree(auxis);
5487:   PetscFree(queue_in_global_numbering);
5488:   return(0);
5489: }

5491: /* -------------------------------------------------------------------------- */

5493: /* The following code has been adapted from the function IsConnectedSubdomain contained
5494:    in the source file contig.c of the METIS library (version 5.0.1).
5495:    It finds the connected components of each partition, labeled from 1 to n_dist. */

5499: static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist)
5500: {
5501:   PetscInt  i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid;
5502:   PetscInt  *xadj, *adjncy, *where, *queue;
5503:   PetscInt  *cptr;
5504:   PetscBool *touched;

5507:   nvtxs   = graph->nvtxs;
5508:   xadj    = graph->xadj;
5509:   adjncy  = graph->adjncy;
5510:   where   = graph->where;
5511:   touched = graph->touched;
5512:   queue   = graph->queue;
5513:   cptr    = graph->cptr;

5515:   for (i=0; i<nvtxs; i++) touched[i] = PETSC_FALSE;

5517:   cum_queue = 0;
5518:   ncmps     = 0;

5520:   for (n=0; n<n_dist; n++) {
5521:     pid   = n+1; /* partition labeled by 0 is discarded */
5522:     nleft = 0;
5523:     for (i=0; i<nvtxs; i++) {
5524:       if (where[i] == pid) nleft++;
5525:     }
5526:     for (i=0; i<nvtxs; i++) {
5527:       if (where[i] == pid) break;
5528:     }
5529:     touched[i]       = PETSC_TRUE;
5530:     queue[cum_queue] = i;
5531:     first = 0; last = 1;

5533:     cptr[ncmps] = cum_queue;  /* This actually points to queue */
5534:     ncmps_pid   = 0;

5536:     while (first != nleft) {
5537:       if (first == last) { /* Find another starting vertex */
5538:         cptr[++ncmps] = first+cum_queue;
5539:         ncmps_pid++;
5540:         for (i=0; i<nvtxs; i++) {
5541:           if (where[i] == pid && !touched[i]) break;
5542:         }
5543:         queue[cum_queue+last] = i;
5544:         last++;
5545:         touched[i] = PETSC_TRUE;
5546:       }
5547:       i = queue[cum_queue+first];
5548:       first++;
5549:       for (j=xadj[i]; j<xadj[i+1]; j++) {
5550:         k = adjncy[j];
5551:         if (where[k] == pid && !touched[k]) {
5552:           queue[cum_queue+last] = k;
5553:           last++;
5554:           touched[k] = PETSC_TRUE;
5555:         }
5556:       }
5557:     }
5558:     cptr[++ncmps] = first+cum_queue;
5559:     ncmps_pid++;
5560:     cum_queue             = cptr[ncmps];
5561:     graph->where_ncmps[n] = ncmps_pid;
5562:   }
5563:   graph->ncmps = ncmps;
5564:   return(0);
5565: }
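
/* A minimal standalone sketch (not part of PETSc) of the same breadth-first
   labeling performed above, on a toy 5-vertex graph with two components.
   It uses plain C ints instead of PETSc types; all names (the toy main and
   the arrays below) are hypothetical and only illustrate how xadj/adjncy,
   where, queue and cptr interact. Kept inside #if 0 so the file still
   compiles as-is. */
#if 0
#include <stdio.h>

int main(void)
{
  /* CSR adjacency of the graph 0-1-2 plus 3-4, all vertices in partition 1 */
  int xadj[6]    = {0,1,3,4,5,6};
  int adjncy[6]  = {1,0,2,1,4,3};
  int where[5]   = {1,1,1,1,1};
  int touched[5] = {0,0,0,0,0};
  int queue[5],cptr[6];
  int nvtxs=5,first=0,last=0,ncmps=0,i,j,v;

  cptr[0]=0;
  for (i=0; i<nvtxs; i++) {
    if (touched[i] || where[i] != 1) continue;
    queue[last++]=i; touched[i]=1;       /* start a new component at i */
    while (first < last) {               /* breadth-first sweep */
      v = queue[first++];
      for (j=xadj[v]; j<xadj[v+1]; j++) {
        if (where[adjncy[j]] == 1 && !touched[adjncy[j]]) {
          queue[last++]=adjncy[j];
          touched[adjncy[j]]=1;
        }
      }
    }
    cptr[++ncmps]=last;                  /* close the component in queue */
  }
  /* prints "component 0: 0 1 2" and "component 1: 3 4" */
  for (i=0; i<ncmps; i++) {
    printf("component %d:",i);
    for (j=cptr[i]; j<cptr[i+1]; j++) printf(" %d",queue[j]);
    printf("\n");
  }
  return 0;
}
#endif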