Actual source code: ispai.c

petsc-3.4.4 2014-03-13
  2: /*
  3:    3/99 Modified by Stephen Barnard to support SPAI version 3.0
  4: */

  6: /*
  7:       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
  8:    Code written by Stephen Barnard.

 10:       Note: there is some BAD memory bleeding below!

 12:       This code needs work

 14:    1) get rid of all memory bleeding
 15:    2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
 16:       rather than having the sp flag for PC_SPAI
 17:    3) fix to set the block size based on the matrix block size

 19: */

 21: #include <petsc-private/pcimpl.h>        /*I "petscpc.h" I*/
 22:  #include petscspai.h

 24: /*
 25:     These are the SPAI include files
 26: */
 27: EXTERN_C_BEGIN
 28: #define MPI /* required for setting SPAI_Comm correctly in basics.h */
 29: #include <spai.h>
 30: #include <matrix.h>
 31: EXTERN_C_END

 33: extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**);
 34: extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix*,Mat*);
 35: extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv);
 36: extern PetscErrorCode MM_to_PETSC(char*,char*,char*);

 38: typedef struct {

 40:   matrix *B;                /* matrix in SPAI format */
 41:   matrix *BT;               /* transpose of matrix in SPAI format */
 42:   matrix *M;                /* the approximate inverse in SPAI format */

 44:   Mat PM;                   /* the approximate inverse PETSc format */

 46:   double epsilon;           /* tolerance */
 47:   int    nbsteps;           /* max number of "improvement" steps per line */
 48:   int    max;               /* max dimensions of is_I, q, etc. */
 49:   int    maxnew;            /* max number of new entries per step */
 50:   int    block_size;        /* constant block size */
 51:   int    cache_size;        /* one of (1,2,3,4,5,6) indicting size of cache */
 52:   int    verbose;           /* SPAI prints timing and statistics */

 54:   int      sp;              /* symmetric nonzero pattern */
 55:   MPI_Comm comm_spai;     /* communicator to be used with spai */
 56: } PC_SPAI;

 58: /**********************************************************************/

 62: static PetscErrorCode PCSetUp_SPAI(PC pc)
 63: {
 64:   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
 66:   Mat            AT;

 69:   init_SPAI();

 71:   if (ispai->sp) {
 72:     ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);
 73:   } else {
 74:     /* Use the transpose to get the column nonzero structure. */
 75:     MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);
 76:     ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);
 77:     MatDestroy(&AT);
 78:   }

 80:   /* Destroy the transpose */
 81:   /* Don't know how to do it. PETSc developers? */

 83:   /* construct SPAI preconditioner */
 84:   /* FILE *messages */     /* file for warning messages */
 85:   /* double epsilon */     /* tolerance */
 86:   /* int nbsteps */        /* max number of "improvement" steps per line */
 87:   /* int max */            /* max dimensions of is_I, q, etc. */
 88:   /* int maxnew */         /* max number of new entries per step */
 89:   /* int block_size */     /* block_size == 1 specifies scalar elments
 90:                               block_size == n specifies nxn constant-block elements
 91:                               block_size == 0 specifies variable-block elements */
 92:   /* int cache_size */     /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */
 93:   /* int    verbose    */  /* verbose == 0 specifies that SPAI is silent
 94:                               verbose == 1 prints timing and matrix statistics */

 96:   bspai(ispai->B,&ispai->M,
 97:                stdout,
 98:                ispai->epsilon,
 99:                ispai->nbsteps,
100:                ispai->max,
101:                ispai->maxnew,
102:                ispai->block_size,
103:                ispai->cache_size,
104:                ispai->verbose);

106:   ConvertMatrixToMat(PetscObjectComm((PetscObject)pc),ispai->M,&ispai->PM);

108:   /* free the SPAI matrices */
109:   sp_free_matrix(ispai->B);
110:   sp_free_matrix(ispai->M);
111:   return(0);
112: }

114: /**********************************************************************/

118: static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y)
119: {
120:   PC_SPAI        *ispai = (PC_SPAI*)pc->data;

124:   /* Now using PETSc's multiply */
125:   MatMult(ispai->PM,xx,y);
126:   return(0);
127: }

129: /**********************************************************************/

133: static PetscErrorCode PCDestroy_SPAI(PC pc)
134: {
136:   PC_SPAI        *ispai = (PC_SPAI*)pc->data;

139:   MatDestroy(&ispai->PM);
140:   MPI_Comm_free(&(ispai->comm_spai));
141:   PetscFree(pc->data);
142:   return(0);
143: }

145: /**********************************************************************/

149: static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer)
150: {
151:   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
153:   PetscBool      iascii;

156:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
157:   if (iascii) {
158:     PetscViewerASCIIPrintf(viewer,"    SPAI preconditioner\n");
159:     PetscViewerASCIIPrintf(viewer,"    epsilon %G\n",   ispai->epsilon);
160:     PetscViewerASCIIPrintf(viewer,"    nbsteps %d\n",   ispai->nbsteps);
161:     PetscViewerASCIIPrintf(viewer,"    max %d\n",       ispai->max);
162:     PetscViewerASCIIPrintf(viewer,"    maxnew %d\n",    ispai->maxnew);
163:     PetscViewerASCIIPrintf(viewer,"    block_size %d\n",ispai->block_size);
164:     PetscViewerASCIIPrintf(viewer,"    cache_size %d\n",ispai->cache_size);
165:     PetscViewerASCIIPrintf(viewer,"    verbose %d\n",   ispai->verbose);
166:     PetscViewerASCIIPrintf(viewer,"    sp %d\n",        ispai->sp);
167:   }
168:   return(0);
169: }

173: static PetscErrorCode  PCSPAISetEpsilon_SPAI(PC pc,double epsilon1)
174: {
175:   PC_SPAI *ispai = (PC_SPAI*)pc->data;

178:   ispai->epsilon = epsilon1;
179:   return(0);
180: }

182: /**********************************************************************/

186: static PetscErrorCode  PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1)
187: {
188:   PC_SPAI *ispai = (PC_SPAI*)pc->data;

191:   ispai->nbsteps = nbsteps1;
192:   return(0);
193: }

195: /**********************************************************************/

197: /* added 1/7/99 g.h. */
200: static PetscErrorCode  PCSPAISetMax_SPAI(PC pc,int max1)
201: {
202:   PC_SPAI *ispai = (PC_SPAI*)pc->data;

205:   ispai->max = max1;
206:   return(0);
207: }

209: /**********************************************************************/

213: static PetscErrorCode  PCSPAISetMaxNew_SPAI(PC pc,int maxnew1)
214: {
215:   PC_SPAI *ispai = (PC_SPAI*)pc->data;

218:   ispai->maxnew = maxnew1;
219:   return(0);
220: }

222: /**********************************************************************/

226: static PetscErrorCode  PCSPAISetBlockSize_SPAI(PC pc,int block_size1)
227: {
228:   PC_SPAI *ispai = (PC_SPAI*)pc->data;

231:   ispai->block_size = block_size1;
232:   return(0);
233: }

235: /**********************************************************************/

239: static PetscErrorCode  PCSPAISetCacheSize_SPAI(PC pc,int cache_size)
240: {
241:   PC_SPAI *ispai = (PC_SPAI*)pc->data;

244:   ispai->cache_size = cache_size;
245:   return(0);
246: }

248: /**********************************************************************/

252: static PetscErrorCode  PCSPAISetVerbose_SPAI(PC pc,int verbose)
253: {
254:   PC_SPAI *ispai = (PC_SPAI*)pc->data;

257:   ispai->verbose = verbose;
258:   return(0);
259: }

261: /**********************************************************************/

265: static PetscErrorCode  PCSPAISetSp_SPAI(PC pc,int sp)
266: {
267:   PC_SPAI *ispai = (PC_SPAI*)pc->data;

270:   ispai->sp = sp;
271:   return(0);
272: }

274: /* -------------------------------------------------------------------*/

278: /*@
279:   PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner

281:   Input Parameters:
282: + pc - the preconditioner
283: - eps - epsilon (default .4)

285:   Notes:  Espilon must be between 0 and 1. It controls the
286:                  quality of the approximation of M to the inverse of
287:                  A. Higher values of epsilon lead to more work, more
288:                  fill, and usually better preconditioners. In many
289:                  cases the best choice of epsilon is the one that
290:                  divides the total solution time equally between the
291:                  preconditioner and the solver.

293:   Level: intermediate

295: .seealso: PCSPAI, PCSetType()
296:   @*/
297: PetscErrorCode  PCSPAISetEpsilon(PC pc,double epsilon1)
298: {

302:   PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1));
303:   return(0);
304: }

306: /**********************************************************************/

310: /*@
311:   PCSPAISetNBSteps - set maximum number of improvement steps per row in
312:         the SPAI preconditioner

314:   Input Parameters:
315: + pc - the preconditioner
316: - n - number of steps (default 5)

318:   Notes:  SPAI constructs to approximation to every column of
319:                  the exact inverse of A in a series of improvement
320:                  steps. The quality of the approximation is determined
321:                  by epsilon. If an approximation achieving an accuracy
322:                  of epsilon is not obtained after ns steps, SPAI simply
323:                  uses the best approximation constructed so far.

325:   Level: intermediate

327: .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew()
328: @*/
329: PetscErrorCode  PCSPAISetNBSteps(PC pc,int nbsteps1)
330: {

334:   PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1));
335:   return(0);
336: }

338: /**********************************************************************/

340: /* added 1/7/99 g.h. */
343: /*@
344:   PCSPAISetMax - set the size of various working buffers in
345:         the SPAI preconditioner

347:   Input Parameters:
348: + pc - the preconditioner
349: - n - size (default is 5000)

351:   Level: intermediate

353: .seealso: PCSPAI, PCSetType()
354: @*/
355: PetscErrorCode  PCSPAISetMax(PC pc,int max1)
356: {

360:   PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1));
361:   return(0);
362: }

364: /**********************************************************************/

368: /*@
369:   PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
370:    in SPAI preconditioner

372:   Input Parameters:
373: + pc - the preconditioner
374: - n - maximum number (default 5)

376:   Level: intermediate

378: .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps()
379: @*/
380: PetscErrorCode  PCSPAISetMaxNew(PC pc,int maxnew1)
381: {

385:   PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1));
386:   return(0);
387: }

389: /**********************************************************************/

393: /*@
394:   PCSPAISetBlockSize - set the block size for the SPAI preconditioner

396:   Input Parameters:
397: + pc - the preconditioner
398: - n - block size (default 1)

400:   Notes: A block
401:                  size of 1 treats A as a matrix of scalar elements. A
402:                  block size of s > 1 treats A as a matrix of sxs
403:                  blocks. A block size of 0 treats A as a matrix with
404:                  variable sized blocks, which are determined by
405:                  searching for dense square diagonal blocks in A.
406:                  This can be very effective for finite-element
407:                  matrices.

409:                  SPAI will convert A to block form, use a block
410:                  version of the preconditioner algorithm, and then
411:                  convert the result back to scalar form.

413:                  In many cases the a block-size parameter other than 1
414:                  can lead to very significant improvement in
415:                  performance.


418:   Level: intermediate

420: .seealso: PCSPAI, PCSetType()
421: @*/
422: PetscErrorCode  PCSPAISetBlockSize(PC pc,int block_size1)
423: {

427:   PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1));
428:   return(0);
429: }

431: /**********************************************************************/

435: /*@
436:   PCSPAISetCacheSize - specify cache size in the SPAI preconditioner

438:   Input Parameters:
439: + pc - the preconditioner
440: - n -  cache size {0,1,2,3,4,5} (default 5)

442:   Notes:    SPAI uses a hash table to cache messages and avoid
443:                  redundant communication. If suggest always using
444:                  5. This parameter is irrelevant in the serial
445:                  version.

447:   Level: intermediate

449: .seealso: PCSPAI, PCSetType()
450: @*/
451: PetscErrorCode  PCSPAISetCacheSize(PC pc,int cache_size)
452: {

456:   PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size));
457:   return(0);
458: }

460: /**********************************************************************/

464: /*@
465:   PCSPAISetVerbose - verbosity level for the SPAI preconditioner

467:   Input Parameters:
468: + pc - the preconditioner
469: - n - level (default 1)

471:   Notes: print parameters, timings and matrix statistics

473:   Level: intermediate

475: .seealso: PCSPAI, PCSetType()
476: @*/
477: PetscErrorCode  PCSPAISetVerbose(PC pc,int verbose)
478: {

482:   PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose));
483:   return(0);
484: }

486: /**********************************************************************/

490: /*@
491:   PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner

493:   Input Parameters:
494: + pc - the preconditioner
495: - n - 0 or 1

497:   Notes: If A has a symmetric nonzero pattern use -sp 1 to
498:                  improve performance by eliminating some communication
499:                  in the parallel version. Even if A does not have a
500:                  symmetric nonzero pattern -sp 1 may well lead to good
501:                  results, but the code will not follow the published
502:                  SPAI algorithm exactly.


505:   Level: intermediate

507: .seealso: PCSPAI, PCSetType()
508: @*/
509: PetscErrorCode  PCSPAISetSp(PC pc,int sp)
510: {

514:   PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp));
515:   return(0);
516: }

518: /**********************************************************************/

520: /**********************************************************************/

524: static PetscErrorCode PCSetFromOptions_SPAI(PC pc)
525: {
526:   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
528:   int            nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp;
529:   double         epsilon1;
530:   PetscBool      flg;

533:   PetscOptionsHead("SPAI options");
534:   PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);
535:   if (flg) {
536:     PCSPAISetEpsilon(pc,epsilon1);
537:   }
538:   PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);
539:   if (flg) {
540:     PCSPAISetNBSteps(pc,nbsteps1);
541:   }
542:   /* added 1/7/99 g.h. */
543:   PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);
544:   if (flg) {
545:     PCSPAISetMax(pc,max1);
546:   }
547:   PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);
548:   if (flg) {
549:     PCSPAISetMaxNew(pc,maxnew1);
550:   }
551:   PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);
552:   if (flg) {
553:     PCSPAISetBlockSize(pc,block_size1);
554:   }
555:   PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);
556:   if (flg) {
557:     PCSPAISetCacheSize(pc,cache_size);
558:   }
559:   PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);
560:   if (flg) {
561:     PCSPAISetVerbose(pc,verbose);
562:   }
563:   PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);
564:   if (flg) {
565:     PCSPAISetSp(pc,sp);
566:   }
567:   PetscOptionsTail();
568:   return(0);
569: }

571: /**********************************************************************/

573: /*MC
574:    PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard
575:      as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3)

577:    Options Database Keys:
578: +  -pc_spai_epsilon <eps> - set tolerance
579: .  -pc_spai_nbstep <n> - set nbsteps
580: .  -pc_spai_max <m> - set max
581: .  -pc_spai_max_new <m> - set maxnew
582: .  -pc_spai_block_size <n> - set block size
583: .  -pc_spai_cache_size <n> - set cache size
584: .  -pc_spai_sp <m> - set sp
585: -  -pc_spai_set_verbose <true,false> - verbose output

587:    Notes: This only works with AIJ matrices.

589:    Level: beginner

591:    Concepts: approximate inverse

593: .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
594:     PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(),
595:     PCSPAISetVerbose(), PCSPAISetSp()
596: M*/

600: PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc)
601: {
602:   PC_SPAI        *ispai;

606:   PetscNewLog(pc,PC_SPAI,&ispai);
607:   pc->data = ispai;

609:   pc->ops->destroy         = PCDestroy_SPAI;
610:   pc->ops->apply           = PCApply_SPAI;
611:   pc->ops->applyrichardson = 0;
612:   pc->ops->setup           = PCSetUp_SPAI;
613:   pc->ops->view            = PCView_SPAI;
614:   pc->ops->setfromoptions  = PCSetFromOptions_SPAI;

616:   ispai->epsilon    = .4;
617:   ispai->nbsteps    = 5;
618:   ispai->max        = 5000;
619:   ispai->maxnew     = 5;
620:   ispai->block_size = 1;
621:   ispai->cache_size = 5;
622:   ispai->verbose    = 0;

624:   ispai->sp = 1;
625:   MPI_Comm_dup(PetscObjectComm((PetscObject)pc),&(ispai->comm_spai));

627:   PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetEpsilon_C",PCSPAISetEpsilon_SPAI);
628:   PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetNBSteps_C",PCSPAISetNBSteps_SPAI);
629:   PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMax_C",PCSPAISetMax_SPAI);
630:   PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMaxNew_C",PCSPAISetMaxNew_SPAI);
631:   PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetBlockSize_C",PCSPAISetBlockSize_SPAI);
632:   PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetCacheSize_C",PCSPAISetCacheSize_SPAI);
633:   PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetVerbose_C",PCSPAISetVerbose_SPAI);
634:   PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetSp_C",PCSPAISetSp_SPAI);
635:   return(0);
636: }

638: /**********************************************************************/

640: /*
641:    Converts from a PETSc matrix to an SPAI matrix
642: */
645: PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B)
646: {
647:   matrix                  *M;
648:   int                     i,j,col;
649:   int                     row_indx;
650:   int                     len,pe,local_indx,start_indx;
651:   int                     *mapping;
652:   PetscErrorCode          ierr;
653:   const int               *cols;
654:   const double            *vals;
655:   int                     *num_ptr,n,mnl,nnl,nz,rstart,rend;
656:   PetscMPIInt             size,rank;
657:   struct compressed_lines *rows;

660:   MPI_Comm_size(comm,&size);
661:   MPI_Comm_rank(comm,&rank);
662:   MatGetSize(A,&n,&n);
663:   MatGetLocalSize(A,&mnl,&nnl);

665:   /*
666:     not sure why a barrier is required. commenting out
667:   MPI_Barrier(comm);
668:   */

670:   M = new_matrix((SPAI_Comm)comm);

672:   M->n              = n;
673:   M->bs             = 1;
674:   M->max_block_size = 1;

676:   M->mnls          = (int*)malloc(sizeof(int)*size);
677:   M->start_indices = (int*)malloc(sizeof(int)*size);
678:   M->pe            = (int*)malloc(sizeof(int)*n);
679:   M->block_sizes   = (int*)malloc(sizeof(int)*n);
680:   for (i=0; i<n; i++) M->block_sizes[i] = 1;

682:   MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);

684:   M->start_indices[0] = 0;
685:   for (i=1; i<size; i++) M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1];

687:   M->mnl            = M->mnls[M->myid];
688:   M->my_start_index = M->start_indices[M->myid];

690:   for (i=0; i<size; i++) {
691:     start_indx = M->start_indices[i];
692:     for (j=0; j<M->mnls[i]; j++) M->pe[start_indx+j] = i;
693:   }

695:   if (AT) {
696:     M->lines = new_compressed_lines(M->mnls[rank],1);
697:   } else {
698:     M->lines = new_compressed_lines(M->mnls[rank],0);
699:   }

701:   rows = M->lines;

703:   /* Determine the mapping from global indices to pointers */
704:   PetscMalloc(M->n*sizeof(int),&mapping);
705:   pe         = 0;
706:   local_indx = 0;
707:   for (i=0; i<M->n; i++) {
708:     if (local_indx >= M->mnls[pe]) {
709:       pe++;
710:       local_indx = 0;
711:     }
712:     mapping[i] = local_indx + M->start_indices[pe];
713:     local_indx++;
714:   }


717:   PetscMalloc(mnl*sizeof(int),&num_ptr);

719:   /*********************************************************/
720:   /************** Set up the row structure *****************/
721:   /*********************************************************/

723:   /* count number of nonzeros in every row */
724:   MatGetOwnershipRange(A,&rstart,&rend);
725:   for (i=rstart; i<rend; i++) {
726:     MatGetRow(A,i,&num_ptr[i-rstart],NULL,NULL);
727:     MatRestoreRow(A,i,&num_ptr[i-rstart],NULL,NULL);
728:   }

730:   /* allocate buffers */
731:   len = 0;
732:   for (i=0; i<mnl; i++) {
733:     if (len < num_ptr[i]) len = num_ptr[i];
734:   }

736:   for (i=rstart; i<rend; i++) {
737:     row_indx             = i-rstart;
738:     len                  = num_ptr[row_indx];
739:     rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int));
740:     rows->A[row_indx]    = (double*)malloc(len*sizeof(double));
741:   }

743:   /* copy the matrix */
744:   for (i=rstart; i<rend; i++) {
745:     row_indx = i - rstart;
746:     MatGetRow(A,i,&nz,&cols,&vals);
747:     for (j=0; j<nz; j++) {
748:       col = cols[j];
749:       len = rows->len[row_indx]++;

751:       rows->ptrs[row_indx][len] = mapping[col];
752:       rows->A[row_indx][len]    = vals[j];
753:     }
754:     rows->slen[row_indx] = rows->len[row_indx];

756:     MatRestoreRow(A,i,&nz,&cols,&vals);
757:   }


760:   /************************************************************/
761:   /************** Set up the column structure *****************/
762:   /*********************************************************/

764:   if (AT) {

766:     /* count number of nonzeros in every column */
767:     for (i=rstart; i<rend; i++) {
768:       MatGetRow(AT,i,&num_ptr[i-rstart],NULL,NULL);
769:       MatRestoreRow(AT,i,&num_ptr[i-rstart],NULL,NULL);
770:     }

772:     /* allocate buffers */
773:     len = 0;
774:     for (i=0; i<mnl; i++) {
775:       if (len < num_ptr[i]) len = num_ptr[i];
776:     }

778:     for (i=rstart; i<rend; i++) {
779:       row_indx = i-rstart;
780:       len      = num_ptr[row_indx];

782:       rows->rptrs[row_indx] = (int*)malloc(len*sizeof(int));
783:     }

785:     /* copy the matrix (i.e., the structure) */
786:     for (i=rstart; i<rend; i++) {
787:       row_indx = i - rstart;
788:       MatGetRow(AT,i,&nz,&cols,&vals);
789:       for (j=0; j<nz; j++) {
790:         col = cols[j];
791:         len = rows->rlen[row_indx]++;

793:         rows->rptrs[row_indx][len] = mapping[col];
794:       }
795:       MatRestoreRow(AT,i,&nz,&cols,&vals);
796:     }
797:   }

799:   PetscFree(num_ptr);
800:   PetscFree(mapping);

802:   order_pointers(M);
803:   M->maxnz = calc_maxnz(M);
804:   *B       = M;
805:   return(0);
806: }

808: /**********************************************************************/

810: /*
811:    Converts from an SPAI matrix B  to a PETSc matrix PB.
812:    This assumes that the the SPAI matrix B is stored in
813:    COMPRESSED-ROW format.
814: */
817: PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB)
818: {
819:   PetscMPIInt    size,rank;
821:   int            m,n,M,N;
822:   int            d_nz,o_nz;
823:   int            *d_nnz,*o_nnz;
824:   int            i,k,global_row,global_col,first_diag_col,last_diag_col;
825:   PetscScalar    val;

828:   MPI_Comm_size(comm,&size);
829:   MPI_Comm_rank(comm,&rank);

831:   m    = n = B->mnls[rank];
832:   d_nz = o_nz = 0;

834:   /* Determine preallocation for MatCreateMPIAIJ */
835:   PetscMalloc(m*sizeof(PetscInt),&d_nnz);
836:   PetscMalloc(m*sizeof(PetscInt),&o_nnz);
837:   for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0;
838:   first_diag_col = B->start_indices[rank];
839:   last_diag_col  = first_diag_col + B->mnls[rank];
840:   for (i=0; i<B->mnls[rank]; i++) {
841:     for (k=0; k<B->lines->len[i]; k++) {
842:       global_col = B->lines->ptrs[i][k];
843:       if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
844:       else o_nnz[i]++;
845:     }
846:   }

848:   M = N = B->n;
849:   /* Here we only know how to create AIJ format */
850:   MatCreate(comm,PB);
851:   MatSetSizes(*PB,m,n,M,N);
852:   MatSetType(*PB,MATAIJ);
853:   MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);
854:   MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);

856:   for (i=0; i<B->mnls[rank]; i++) {
857:     global_row = B->start_indices[rank]+i;
858:     for (k=0; k<B->lines->len[i]; k++) {
859:       global_col = B->lines->ptrs[i][k];

861:       val  = B->lines->A[i][k];
862:       MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);
863:     }
864:   }

866:   PetscFree(d_nnz);
867:   PetscFree(o_nnz);

869:   MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);
870:   MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);
871:   return(0);
872: }

874: /**********************************************************************/

876: /*
877:    Converts from an SPAI vector v  to a PETSc vec Pv.
878: */
881: PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv)
882: {
884:   PetscMPIInt    size,rank;
885:   int            m,M,i,*mnls,*start_indices,*global_indices;

888:   MPI_Comm_size(comm,&size);
889:   MPI_Comm_rank(comm,&rank);

891:   m = v->mnl;
892:   M = v->n;


895:   VecCreateMPI(comm,m,M,Pv);

897:   PetscMalloc(size*sizeof(int),&mnls);
898:   MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);

900:   PetscMalloc(size*sizeof(int),&start_indices);

902:   start_indices[0] = 0;
903:   for (i=1; i<size; i++) start_indices[i] = start_indices[i-1] +mnls[i-1];

905:   PetscMalloc(v->mnl*sizeof(int),&global_indices);
906:   for (i=0; i<v->mnl; i++) global_indices[i] = start_indices[rank] + i;

908:   PetscFree(mnls);
909:   PetscFree(start_indices);

911:   VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);
912:   VecAssemblyBegin(*Pv);
913:   VecAssemblyEnd(*Pv);

915:   PetscFree(global_indices);
916:   return(0);
917: }