Actual source code: kaij.c

petsc-master 2019-11-13
Report Typos and Errors

  2: /*
  3:   Defines the basic matrix operations for the KAIJ  matrix storage format.
  4:   This format is used to evaluate matrices of the form:

  6:     [I \otimes S + A \otimes T]

  8:   where
  9:     S is a dense (p \times q) matrix
 10:     T is a dense (p \times q) matrix
 11:     A is an AIJ  (n \times n) matrix
 12:     I is the identity matrix

 14:   The resulting matrix is (np \times nq)

 16:   We provide:
 17:      MatMult()
 18:      MatMultAdd()
 19:      MatInvertBlockDiagonal()
 20:   and
 21:      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)

 23:   This single directory handles both the sequential and parallel codes
 24: */

 26:  #include <../src/mat/impls/kaij/kaij.h>
 27:  #include <../src/mat/utils/freespace.h>
 28:  #include <petsc/private/vecimpl.h>

 30: /*@C
 31:    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix

 33:    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel

 35:    Input Parameter:
 36: .  A - the KAIJ matrix

 38:    Output Parameter:
 39: .  B - the AIJ matrix

 41:    Level: advanced

 43:    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.

 45: .seealso: MatCreateKAIJ()
 46: @*/
 47: PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
 48: {
 50:   PetscBool      ismpikaij,isseqkaij;

 53:   PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);
 54:   PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);
 55:   if (ismpikaij) {
 56:     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;

 58:     *B = b->A;
 59:   } else if (isseqkaij) {
 60:     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;

 62:     *B = b->AIJ;
 63:   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
 64:   return(0);
 65: }

 67: /*@C
 68:    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix

 70:    Not Collective; the entire S is stored and returned independently on all processes.

 72:    Input Parameter:
 73: .  A - the KAIJ matrix

 75:    Output Parameters:
 76: +  m - the number of rows in S
 77: .  n - the number of columns in S
 78: -  S - the S matrix, in form of a scalar array in column-major format

 80:    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

 82:    Level: advanced

 84: .seealso: MatCreateKAIJ(), MatGetBlockSizes()
 85: @*/
 86: PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
 87: {
 88:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
 90:   if (m) *m = b->p;
 91:   if (n) *n = b->q;
 92:   if (S) *S = b->S;
 93:   return(0);
 94: }

 96: /*@C
 97:    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix

 99:    Not Collective; the entire S is stored and returned independently on all processes.

101:    Input Parameter:
102: .  A - the KAIJ matrix

104:    Output Parameters:
105: +  m - the number of rows in S
106: .  n - the number of columns in S
107: -  S - the S matrix, in form of a scalar array in column-major format

109:    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

111:    Level: advanced

113: .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114: @*/
115: PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
116: {
117:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
119:   if (m) *m = b->p;
120:   if (n) *n = b->q;
121:   if (S) *S = b->S;
122:   return(0);
123: }

125: /*@C
126:   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()

128:   Not collective

130:   Input Parameter:
131: . A - the KAIJ matrix

133:   Output Parameter:
134: . S - location of pointer to array obtained with MatKAIJGetS()

136:   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137:   If NULL is passed, it will not attempt to zero the array pointer.

139:   Level: advanced
140: .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
141: @*/
142: PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
143: {

147:   if (S) *S = NULL;
148:   PetscObjectStateIncrease((PetscObject)A);
149:   return(0);
150: }

152: /*@C
153:   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()

155:   Not collective

157:   Input Parameter:
158: . A - the KAIJ matrix

160:   Output Parameter:
161: . S - location of pointer to array obtained with MatKAIJGetS()

163:   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
164:   If NULL is passed, it will not attempt to zero the array pointer.

166:   Level: advanced
167: .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
168: @*/
169: PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
170: {
172:   if (S) *S = NULL;
173:   return(0);
174: }

176: /*@C
177:    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix

179:    Not Collective; the entire T is stored and returned independently on all processes

181:    Input Parameter:
182: .  A - the KAIJ matrix

184:    Output Parameter:
185: +  m - the number of rows in T
186: .  n - the number of columns in T
187: -  T - the T matrix, in form of a scalar array in column-major format

189:    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

191:    Level: advanced

193: .seealso: MatCreateKAIJ(), MatGetBlockSizes()
194: @*/
195: PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
196: {
197:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
199:   if (m) *m = b->p;
200:   if (n) *n = b->q;
201:   if (T) *T = b->T;
202:   return(0);
203: }

205: /*@C
206:    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix

208:    Not Collective; the entire T is stored and returned independently on all processes

210:    Input Parameter:
211: .  A - the KAIJ matrix

213:    Output Parameter:
214: +  m - the number of rows in T
215: .  n - the number of columns in T
216: -  T - the T matrix, in form of a scalar array in column-major format

218:    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

220:    Level: advanced

222: .seealso: MatCreateKAIJ(), MatGetBlockSizes()
223: @*/
224: PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
225: {
226:   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
228:   if (m) *m = b->p;
229:   if (n) *n = b->q;
230:   if (T) *T = b->T;
231:   return(0);
232: }

234: /*@C
235:   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()

237:   Not collective

239:   Input Parameter:
240: . A - the KAIJ matrix

242:   Output Parameter:
243: . T - location of pointer to array obtained with MatKAIJGetS()

245:   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
246:   If NULL is passed, it will not attempt to zero the array pointer.

248:   Level: advanced
249: .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
250: @*/
251: PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
252: {

256:   if (T) *T = NULL;
257:   PetscObjectStateIncrease((PetscObject)A);
258:   return(0);
259: }

261: /*@C
262:   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()

264:   Not collective

266:   Input Parameter:
267: . A - the KAIJ matrix

269:   Output Parameter:
270: . T - location of pointer to array obtained with MatKAIJGetS()

272:   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
273:   If NULL is passed, it will not attempt to zero the array pointer.

275:   Level: advanced
276: .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
277: @*/
278: PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
279: {
281:   if (T) *T = NULL;
282:   return(0);
283: }

285: /*@
286:    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix

288:    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel

290:    Input Parameters:
291: +  A - the KAIJ matrix
292: -  B - the AIJ matrix

294:    Notes:
295:    This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
296:    Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.

298:    Level: advanced

300: .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
301: @*/
302: PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
303: {
305:   PetscMPIInt    size;

308:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
309:   if (size == 1) {
310:     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
311:     a->AIJ = B;
312:   } else {
313:     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
314:     a->A = B;
315:   }
316:   PetscObjectReference((PetscObject)B);
317:   return(0);
318: }

320: /*@C
321:    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix

323:    Logically Collective; the entire S is stored independently on all processes.

325:    Input Parameters:
326: +  A - the KAIJ matrix
327: .  p - the number of rows in S
328: .  q - the number of columns in S
329: -  S - the S matrix, in form of a scalar array in column-major format

331:    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix. 
332:    The S matrix is copied, so the user can destroy this array.

334:    Level: Advanced

336: .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
337: @*/
338: PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
339: {
341:   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;

344:   PetscFree(a->S);
345:   if (S) {
346:     PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);
347:     PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));
348:   } else  a->S = NULL;

350:   a->p = p;
351:   a->q = q;
352:   return(0);
353: }

355: /*@C
356:    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix

358:    Logically Collective; the entire T is stored independently on all processes.

360:    Input Parameters:
361: +  A - the KAIJ matrix
362: .  p - the number of rows in S
363: .  q - the number of columns in S
364: -  T - the T matrix, in form of a scalar array in column-major format

366:    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix. 
367:    The T matrix is copied, so the user can destroy this array.

369:    Level: Advanced

371: .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
372: @*/
373: PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
374: {
376:   PetscInt       i,j;
377:   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
378:   PetscBool      isTI = PETSC_FALSE;

381:   /* check if T is an identity matrix */
382:   if (T && (p == q)) {
383:     isTI = PETSC_TRUE;
384:     for (i=0; i<p; i++) {
385:       for (j=0; j<q; j++) {
386:         if (i == j) {
387:           /* diagonal term must be 1 */
388:           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
389:         } else {
390:           /* off-diagonal term must be 0 */
391:           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
392:         }
393:       }
394:     }
395:   }
396:   a->isTI = isTI;

398:   PetscFree(a->T);
399:   if (T && (!isTI)) {
400:     PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);
401:     PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));
402:   } else a->T = NULL;

404:   a->p = p;
405:   a->q = q;
406:   return(0);
407: }

409: PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
410: {
412:   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;

415:   MatDestroy(&b->AIJ);
416:   PetscFree(b->S);
417:   PetscFree(b->T);
418:   PetscFree(b->ibdiag);
419:   PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);
420:   PetscFree(A->data);
421:   return(0);
422: }

424: PetscErrorCode MatSetUp_KAIJ(Mat A)
425: {
427:   PetscInt       n;
428:   PetscMPIInt    size;
429:   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;

432:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
433:   if (size == 1) {
434:     MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);
435:     PetscLayoutSetBlockSize(A->rmap,seqkaij->p);
436:     PetscLayoutSetBlockSize(A->cmap,seqkaij->q);
437:     PetscLayoutSetUp(A->rmap);
438:     PetscLayoutSetUp(A->cmap);
439:   } else {
440:     Mat_MPIKAIJ *a;
441:     Mat_MPIAIJ  *mpiaij;
442:     IS          from,to;
443:     Vec         gvec;
444:     PetscScalar *T;
445:     PetscInt    i,j;

447:     a = (Mat_MPIKAIJ*)A->data;
448:     mpiaij = (Mat_MPIAIJ*)a->A->data;
449:     MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);
450:     PetscLayoutSetBlockSize(A->rmap,seqkaij->p);
451:     PetscLayoutSetBlockSize(A->cmap,seqkaij->q);
452:     PetscLayoutSetUp(A->rmap);
453:     PetscLayoutSetUp(A->cmap);

455:     if (a->isTI) {
456:       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
457:        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
458:        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
459:        * to pass in. */
460:       PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);
461:       for (i=0; i<a->p; i++) {
462:         for (j=0; j<a->q; j++) {
463:           if (i==j) T[i+j*a->p] = 1.0;
464:           else      T[i+j*a->p] = 0.0;
465:         }
466:       }
467:     } else T = a->T;
468:     MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);
469:     MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);
470:     if (a->isTI) {
471:       PetscFree(T);
472:     }

474:     VecGetSize(mpiaij->lvec,&n);
475:     VecCreate(PETSC_COMM_SELF,&a->w);
476:     VecSetSizes(a->w,n*a->q,n*a->q);
477:     VecSetBlockSize(a->w,a->q);
478:     VecSetType(a->w,VECSEQ);

480:     /* create two temporary Index sets for build scatter gather */
481:     ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);
482:     ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);

484:     /* create temporary global vector to generate scatter context */
485:     VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);

487:     /* generate the scatter context */
488:     VecScatterCreate(gvec,from,a->w,to,&a->ctx);

490:     ISDestroy(&from);
491:     ISDestroy(&to);
492:     VecDestroy(&gvec);
493:   }

495:   A->assembled = PETSC_TRUE;
496:   return(0);
497: }

499: PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer)
500: {
501:   PetscViewerFormat format;
502:   Mat_SeqKAIJ       *a = (Mat_SeqKAIJ*)A->data;
503:   Mat               B;
504:   PetscInt          i;
505:   PetscErrorCode    ierr;
506:   PetscBool         ismpikaij;

509:   PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);
510:   PetscViewerGetFormat(viewer,&format);
511:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
512:     PetscViewerASCIIPrintf(viewer,"S and T have %D rows and %D columns\n",a->p,a->q);

514:     /* Print appropriate details for S. */
515:     if (!a->S) {
516:       PetscViewerASCIIPrintf(viewer,"S is NULL\n");
517:     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
518:       PetscViewerASCIIPrintf(viewer,"Entries of S are ");
519:       for (i=0; i<(a->p * a->q); i++) {
520: #if defined(PETSC_USE_COMPLEX)
521:         PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));
522: #else
523:         PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));
524: #endif
525:       }
526:       PetscViewerASCIIPrintf(viewer,"\n");
527:     }

529:     /* Print appropriate details for T. */
530:     if (a->isTI) {
531:       PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n");
532:     } else if (!a->T) {
533:       PetscViewerASCIIPrintf(viewer,"T is NULL\n");
534:     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
535:       PetscViewerASCIIPrintf(viewer,"Entries of T are ");
536:       for (i=0; i<(a->p * a->q); i++) {
537: #if defined(PETSC_USE_COMPLEX)
538:         PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));
539: #else
540:         PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));
541: #endif
542:       }
543:       PetscViewerASCIIPrintf(viewer,"\n");
544:     }

546:     /* Now print details for the AIJ matrix, using the AIJ viewer. */
547:     PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");
548:     if (ismpikaij) {
549:       Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
550:       MatView(b->A,viewer);
551:     } else {
552:       MatView(a->AIJ,viewer);
553:     }

555:   } else {
556:     /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
557:     if (ismpikaij) {
558:       MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
559:     } else {
560:       MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
561:     }
562:     MatView(B,viewer);
563:     MatDestroy(&B);
564:   }
565:   return(0);
566: }

568: PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
569: {
571:   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;

574:   MatDestroy(&b->AIJ);
575:   MatDestroy(&b->OAIJ);
576:   MatDestroy(&b->A);
577:   VecScatterDestroy(&b->ctx);
578:   VecDestroy(&b->w);
579:   PetscFree(b->S);
580:   PetscFree(b->T);
581:   PetscFree(b->ibdiag);
582:   PetscFree(A->data);
583:   return(0);
584: }

586: /* --------------------------------------------------------------------------------------*/

588: /* zz = yy + Axx */
589: PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
590: {
591:   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
592:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
593:   const PetscScalar *s = b->S, *t = b->T;
594:   const PetscScalar *x,*v,*bx;
595:   PetscScalar       *y,*sums;
596:   PetscErrorCode    ierr;
597:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
598:   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;

601:   if (!yy) {
602:     VecSet(zz,0.0);
603:   } else {
604:     VecCopy(yy,zz);
605:   }
606:   if ((!s) && (!t) && (!b->isTI)) return(0);

608:   VecGetArrayRead(xx,&x);
609:   VecGetArray(zz,&y);
610:   idx  = a->j;
611:   v    = a->a;
612:   ii   = a->i;

614:   if (b->isTI) {
615:     for (i=0; i<m; i++) {
616:       jrow = ii[i];
617:       n    = ii[i+1] - jrow;
618:       sums = y + p*i;
619:       for (j=0; j<n; j++) {
620:         for (k=0; k<p; k++) {
621:           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
622:         }
623:       }
624:     }
625:     PetscLogFlops((a->nz)*3*p);
626:   } else if (t) {
627:     for (i=0; i<m; i++) {
628:       jrow = ii[i];
629:       n    = ii[i+1] - jrow;
630:       sums = y + p*i;
631:       for (j=0; j<n; j++) {
632:         for (k=0; k<p; k++) {
633:           for (l=0; l<q; l++) {
634:             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
635:           }
636:         }
637:       }
638:     }
639:     /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
640:      * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
641:      * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
642:      * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
643:     PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);
644:   }
645:   if (s) {
646:     for (i=0; i<m; i++) {
647:       sums = y + p*i;
648:       bx   = x + q*i;
649:       if (i < b->AIJ->cmap->n) {
650:         for (j=0; j<q; j++) {
651:           for (k=0; k<p; k++) {
652:             sums[k] += s[k+j*p]*bx[j];
653:           }
654:         }
655:       }
656:     }
657:     PetscLogFlops(m*2*p*q);
658:   }

660:   VecRestoreArrayRead(xx,&x);
661:   VecRestoreArray(zz,&y);
662:   return(0);
663: }

665: PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)
666: {
669:   MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);
670:   return(0);
671: }

673:  #include <petsc/private/kernels/blockinvert.h>

675: PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values)
676: {
677:   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
678:   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
679:   const PetscScalar *S  = b->S;
680:   const PetscScalar *T  = b->T;
681:   const PetscScalar *v  = a->a;
682:   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
683:   PetscErrorCode    ierr;
684:   PetscInt          i,j,*v_pivots,dof,dof2;
685:   PetscScalar       *diag,aval,*v_work;

688:   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
689:   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");

691:   dof  = p;
692:   dof2 = dof*dof;

694:   if (b->ibdiagvalid) {
695:     if (values) *values = b->ibdiag;
696:     return(0);
697:   }
698:   if (!b->ibdiag) {
699:     PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);
700:     PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));
701:   }
702:   if (values) *values = b->ibdiag;
703:   diag = b->ibdiag;

705:   PetscMalloc2(dof,&v_work,dof,&v_pivots);
706:   for (i=0; i<m; i++) {
707:     if (S) {
708:       PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));
709:     } else {
710:       PetscMemzero(diag,dof2*sizeof(PetscScalar));
711:     }
712:     if (b->isTI) {
713:       aval = 0;
714:       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
715:       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
716:     } else if (T) {
717:       aval = 0;
718:       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
719:       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
720:     }
721:     PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);
722:     diag += dof2;
723:   }
724:   PetscFree2(v_work,v_pivots);

726:   b->ibdiagvalid = PETSC_TRUE;
727:   return(0);
728: }

730: static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
731: {
732:   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;

735:   *B = kaij->AIJ;
736:   return(0);
737: }

739: PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
740: {
741:   PetscErrorCode    ierr;
742:   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
743:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
744:   const PetscScalar *aa = a->a, *T = kaij->T, *v;
745:   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
746:   const PetscScalar *b, *xb, *idiag;
747:   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
748:   PetscInt          i, j, k, i2, bs, bs2, nz;

751:   its = its*lits;
752:   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
753:   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
754:   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
755:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
756:   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks");
757:   else        {bs = p; bs2 = bs*bs; }

759:   if (!m) return(0);

761:   if (!kaij->ibdiagvalid) { MatInvertBlockDiagonal_SeqKAIJ(A,NULL); }
762:   idiag = kaij->ibdiag;
763:   diag  = a->diag;

765:   if (!kaij->sor.setup) {
766:     PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);
767:     kaij->sor.setup = PETSC_TRUE;
768:   }
769:   y     = kaij->sor.y;
770:   w     = kaij->sor.w;
771:   work  = kaij->sor.work;
772:   t     = kaij->sor.t;
773:   arr   = kaij->sor.arr;

775:   VecGetArray(xx,&x);
776:   VecGetArrayRead(bb,&b);

778:   if (flag & SOR_ZERO_INITIAL_GUESS) {
779:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
780:       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
781:        PetscMemcpy(t,b,bs*sizeof(PetscScalar));
782:       i2     =  bs;
783:       idiag  += bs2;
784:       for (i=1; i<m; i++) {
785:         v  = aa + ai[i];
786:         vi = aj + ai[i];
787:         nz = diag[i] - ai[i];

789:         if (T) {                /* b - T (Arow * x) */
790:           PetscMemzero(w,bs*sizeof(PetscScalar));
791:           for (j=0; j<nz; j++) {
792:             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
793:           }
794:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
795:           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
796:         } else if (kaij->isTI) {
797:           PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));
798:           for (j=0; j<nz; j++) {
799:             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
800:           }
801:         } else {
802:           PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));
803:         }

805:         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
806:         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];

808:         idiag += bs2;
809:         i2    += bs;
810:       }
811:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
812:       PetscLogFlops(1.0*bs2*a->nz);
813:       xb = t;
814:     } else xb = b;
815:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
816:       idiag = kaij->ibdiag+bs2*(m-1);
817:       i2    = bs * (m-1);
818:       PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
819:       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
820:       i2    -= bs;
821:       idiag -= bs2;
822:       for (i=m-2; i>=0; i--) {
823:         v  = aa + diag[i] + 1 ;
824:         vi = aj + diag[i] + 1;
825:         nz = ai[i+1] - diag[i] - 1;

827:         if (T) {                /* FIXME: This branch untested */
828:           PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
829:           /* copy all rows of x that are needed into contiguous space */
830:           workt = work;
831:           for (j=0; j<nz; j++) {
832:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
833:             workt += bs;
834:           }
835:           arrt = arr;
836:           for (j=0; j<nz; j++) {
837:             PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
838:             for (k=0; k<bs2; k++) arrt[k] *= v[j];
839:             arrt += bs2;
840:           }
841:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
842:         } else if (kaij->isTI) {
843:           PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));
844:           for (j=0; j<nz; j++) {
845:             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
846:           }
847:         }

849:         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
850:         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];

852:         idiag -= bs2;
853:         i2    -= bs;
854:       }
855:       PetscLogFlops(1.0*bs2*(a->nz));
856:     }
857:     its--;
858:   }
859:   while (its--) {               /* FIXME: This branch not updated */
860:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
861:       i2     =  0;
862:       idiag  = kaij->ibdiag;
863:       for (i=0; i<m; i++) {
864:         PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));

866:         v  = aa + ai[i];
867:         vi = aj + ai[i];
868:         nz = diag[i] - ai[i];
869:         workt = work;
870:         for (j=0; j<nz; j++) {
871:           PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
872:           workt += bs;
873:         }
874:         arrt = arr;
875:         if (T) {
876:           for (j=0; j<nz; j++) {
877:             PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
878:             for (k=0; k<bs2; k++) arrt[k] *= v[j];
879:             arrt += bs2;
880:           }
881:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
882:         } else if (kaij->isTI) {
883:           for (j=0; j<nz; j++) {
884:             PetscMemzero(arrt,bs2*sizeof(PetscScalar));
885:             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
886:             arrt += bs2;
887:           }
888:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
889:         }
890:         PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));

892:         v  = aa + diag[i] + 1;
893:         vi = aj + diag[i] + 1;
894:         nz = ai[i+1] - diag[i] - 1;
895:         workt = work;
896:         for (j=0; j<nz; j++) {
897:           PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
898:           workt += bs;
899:         }
900:         arrt = arr;
901:         if (T) {
902:           for (j=0; j<nz; j++) {
903:             PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
904:             for (k=0; k<bs2; k++) arrt[k] *= v[j];
905:             arrt += bs2;
906:           }
907:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
908:         } else if (kaij->isTI) {
909:           for (j=0; j<nz; j++) {
910:             PetscMemzero(arrt,bs2*sizeof(PetscScalar));
911:             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
912:             arrt += bs2;
913:           }
914:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
915:         }

917:         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
918:         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);

920:         idiag += bs2;
921:         i2    += bs;
922:       }
923:       xb = t;
924:     }
925:     else xb = b;
926:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
927:       idiag = kaij->ibdiag+bs2*(m-1);
928:       i2    = bs * (m-1);
929:       if (xb == b) {
930:         for (i=m-1; i>=0; i--) {
931:           PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
932: 
933:           v  = aa + ai[i];
934:           vi = aj + ai[i];
935:           nz = diag[i] - ai[i];
936:           workt = work;
937:           for (j=0; j<nz; j++) {
938:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
939:             workt += bs;
940:           }
941:           arrt = arr;
942:           if (T) {
943:             for (j=0; j<nz; j++) {
944:               PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
945:               for (k=0; k<bs2; k++) arrt[k] *= v[j];
946:               arrt += bs2;
947:             }
948:             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
949:           } else if (kaij->isTI) {
950:             for (j=0; j<nz; j++) {
951:               PetscMemzero(arrt,bs2*sizeof(PetscScalar));
952:               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
953:               arrt += bs2;
954:             }
955:             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
956:           }

958:           v  = aa + diag[i] + 1;
959:           vi = aj + diag[i] + 1;
960:           nz = ai[i+1] - diag[i] - 1;
961:           workt = work;
962:           for (j=0; j<nz; j++) {
963:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
964:             workt += bs;
965:           }
966:           arrt = arr;
967:           if (T) {
968:             for (j=0; j<nz; j++) {
969:               PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
970:               for (k=0; k<bs2; k++) arrt[k] *= v[j];
971:               arrt += bs2;
972:             }
973:             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
974:           } else if (kaij->isTI) {
975:             for (j=0; j<nz; j++) {
976:               PetscMemzero(arrt,bs2*sizeof(PetscScalar));
977:               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
978:               arrt += bs2;
979:             }
980:             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
981:           }

983:           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
984:           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
985:         }
986:       } else {
987:         for (i=m-1; i>=0; i--) {
988:           PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
989:           v  = aa + diag[i] + 1;
990:           vi = aj + diag[i] + 1;
991:           nz = ai[i+1] - diag[i] - 1;
992:           workt = work;
993:           for (j=0; j<nz; j++) {
994:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
995:             workt += bs;
996:           }
997:           arrt = arr;
998:           if (T) {
999:             for (j=0; j<nz; j++) {
1000:               PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));
1001:               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1002:               arrt += bs2;
1003:             }
1004:             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1005:           } else if (kaij->isTI) {
1006:             for (j=0; j<nz; j++) {
1007:               PetscMemzero(arrt,bs2*sizeof(PetscScalar));
1008:               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1009:               arrt += bs2;
1010:             }
1011:             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1012:           }
1013:           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1014:           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1015:         }
1016:       }
1017:       PetscLogFlops(1.0*bs2*(a->nz));
1018:     }
1019:   }

1021:   VecRestoreArray(xx,&x);
1022:   VecRestoreArrayRead(bb,&b);
1023:   return(0);
1024: }

1026: /*===================================================================================*/

1028: PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1029: {
1030:   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;

1034:   if (!yy) {
1035:     VecSet(zz,0.0);
1036:   } else {
1037:     VecCopy(yy,zz);
1038:   }
1039:   /* start the scatter */
1040:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
1041:   (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);
1042:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
1043:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
1044:   return(0);
1045: }

1047: PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy)
1048: {
1051:   MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);
1052:   return(0);
1053: }

1055: PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values)
1056: {
1057:   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
1058:   PetscErrorCode  ierr;

1061:   (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);
1062:   return(0);
1063: }

1065: /* ----------------------------------------------------------------*/

1067: PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1068: {
1069:   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
1070:   PetscErrorCode  diag = PETSC_FALSE;
1071:   PetscErrorCode  ierr;
1072:   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
1073:   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;

1076:   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1077:   b->getrowactive = PETSC_TRUE;
1078:   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);

1080:   if ((!S) && (!T) && (!b->isTI)) {
1081:     if (ncols)    *ncols  = 0;
1082:     if (cols)     *cols   = NULL;
1083:     if (values)   *values = NULL;
1084:     return(0);
1085:   }

1087:   if (T || b->isTI) {
1088:     MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);
1089:     c     = nzaij;
1090:     for (i=0; i<nzaij; i++) {
1091:       /* check if this row contains a diagonal entry */
1092:       if (colsaij[i] == r) {
1093:         diag = PETSC_TRUE;
1094:         c = i;
1095:       }
1096:     }
1097:   } else nzaij = c = 0;

1099:   /* calculate size of row */
1100:   nz = 0;
1101:   if (S)            nz += q;
1102:   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);

1104:   if (cols || values) {
1105:     PetscMalloc2(nz,&idx,nz,&v);
1106:     for (i=0; i<q; i++) {
1107:       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1108:       v[i] = 0.0;
1109:     }
1110:     if (b->isTI) {
1111:       for (i=0; i<nzaij; i++) {
1112:         for (j=0; j<q; j++) {
1113:           idx[i*q+j] = colsaij[i]*q+j;
1114:           v[i*q+j]   = (j==s ? vaij[i] : 0);
1115:         }
1116:       }
1117:     } else if (T) {
1118:       for (i=0; i<nzaij; i++) {
1119:         for (j=0; j<q; j++) {
1120:           idx[i*q+j] = colsaij[i]*q+j;
1121:           v[i*q+j]   = vaij[i]*T[s+j*p];
1122:         }
1123:       }
1124:     }
1125:     if (S) {
1126:       for (j=0; j<q; j++) {
1127:         idx[c*q+j] = r*q+j;
1128:         v[c*q+j]  += S[s+j*p];
1129:       }
1130:     }
1131:   }

1133:   if (ncols)    *ncols  = nz;
1134:   if (cols)     *cols   = idx;
1135:   if (values)   *values = v;
1136:   return(0);
1137: }

1139: PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1140: {
1143:   PetscFree2(*idx,*v);
1144:   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1145:   return(0);
1146: }

1148: PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1149: {
1150:   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
1151:   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
1152:   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
1153:   Mat             AIJ     = b->A;
1154:   PetscBool       diag    = PETSC_FALSE;
1155:   PetscErrorCode  ierr;
1156:   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
1157:   PetscInt        nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow;
1158:   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;

1161:   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1162:   b->getrowactive = PETSC_TRUE;
1163:   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1164:   lrow = row - rstart;

1166:   if ((!S) && (!T) && (!b->isTI)) {
1167:     if (ncols)    *ncols  = 0;
1168:     if (cols)     *cols   = NULL;
1169:     if (values)   *values = NULL;
1170:     return(0);
1171:   }

1173:   r = lrow/p;
1174:   s = lrow%p;

1176:   if (T || b->isTI) {
1177:     MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
1178:     MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);
1179:     MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);

1181:     c     = ncolsaij + ncolsoaij;
1182:     for (i=0; i<ncolsaij; i++) {
1183:       /* check if this row contains a diagonal entry */
1184:       if (colsaij[i] == r) {
1185:         diag = PETSC_TRUE;
1186:         c = i;
1187:       }
1188:     }
1189:   } else c = 0;

1191:   /* calculate size of row */
1192:   nz = 0;
1193:   if (S)            nz += q;
1194:   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);

1196:   if (cols || values) {
1197:     PetscMalloc2(nz,&idx,nz,&v);
1198:     for (i=0; i<q; i++) {
1199:       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1200:       v[i] = 0.0;
1201:     }
1202:     if (b->isTI) {
1203:       for (i=0; i<ncolsaij; i++) {
1204:         for (j=0; j<q; j++) {
1205:           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1206:           v[i*q+j]   = (j==s ? vals[i] : 0.0);
1207:         }
1208:       }
1209:       for (i=0; i<ncolsoaij; i++) {
1210:         for (j=0; j<q; j++) {
1211:           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1212:           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
1213:         }
1214:       }
1215:     } else if (T) {
1216:       for (i=0; i<ncolsaij; i++) {
1217:         for (j=0; j<q; j++) {
1218:           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1219:           v[i*q+j]   = vals[i]*T[s+j*p];
1220:         }
1221:       }
1222:       for (i=0; i<ncolsoaij; i++) {
1223:         for (j=0; j<q; j++) {
1224:           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1225:           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
1226:         }
1227:       }
1228:     }
1229:     if (S) {
1230:       for (j=0; j<q; j++) {
1231:         idx[c*q+j] = (r+rstart/p)*q+j;
1232:         v[c*q+j]  += S[s+j*p];
1233:       }
1234:     }
1235:   }

1237:   if (ncols)  *ncols  = nz;
1238:   if (cols)   *cols   = idx;
1239:   if (values) *values = v;
1240:   return(0);
1241: }

1243: PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1244: {
1247:   PetscFree2(*idx,*v);
1248:   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1249:   return(0);
1250: }

1252: PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1253: {
1255:   Mat            A;

1258:   MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
1259:   MatCreateSubMatrix(A,isrow,iscol,cll,newmat);
1260:   MatDestroy(&A);
1261:   return(0);
1262: }

1264: /* ---------------------------------------------------------------------------------- */
1265: /*@C
1266:   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:

1268:     [I \otimes S + A \otimes T]

1270:   where
1271:     S is a dense (p \times q) matrix
1272:     T is a dense (p \times q) matrix
1273:     A is an AIJ  (n \times n) matrix
1274:     I is the identity matrix
1275:   The resulting matrix is (np \times nq)
1276:   
1277:   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1278:   
1279:   Collective

1281:   Input Parameters:
1282: + A - the AIJ matrix
1283: . p - number of rows in S and T
1284: . q - number of columns in S and T
1285: . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1286: - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)

1288:   Output Parameter:
1289: . kaij - the new KAIJ matrix

1291:   Notes:
1292:   This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
1293:   Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.

1295:   Level: advanced

1297: .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1298: @*/
1299: PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1300: {
1302:   PetscMPIInt    size;

1305:   MatCreate(PetscObjectComm((PetscObject)A),kaij);
1306:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
1307:   if (size == 1) {
1308:     MatSetType(*kaij,MATSEQKAIJ);
1309:   } else {
1310:     MatSetType(*kaij,MATMPIKAIJ);
1311:   }
1312:   MatKAIJSetAIJ(*kaij,A);
1313:   MatKAIJSetS(*kaij,p,q,S);
1314:   MatKAIJSetT(*kaij,p,q,T);
1315:   MatSetUp(*kaij);
1316:   return(0);
1317: }

1319: /*MC
1320:   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1321:     [I \otimes S + A \otimes T],
1322:   where
1323:     S is a dense (p \times q) matrix,
1324:     T is a dense (p \times q) matrix,
1325:     A is an AIJ  (n \times n) matrix,
1326:     and I is the identity matrix.
1327:   The resulting matrix is (np \times nq).
1328:   
1329:   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.

1331:   Notes:
1332:   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1333:   where x and b are column vectors containing the row-major representations of X and B.

1335:   Level: advanced

1337: .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1338: M*/

1340: PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1341: {
1343:   Mat_MPIKAIJ    *b;
1344:   PetscMPIInt    size;

1347:   PetscNewLog(A,&b);
1348:   A->data  = (void*)b;

1350:   PetscMemzero(A->ops,sizeof(struct _MatOps));

1352:   A->ops->setup = MatSetUp_KAIJ;

1354:   b->w    = 0;
1355:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
1356:   if (size == 1) {
1357:     PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);
1358:     A->ops->setup               = MatSetUp_KAIJ;
1359:     A->ops->destroy             = MatDestroy_SeqKAIJ;
1360:     A->ops->view                = MatView_KAIJ;
1361:     A->ops->mult                = MatMult_SeqKAIJ;
1362:     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1363:     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1364:     A->ops->getrow              = MatGetRow_SeqKAIJ;
1365:     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1366:     A->ops->sor                 = MatSOR_SeqKAIJ;
1367:   } else {
1368:     PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);
1369:     A->ops->setup               = MatSetUp_KAIJ;
1370:     A->ops->destroy             = MatDestroy_MPIKAIJ;
1371:     A->ops->view                = MatView_KAIJ;
1372:     A->ops->mult                = MatMult_MPIKAIJ;
1373:     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1374:     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1375:     A->ops->getrow              = MatGetRow_MPIKAIJ;
1376:     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1377:     PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);
1378:   }
1379:   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1380:   return(0);
1381: }