Actual source code: maij.c

petsc-master 2020-11-25
Report Typos and Errors

  2: /*
  3:     Defines the basic matrix operations for the MAIJ  matrix storage format.
  4:   This format is used for restriction and interpolation operations for
  5:   multicomponent problems. It interpolates each component the same way
  6:   independently.

  8:      We provide:
  9:          MatMult()
 10:          MatMultTranspose()
 11:          MatMultTransposeAdd()
 12:          MatMultAdd()
 13:           and
 14:          MatCreateMAIJ(Mat,dof,Mat*)

 16:      This single directory handles both the sequential and parallel codes
 17: */

 19: #include <../src/mat/impls/maij/maij.h>
 20: #include <../src/mat/utils/freespace.h>

 22: /*@
 23:    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix

 25:    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel

 27:    Input Parameter:
 28: .  A - the MAIJ matrix

 30:    Output Parameter:
 31: .  B - the AIJ matrix

 33:    Level: advanced

 35:    Notes:
 36:     The reference count on the AIJ matrix is not increased so you should not destroy it.

 38: .seealso: MatCreateMAIJ()
 39: @*/
 40: PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
 41: {
 43:   PetscBool      ismpimaij,isseqmaij;

 46:   PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
 47:   PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
 48:   if (ismpimaij) {
 49:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 51:     *B = b->A;
 52:   } else if (isseqmaij) {
 53:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 55:     *B = b->AIJ;
 56:   } else {
 57:     *B = A;
 58:   }
 59:   return(0);
 60: }

 62: /*@
 63:    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size

 65:    Logically Collective

 67:    Input Parameter:
 68: +  A - the MAIJ matrix
 69: -  dof - the block size for the new matrix

 71:    Output Parameter:
 72: .  B - the new MAIJ matrix

 74:    Level: advanced

 76: .seealso: MatCreateMAIJ()
 77: @*/
 78: PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
 79: {
 81:   Mat            Aij = NULL;

 85:   MatMAIJGetAIJ(A,&Aij);
 86:   MatCreateMAIJ(Aij,dof,B);
 87:   return(0);
 88: }

 90: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
 91: {
 93:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;

 96:   MatDestroy(&b->AIJ);
 97:   PetscFree(A->data);
 98:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);
 99:   PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaij_seqmaij_C",NULL);
100:   return(0);
101: }

103: PetscErrorCode MatSetUp_MAIJ(Mat A)
104: {
106:   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
107: }

109: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
110: {
112:   Mat            B;

115:   MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
116:   MatView(B,viewer);
117:   MatDestroy(&B);
118:   return(0);
119: }

121: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
122: {
124:   Mat            B;

127:   MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
128:   MatView(B,viewer);
129:   MatDestroy(&B);
130:   return(0);
131: }

133: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
134: {
136:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

139:   MatDestroy(&b->AIJ);
140:   MatDestroy(&b->OAIJ);
141:   MatDestroy(&b->A);
142:   VecScatterDestroy(&b->ctx);
143:   VecDestroy(&b->w);
144:   PetscFree(A->data);
145:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);
146:   PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_mpiaij_mpimaij_C",NULL);
147:   PetscObjectChangeTypeName((PetscObject)A,NULL);
148:   return(0);
149: }

151: /*MC
152:   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
153:   multicomponent problems, interpolating or restricting each component the same way independently.
154:   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.

156:   Operations provided:
157: + MatMult
158: . MatMultTranspose
159: . MatMultAdd
160: - MatMultTransposeAdd

162:   Level: advanced

164: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
165: M*/

167: PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
168: {
170:   Mat_MPIMAIJ    *b;
171:   PetscMPIInt    size;

174:   PetscNewLog(A,&b);
175:   A->data  = (void*)b;

177:   PetscMemzero(A->ops,sizeof(struct _MatOps));

179:   A->ops->setup = MatSetUp_MAIJ;

181:   b->AIJ  = NULL;
182:   b->dof  = 0;
183:   b->OAIJ = NULL;
184:   b->ctx  = NULL;
185:   b->w    = NULL;
186:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
187:   if (size == 1) {
188:     PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
189:   } else {
190:     PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
191:   }
192:   A->preallocated  = PETSC_TRUE;
193:   A->assembled     = PETSC_TRUE;
194:   return(0);
195: }

197: /* --------------------------------------------------------------------------------------*/
198: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
199: {
200:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
201:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
202:   const PetscScalar *x,*v;
203:   PetscScalar       *y, sum1, sum2;
204:   PetscErrorCode    ierr;
205:   PetscInt          nonzerorow=0,n,i,jrow,j;
206:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

209:   VecGetArrayRead(xx,&x);
210:   VecGetArray(yy,&y);
211:   idx  = a->j;
212:   v    = a->a;
213:   ii   = a->i;

215:   for (i=0; i<m; i++) {
216:     jrow  = ii[i];
217:     n     = ii[i+1] - jrow;
218:     sum1  = 0.0;
219:     sum2  = 0.0;

221:     nonzerorow += (n>0);
222:     for (j=0; j<n; j++) {
223:       sum1 += v[jrow]*x[2*idx[jrow]];
224:       sum2 += v[jrow]*x[2*idx[jrow]+1];
225:       jrow++;
226:     }
227:     y[2*i]   = sum1;
228:     y[2*i+1] = sum2;
229:   }

231:   PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);
232:   VecRestoreArrayRead(xx,&x);
233:   VecRestoreArray(yy,&y);
234:   return(0);
235: }

237: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
238: {
239:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
240:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
241:   const PetscScalar *x,*v;
242:   PetscScalar       *y,alpha1,alpha2;
243:   PetscErrorCode    ierr;
244:   PetscInt          n,i;
245:   const PetscInt    m = b->AIJ->rmap->n,*idx;

248:   VecSet(yy,0.0);
249:   VecGetArrayRead(xx,&x);
250:   VecGetArray(yy,&y);

252:   for (i=0; i<m; i++) {
253:     idx    = a->j + a->i[i];
254:     v      = a->a + a->i[i];
255:     n      = a->i[i+1] - a->i[i];
256:     alpha1 = x[2*i];
257:     alpha2 = x[2*i+1];
258:     while (n-->0) {
259:       y[2*(*idx)]   += alpha1*(*v);
260:       y[2*(*idx)+1] += alpha2*(*v);
261:       idx++; v++;
262:     }
263:   }
264:   PetscLogFlops(4.0*a->nz);
265:   VecRestoreArrayRead(xx,&x);
266:   VecRestoreArray(yy,&y);
267:   return(0);
268: }

270: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
271: {
272:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
273:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
274:   const PetscScalar *x,*v;
275:   PetscScalar       *y,sum1, sum2;
276:   PetscErrorCode    ierr;
277:   PetscInt          n,i,jrow,j;
278:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

281:   if (yy != zz) {VecCopy(yy,zz);}
282:   VecGetArrayRead(xx,&x);
283:   VecGetArray(zz,&y);
284:   idx  = a->j;
285:   v    = a->a;
286:   ii   = a->i;

288:   for (i=0; i<m; i++) {
289:     jrow = ii[i];
290:     n    = ii[i+1] - jrow;
291:     sum1 = 0.0;
292:     sum2 = 0.0;
293:     for (j=0; j<n; j++) {
294:       sum1 += v[jrow]*x[2*idx[jrow]];
295:       sum2 += v[jrow]*x[2*idx[jrow]+1];
296:       jrow++;
297:     }
298:     y[2*i]   += sum1;
299:     y[2*i+1] += sum2;
300:   }

302:   PetscLogFlops(4.0*a->nz);
303:   VecRestoreArrayRead(xx,&x);
304:   VecRestoreArray(zz,&y);
305:   return(0);
306: }
307: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
308: {
309:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
310:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
311:   const PetscScalar *x,*v;
312:   PetscScalar       *y,alpha1,alpha2;
313:   PetscErrorCode    ierr;
314:   PetscInt          n,i;
315:   const PetscInt    m = b->AIJ->rmap->n,*idx;

318:   if (yy != zz) {VecCopy(yy,zz);}
319:   VecGetArrayRead(xx,&x);
320:   VecGetArray(zz,&y);

322:   for (i=0; i<m; i++) {
323:     idx    = a->j + a->i[i];
324:     v      = a->a + a->i[i];
325:     n      = a->i[i+1] - a->i[i];
326:     alpha1 = x[2*i];
327:     alpha2 = x[2*i+1];
328:     while (n-->0) {
329:       y[2*(*idx)]   += alpha1*(*v);
330:       y[2*(*idx)+1] += alpha2*(*v);
331:       idx++; v++;
332:     }
333:   }
334:   PetscLogFlops(4.0*a->nz);
335:   VecRestoreArrayRead(xx,&x);
336:   VecRestoreArray(zz,&y);
337:   return(0);
338: }
339: /* --------------------------------------------------------------------------------------*/
340: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
341: {
342:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
343:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
344:   const PetscScalar *x,*v;
345:   PetscScalar       *y,sum1, sum2, sum3;
346:   PetscErrorCode    ierr;
347:   PetscInt          nonzerorow=0,n,i,jrow,j;
348:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

351:   VecGetArrayRead(xx,&x);
352:   VecGetArray(yy,&y);
353:   idx  = a->j;
354:   v    = a->a;
355:   ii   = a->i;

357:   for (i=0; i<m; i++) {
358:     jrow  = ii[i];
359:     n     = ii[i+1] - jrow;
360:     sum1  = 0.0;
361:     sum2  = 0.0;
362:     sum3  = 0.0;

364:     nonzerorow += (n>0);
365:     for (j=0; j<n; j++) {
366:       sum1 += v[jrow]*x[3*idx[jrow]];
367:       sum2 += v[jrow]*x[3*idx[jrow]+1];
368:       sum3 += v[jrow]*x[3*idx[jrow]+2];
369:       jrow++;
370:      }
371:     y[3*i]   = sum1;
372:     y[3*i+1] = sum2;
373:     y[3*i+2] = sum3;
374:   }

376:   PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);
377:   VecRestoreArrayRead(xx,&x);
378:   VecRestoreArray(yy,&y);
379:   return(0);
380: }

382: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
383: {
384:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
385:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
386:   const PetscScalar *x,*v;
387:   PetscScalar       *y,alpha1,alpha2,alpha3;
388:   PetscErrorCode    ierr;
389:   PetscInt          n,i;
390:   const PetscInt    m = b->AIJ->rmap->n,*idx;

393:   VecSet(yy,0.0);
394:   VecGetArrayRead(xx,&x);
395:   VecGetArray(yy,&y);

397:   for (i=0; i<m; i++) {
398:     idx    = a->j + a->i[i];
399:     v      = a->a + a->i[i];
400:     n      = a->i[i+1] - a->i[i];
401:     alpha1 = x[3*i];
402:     alpha2 = x[3*i+1];
403:     alpha3 = x[3*i+2];
404:     while (n-->0) {
405:       y[3*(*idx)]   += alpha1*(*v);
406:       y[3*(*idx)+1] += alpha2*(*v);
407:       y[3*(*idx)+2] += alpha3*(*v);
408:       idx++; v++;
409:     }
410:   }
411:   PetscLogFlops(6.0*a->nz);
412:   VecRestoreArrayRead(xx,&x);
413:   VecRestoreArray(yy,&y);
414:   return(0);
415: }

417: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
418: {
419:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
420:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
421:   const PetscScalar *x,*v;
422:   PetscScalar       *y,sum1, sum2, sum3;
423:   PetscErrorCode    ierr;
424:   PetscInt          n,i,jrow,j;
425:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

428:   if (yy != zz) {VecCopy(yy,zz);}
429:   VecGetArrayRead(xx,&x);
430:   VecGetArray(zz,&y);
431:   idx  = a->j;
432:   v    = a->a;
433:   ii   = a->i;

435:   for (i=0; i<m; i++) {
436:     jrow = ii[i];
437:     n    = ii[i+1] - jrow;
438:     sum1 = 0.0;
439:     sum2 = 0.0;
440:     sum3 = 0.0;
441:     for (j=0; j<n; j++) {
442:       sum1 += v[jrow]*x[3*idx[jrow]];
443:       sum2 += v[jrow]*x[3*idx[jrow]+1];
444:       sum3 += v[jrow]*x[3*idx[jrow]+2];
445:       jrow++;
446:     }
447:     y[3*i]   += sum1;
448:     y[3*i+1] += sum2;
449:     y[3*i+2] += sum3;
450:   }

452:   PetscLogFlops(6.0*a->nz);
453:   VecRestoreArrayRead(xx,&x);
454:   VecRestoreArray(zz,&y);
455:   return(0);
456: }
457: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
458: {
459:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
460:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
461:   const PetscScalar *x,*v;
462:   PetscScalar       *y,alpha1,alpha2,alpha3;
463:   PetscErrorCode    ierr;
464:   PetscInt          n,i;
465:   const PetscInt    m = b->AIJ->rmap->n,*idx;

468:   if (yy != zz) {VecCopy(yy,zz);}
469:   VecGetArrayRead(xx,&x);
470:   VecGetArray(zz,&y);
471:   for (i=0; i<m; i++) {
472:     idx    = a->j + a->i[i];
473:     v      = a->a + a->i[i];
474:     n      = a->i[i+1] - a->i[i];
475:     alpha1 = x[3*i];
476:     alpha2 = x[3*i+1];
477:     alpha3 = x[3*i+2];
478:     while (n-->0) {
479:       y[3*(*idx)]   += alpha1*(*v);
480:       y[3*(*idx)+1] += alpha2*(*v);
481:       y[3*(*idx)+2] += alpha3*(*v);
482:       idx++; v++;
483:     }
484:   }
485:   PetscLogFlops(6.0*a->nz);
486:   VecRestoreArrayRead(xx,&x);
487:   VecRestoreArray(zz,&y);
488:   return(0);
489: }

491: /* ------------------------------------------------------------------------------*/
492: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
493: {
494:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
495:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
496:   const PetscScalar *x,*v;
497:   PetscScalar       *y,sum1, sum2, sum3, sum4;
498:   PetscErrorCode    ierr;
499:   PetscInt          nonzerorow=0,n,i,jrow,j;
500:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

503:   VecGetArrayRead(xx,&x);
504:   VecGetArray(yy,&y);
505:   idx  = a->j;
506:   v    = a->a;
507:   ii   = a->i;

509:   for (i=0; i<m; i++) {
510:     jrow        = ii[i];
511:     n           = ii[i+1] - jrow;
512:     sum1        = 0.0;
513:     sum2        = 0.0;
514:     sum3        = 0.0;
515:     sum4        = 0.0;
516:     nonzerorow += (n>0);
517:     for (j=0; j<n; j++) {
518:       sum1 += v[jrow]*x[4*idx[jrow]];
519:       sum2 += v[jrow]*x[4*idx[jrow]+1];
520:       sum3 += v[jrow]*x[4*idx[jrow]+2];
521:       sum4 += v[jrow]*x[4*idx[jrow]+3];
522:       jrow++;
523:     }
524:     y[4*i]   = sum1;
525:     y[4*i+1] = sum2;
526:     y[4*i+2] = sum3;
527:     y[4*i+3] = sum4;
528:   }

530:   PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);
531:   VecRestoreArrayRead(xx,&x);
532:   VecRestoreArray(yy,&y);
533:   return(0);
534: }

536: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
537: {
538:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
539:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
540:   const PetscScalar *x,*v;
541:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
542:   PetscErrorCode    ierr;
543:   PetscInt          n,i;
544:   const PetscInt    m = b->AIJ->rmap->n,*idx;

547:   VecSet(yy,0.0);
548:   VecGetArrayRead(xx,&x);
549:   VecGetArray(yy,&y);
550:   for (i=0; i<m; i++) {
551:     idx    = a->j + a->i[i];
552:     v      = a->a + a->i[i];
553:     n      = a->i[i+1] - a->i[i];
554:     alpha1 = x[4*i];
555:     alpha2 = x[4*i+1];
556:     alpha3 = x[4*i+2];
557:     alpha4 = x[4*i+3];
558:     while (n-->0) {
559:       y[4*(*idx)]   += alpha1*(*v);
560:       y[4*(*idx)+1] += alpha2*(*v);
561:       y[4*(*idx)+2] += alpha3*(*v);
562:       y[4*(*idx)+3] += alpha4*(*v);
563:       idx++; v++;
564:     }
565:   }
566:   PetscLogFlops(8.0*a->nz);
567:   VecRestoreArrayRead(xx,&x);
568:   VecRestoreArray(yy,&y);
569:   return(0);
570: }

572: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
573: {
574:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
575:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
576:   const PetscScalar *x,*v;
577:   PetscScalar       *y,sum1, sum2, sum3, sum4;
578:   PetscErrorCode    ierr;
579:   PetscInt          n,i,jrow,j;
580:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

583:   if (yy != zz) {VecCopy(yy,zz);}
584:   VecGetArrayRead(xx,&x);
585:   VecGetArray(zz,&y);
586:   idx  = a->j;
587:   v    = a->a;
588:   ii   = a->i;

590:   for (i=0; i<m; i++) {
591:     jrow = ii[i];
592:     n    = ii[i+1] - jrow;
593:     sum1 = 0.0;
594:     sum2 = 0.0;
595:     sum3 = 0.0;
596:     sum4 = 0.0;
597:     for (j=0; j<n; j++) {
598:       sum1 += v[jrow]*x[4*idx[jrow]];
599:       sum2 += v[jrow]*x[4*idx[jrow]+1];
600:       sum3 += v[jrow]*x[4*idx[jrow]+2];
601:       sum4 += v[jrow]*x[4*idx[jrow]+3];
602:       jrow++;
603:     }
604:     y[4*i]   += sum1;
605:     y[4*i+1] += sum2;
606:     y[4*i+2] += sum3;
607:     y[4*i+3] += sum4;
608:   }

610:   PetscLogFlops(8.0*a->nz);
611:   VecRestoreArrayRead(xx,&x);
612:   VecRestoreArray(zz,&y);
613:   return(0);
614: }
615: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
616: {
617:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
618:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
619:   const PetscScalar *x,*v;
620:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
621:   PetscErrorCode    ierr;
622:   PetscInt          n,i;
623:   const PetscInt    m = b->AIJ->rmap->n,*idx;

626:   if (yy != zz) {VecCopy(yy,zz);}
627:   VecGetArrayRead(xx,&x);
628:   VecGetArray(zz,&y);

630:   for (i=0; i<m; i++) {
631:     idx    = a->j + a->i[i];
632:     v      = a->a + a->i[i];
633:     n      = a->i[i+1] - a->i[i];
634:     alpha1 = x[4*i];
635:     alpha2 = x[4*i+1];
636:     alpha3 = x[4*i+2];
637:     alpha4 = x[4*i+3];
638:     while (n-->0) {
639:       y[4*(*idx)]   += alpha1*(*v);
640:       y[4*(*idx)+1] += alpha2*(*v);
641:       y[4*(*idx)+2] += alpha3*(*v);
642:       y[4*(*idx)+3] += alpha4*(*v);
643:       idx++; v++;
644:     }
645:   }
646:   PetscLogFlops(8.0*a->nz);
647:   VecRestoreArrayRead(xx,&x);
648:   VecRestoreArray(zz,&y);
649:   return(0);
650: }
651: /* ------------------------------------------------------------------------------*/

653: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
654: {
655:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
656:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
657:   const PetscScalar *x,*v;
658:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
659:   PetscErrorCode    ierr;
660:   PetscInt          nonzerorow=0,n,i,jrow,j;
661:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

664:   VecGetArrayRead(xx,&x);
665:   VecGetArray(yy,&y);
666:   idx  = a->j;
667:   v    = a->a;
668:   ii   = a->i;

670:   for (i=0; i<m; i++) {
671:     jrow  = ii[i];
672:     n     = ii[i+1] - jrow;
673:     sum1  = 0.0;
674:     sum2  = 0.0;
675:     sum3  = 0.0;
676:     sum4  = 0.0;
677:     sum5  = 0.0;

679:     nonzerorow += (n>0);
680:     for (j=0; j<n; j++) {
681:       sum1 += v[jrow]*x[5*idx[jrow]];
682:       sum2 += v[jrow]*x[5*idx[jrow]+1];
683:       sum3 += v[jrow]*x[5*idx[jrow]+2];
684:       sum4 += v[jrow]*x[5*idx[jrow]+3];
685:       sum5 += v[jrow]*x[5*idx[jrow]+4];
686:       jrow++;
687:     }
688:     y[5*i]   = sum1;
689:     y[5*i+1] = sum2;
690:     y[5*i+2] = sum3;
691:     y[5*i+3] = sum4;
692:     y[5*i+4] = sum5;
693:   }

695:   PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);
696:   VecRestoreArrayRead(xx,&x);
697:   VecRestoreArray(yy,&y);
698:   return(0);
699: }

701: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
702: {
703:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
704:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
705:   const PetscScalar *x,*v;
706:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
707:   PetscErrorCode    ierr;
708:   PetscInt          n,i;
709:   const PetscInt    m = b->AIJ->rmap->n,*idx;

712:   VecSet(yy,0.0);
713:   VecGetArrayRead(xx,&x);
714:   VecGetArray(yy,&y);

716:   for (i=0; i<m; i++) {
717:     idx    = a->j + a->i[i];
718:     v      = a->a + a->i[i];
719:     n      = a->i[i+1] - a->i[i];
720:     alpha1 = x[5*i];
721:     alpha2 = x[5*i+1];
722:     alpha3 = x[5*i+2];
723:     alpha4 = x[5*i+3];
724:     alpha5 = x[5*i+4];
725:     while (n-->0) {
726:       y[5*(*idx)]   += alpha1*(*v);
727:       y[5*(*idx)+1] += alpha2*(*v);
728:       y[5*(*idx)+2] += alpha3*(*v);
729:       y[5*(*idx)+3] += alpha4*(*v);
730:       y[5*(*idx)+4] += alpha5*(*v);
731:       idx++; v++;
732:     }
733:   }
734:   PetscLogFlops(10.0*a->nz);
735:   VecRestoreArrayRead(xx,&x);
736:   VecRestoreArray(yy,&y);
737:   return(0);
738: }

740: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
741: {
742:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
743:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
744:   const PetscScalar *x,*v;
745:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
746:   PetscErrorCode    ierr;
747:   PetscInt          n,i,jrow,j;
748:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

751:   if (yy != zz) {VecCopy(yy,zz);}
752:   VecGetArrayRead(xx,&x);
753:   VecGetArray(zz,&y);
754:   idx  = a->j;
755:   v    = a->a;
756:   ii   = a->i;

758:   for (i=0; i<m; i++) {
759:     jrow = ii[i];
760:     n    = ii[i+1] - jrow;
761:     sum1 = 0.0;
762:     sum2 = 0.0;
763:     sum3 = 0.0;
764:     sum4 = 0.0;
765:     sum5 = 0.0;
766:     for (j=0; j<n; j++) {
767:       sum1 += v[jrow]*x[5*idx[jrow]];
768:       sum2 += v[jrow]*x[5*idx[jrow]+1];
769:       sum3 += v[jrow]*x[5*idx[jrow]+2];
770:       sum4 += v[jrow]*x[5*idx[jrow]+3];
771:       sum5 += v[jrow]*x[5*idx[jrow]+4];
772:       jrow++;
773:     }
774:     y[5*i]   += sum1;
775:     y[5*i+1] += sum2;
776:     y[5*i+2] += sum3;
777:     y[5*i+3] += sum4;
778:     y[5*i+4] += sum5;
779:   }

781:   PetscLogFlops(10.0*a->nz);
782:   VecRestoreArrayRead(xx,&x);
783:   VecRestoreArray(zz,&y);
784:   return(0);
785: }

787: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
788: {
789:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
790:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
791:   const PetscScalar *x,*v;
792:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
793:   PetscErrorCode    ierr;
794:   PetscInt          n,i;
795:   const PetscInt    m = b->AIJ->rmap->n,*idx;

798:   if (yy != zz) {VecCopy(yy,zz);}
799:   VecGetArrayRead(xx,&x);
800:   VecGetArray(zz,&y);

802:   for (i=0; i<m; i++) {
803:     idx    = a->j + a->i[i];
804:     v      = a->a + a->i[i];
805:     n      = a->i[i+1] - a->i[i];
806:     alpha1 = x[5*i];
807:     alpha2 = x[5*i+1];
808:     alpha3 = x[5*i+2];
809:     alpha4 = x[5*i+3];
810:     alpha5 = x[5*i+4];
811:     while (n-->0) {
812:       y[5*(*idx)]   += alpha1*(*v);
813:       y[5*(*idx)+1] += alpha2*(*v);
814:       y[5*(*idx)+2] += alpha3*(*v);
815:       y[5*(*idx)+3] += alpha4*(*v);
816:       y[5*(*idx)+4] += alpha5*(*v);
817:       idx++; v++;
818:     }
819:   }
820:   PetscLogFlops(10.0*a->nz);
821:   VecRestoreArrayRead(xx,&x);
822:   VecRestoreArray(zz,&y);
823:   return(0);
824: }

826: /* ------------------------------------------------------------------------------*/
827: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
828: {
829:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
830:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
831:   const PetscScalar *x,*v;
832:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
833:   PetscErrorCode    ierr;
834:   PetscInt          nonzerorow=0,n,i,jrow,j;
835:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

838:   VecGetArrayRead(xx,&x);
839:   VecGetArray(yy,&y);
840:   idx  = a->j;
841:   v    = a->a;
842:   ii   = a->i;

844:   for (i=0; i<m; i++) {
845:     jrow  = ii[i];
846:     n     = ii[i+1] - jrow;
847:     sum1  = 0.0;
848:     sum2  = 0.0;
849:     sum3  = 0.0;
850:     sum4  = 0.0;
851:     sum5  = 0.0;
852:     sum6  = 0.0;

854:     nonzerorow += (n>0);
855:     for (j=0; j<n; j++) {
856:       sum1 += v[jrow]*x[6*idx[jrow]];
857:       sum2 += v[jrow]*x[6*idx[jrow]+1];
858:       sum3 += v[jrow]*x[6*idx[jrow]+2];
859:       sum4 += v[jrow]*x[6*idx[jrow]+3];
860:       sum5 += v[jrow]*x[6*idx[jrow]+4];
861:       sum6 += v[jrow]*x[6*idx[jrow]+5];
862:       jrow++;
863:     }
864:     y[6*i]   = sum1;
865:     y[6*i+1] = sum2;
866:     y[6*i+2] = sum3;
867:     y[6*i+3] = sum4;
868:     y[6*i+4] = sum5;
869:     y[6*i+5] = sum6;
870:   }

872:   PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);
873:   VecRestoreArrayRead(xx,&x);
874:   VecRestoreArray(yy,&y);
875:   return(0);
876: }

878: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
879: {
880:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
881:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
882:   const PetscScalar *x,*v;
883:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
884:   PetscErrorCode    ierr;
885:   PetscInt          n,i;
886:   const PetscInt    m = b->AIJ->rmap->n,*idx;

889:   VecSet(yy,0.0);
890:   VecGetArrayRead(xx,&x);
891:   VecGetArray(yy,&y);

893:   for (i=0; i<m; i++) {
894:     idx    = a->j + a->i[i];
895:     v      = a->a + a->i[i];
896:     n      = a->i[i+1] - a->i[i];
897:     alpha1 = x[6*i];
898:     alpha2 = x[6*i+1];
899:     alpha3 = x[6*i+2];
900:     alpha4 = x[6*i+3];
901:     alpha5 = x[6*i+4];
902:     alpha6 = x[6*i+5];
903:     while (n-->0) {
904:       y[6*(*idx)]   += alpha1*(*v);
905:       y[6*(*idx)+1] += alpha2*(*v);
906:       y[6*(*idx)+2] += alpha3*(*v);
907:       y[6*(*idx)+3] += alpha4*(*v);
908:       y[6*(*idx)+4] += alpha5*(*v);
909:       y[6*(*idx)+5] += alpha6*(*v);
910:       idx++; v++;
911:     }
912:   }
913:   PetscLogFlops(12.0*a->nz);
914:   VecRestoreArrayRead(xx,&x);
915:   VecRestoreArray(yy,&y);
916:   return(0);
917: }

919: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
920: {
921:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
922:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
923:   const PetscScalar *x,*v;
924:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
925:   PetscErrorCode    ierr;
926:   PetscInt          n,i,jrow,j;
927:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

930:   if (yy != zz) {VecCopy(yy,zz);}
931:   VecGetArrayRead(xx,&x);
932:   VecGetArray(zz,&y);
933:   idx  = a->j;
934:   v    = a->a;
935:   ii   = a->i;

937:   for (i=0; i<m; i++) {
938:     jrow = ii[i];
939:     n    = ii[i+1] - jrow;
940:     sum1 = 0.0;
941:     sum2 = 0.0;
942:     sum3 = 0.0;
943:     sum4 = 0.0;
944:     sum5 = 0.0;
945:     sum6 = 0.0;
946:     for (j=0; j<n; j++) {
947:       sum1 += v[jrow]*x[6*idx[jrow]];
948:       sum2 += v[jrow]*x[6*idx[jrow]+1];
949:       sum3 += v[jrow]*x[6*idx[jrow]+2];
950:       sum4 += v[jrow]*x[6*idx[jrow]+3];
951:       sum5 += v[jrow]*x[6*idx[jrow]+4];
952:       sum6 += v[jrow]*x[6*idx[jrow]+5];
953:       jrow++;
954:     }
955:     y[6*i]   += sum1;
956:     y[6*i+1] += sum2;
957:     y[6*i+2] += sum3;
958:     y[6*i+3] += sum4;
959:     y[6*i+4] += sum5;
960:     y[6*i+5] += sum6;
961:   }

963:   PetscLogFlops(12.0*a->nz);
964:   VecRestoreArrayRead(xx,&x);
965:   VecRestoreArray(zz,&y);
966:   return(0);
967: }

969: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
970: {
971:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
972:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
973:   const PetscScalar *x,*v;
974:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
975:   PetscErrorCode    ierr;
976:   PetscInt          n,i;
977:   const PetscInt    m = b->AIJ->rmap->n,*idx;

980:   if (yy != zz) {VecCopy(yy,zz);}
981:   VecGetArrayRead(xx,&x);
982:   VecGetArray(zz,&y);

984:   for (i=0; i<m; i++) {
985:     idx    = a->j + a->i[i];
986:     v      = a->a + a->i[i];
987:     n      = a->i[i+1] - a->i[i];
988:     alpha1 = x[6*i];
989:     alpha2 = x[6*i+1];
990:     alpha3 = x[6*i+2];
991:     alpha4 = x[6*i+3];
992:     alpha5 = x[6*i+4];
993:     alpha6 = x[6*i+5];
994:     while (n-->0) {
995:       y[6*(*idx)]   += alpha1*(*v);
996:       y[6*(*idx)+1] += alpha2*(*v);
997:       y[6*(*idx)+2] += alpha3*(*v);
998:       y[6*(*idx)+3] += alpha4*(*v);
999:       y[6*(*idx)+4] += alpha5*(*v);
1000:       y[6*(*idx)+5] += alpha6*(*v);
1001:       idx++; v++;
1002:     }
1003:   }
1004:   PetscLogFlops(12.0*a->nz);
1005:   VecRestoreArrayRead(xx,&x);
1006:   VecRestoreArray(zz,&y);
1007:   return(0);
1008: }

1010: /* ------------------------------------------------------------------------------*/
1011: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1012: {
1013:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1014:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1015:   const PetscScalar *x,*v;
1016:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1017:   PetscErrorCode    ierr;
1018:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1019:   PetscInt          nonzerorow=0,n,i,jrow,j;

1022:   VecGetArrayRead(xx,&x);
1023:   VecGetArray(yy,&y);
1024:   idx  = a->j;
1025:   v    = a->a;
1026:   ii   = a->i;

1028:   for (i=0; i<m; i++) {
1029:     jrow  = ii[i];
1030:     n     = ii[i+1] - jrow;
1031:     sum1  = 0.0;
1032:     sum2  = 0.0;
1033:     sum3  = 0.0;
1034:     sum4  = 0.0;
1035:     sum5  = 0.0;
1036:     sum6  = 0.0;
1037:     sum7  = 0.0;

1039:     nonzerorow += (n>0);
1040:     for (j=0; j<n; j++) {
1041:       sum1 += v[jrow]*x[7*idx[jrow]];
1042:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1043:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1044:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1045:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1046:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1047:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1048:       jrow++;
1049:     }
1050:     y[7*i]   = sum1;
1051:     y[7*i+1] = sum2;
1052:     y[7*i+2] = sum3;
1053:     y[7*i+3] = sum4;
1054:     y[7*i+4] = sum5;
1055:     y[7*i+5] = sum6;
1056:     y[7*i+6] = sum7;
1057:   }

1059:   PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);
1060:   VecRestoreArrayRead(xx,&x);
1061:   VecRestoreArray(yy,&y);
1062:   return(0);
1063: }

1065: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1066: {
1067:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1068:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1069:   const PetscScalar *x,*v;
1070:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1071:   PetscErrorCode    ierr;
1072:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1073:   PetscInt          n,i;

1076:   VecSet(yy,0.0);
1077:   VecGetArrayRead(xx,&x);
1078:   VecGetArray(yy,&y);

1080:   for (i=0; i<m; i++) {
1081:     idx    = a->j + a->i[i];
1082:     v      = a->a + a->i[i];
1083:     n      = a->i[i+1] - a->i[i];
1084:     alpha1 = x[7*i];
1085:     alpha2 = x[7*i+1];
1086:     alpha3 = x[7*i+2];
1087:     alpha4 = x[7*i+3];
1088:     alpha5 = x[7*i+4];
1089:     alpha6 = x[7*i+5];
1090:     alpha7 = x[7*i+6];
1091:     while (n-->0) {
1092:       y[7*(*idx)]   += alpha1*(*v);
1093:       y[7*(*idx)+1] += alpha2*(*v);
1094:       y[7*(*idx)+2] += alpha3*(*v);
1095:       y[7*(*idx)+3] += alpha4*(*v);
1096:       y[7*(*idx)+4] += alpha5*(*v);
1097:       y[7*(*idx)+5] += alpha6*(*v);
1098:       y[7*(*idx)+6] += alpha7*(*v);
1099:       idx++; v++;
1100:     }
1101:   }
1102:   PetscLogFlops(14.0*a->nz);
1103:   VecRestoreArrayRead(xx,&x);
1104:   VecRestoreArray(yy,&y);
1105:   return(0);
1106: }

1108: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1109: {
1110:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1111:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1112:   const PetscScalar *x,*v;
1113:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1114:   PetscErrorCode    ierr;
1115:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1116:   PetscInt          n,i,jrow,j;

1119:   if (yy != zz) {VecCopy(yy,zz);}
1120:   VecGetArrayRead(xx,&x);
1121:   VecGetArray(zz,&y);
1122:   idx  = a->j;
1123:   v    = a->a;
1124:   ii   = a->i;

1126:   for (i=0; i<m; i++) {
1127:     jrow = ii[i];
1128:     n    = ii[i+1] - jrow;
1129:     sum1 = 0.0;
1130:     sum2 = 0.0;
1131:     sum3 = 0.0;
1132:     sum4 = 0.0;
1133:     sum5 = 0.0;
1134:     sum6 = 0.0;
1135:     sum7 = 0.0;
1136:     for (j=0; j<n; j++) {
1137:       sum1 += v[jrow]*x[7*idx[jrow]];
1138:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1139:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1140:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1141:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1142:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1143:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1144:       jrow++;
1145:     }
1146:     y[7*i]   += sum1;
1147:     y[7*i+1] += sum2;
1148:     y[7*i+2] += sum3;
1149:     y[7*i+3] += sum4;
1150:     y[7*i+4] += sum5;
1151:     y[7*i+5] += sum6;
1152:     y[7*i+6] += sum7;
1153:   }

1155:   PetscLogFlops(14.0*a->nz);
1156:   VecRestoreArrayRead(xx,&x);
1157:   VecRestoreArray(zz,&y);
1158:   return(0);
1159: }

1161: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1162: {
1163:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1164:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1165:   const PetscScalar *x,*v;
1166:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1167:   PetscErrorCode    ierr;
1168:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1169:   PetscInt          n,i;

1172:   if (yy != zz) {VecCopy(yy,zz);}
1173:   VecGetArrayRead(xx,&x);
1174:   VecGetArray(zz,&y);
1175:   for (i=0; i<m; i++) {
1176:     idx    = a->j + a->i[i];
1177:     v      = a->a + a->i[i];
1178:     n      = a->i[i+1] - a->i[i];
1179:     alpha1 = x[7*i];
1180:     alpha2 = x[7*i+1];
1181:     alpha3 = x[7*i+2];
1182:     alpha4 = x[7*i+3];
1183:     alpha5 = x[7*i+4];
1184:     alpha6 = x[7*i+5];
1185:     alpha7 = x[7*i+6];
1186:     while (n-->0) {
1187:       y[7*(*idx)]   += alpha1*(*v);
1188:       y[7*(*idx)+1] += alpha2*(*v);
1189:       y[7*(*idx)+2] += alpha3*(*v);
1190:       y[7*(*idx)+3] += alpha4*(*v);
1191:       y[7*(*idx)+4] += alpha5*(*v);
1192:       y[7*(*idx)+5] += alpha6*(*v);
1193:       y[7*(*idx)+6] += alpha7*(*v);
1194:       idx++; v++;
1195:     }
1196:   }
1197:   PetscLogFlops(14.0*a->nz);
1198:   VecRestoreArrayRead(xx,&x);
1199:   VecRestoreArray(zz,&y);
1200:   return(0);
1201: }

1203: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1204: {
1205:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1206:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1207:   const PetscScalar *x,*v;
1208:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1209:   PetscErrorCode    ierr;
1210:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1211:   PetscInt          nonzerorow=0,n,i,jrow,j;

1214:   VecGetArrayRead(xx,&x);
1215:   VecGetArray(yy,&y);
1216:   idx  = a->j;
1217:   v    = a->a;
1218:   ii   = a->i;

1220:   for (i=0; i<m; i++) {
1221:     jrow  = ii[i];
1222:     n     = ii[i+1] - jrow;
1223:     sum1  = 0.0;
1224:     sum2  = 0.0;
1225:     sum3  = 0.0;
1226:     sum4  = 0.0;
1227:     sum5  = 0.0;
1228:     sum6  = 0.0;
1229:     sum7  = 0.0;
1230:     sum8  = 0.0;

1232:     nonzerorow += (n>0);
1233:     for (j=0; j<n; j++) {
1234:       sum1 += v[jrow]*x[8*idx[jrow]];
1235:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1236:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1237:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1238:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1239:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1240:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1241:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1242:       jrow++;
1243:     }
1244:     y[8*i]   = sum1;
1245:     y[8*i+1] = sum2;
1246:     y[8*i+2] = sum3;
1247:     y[8*i+3] = sum4;
1248:     y[8*i+4] = sum5;
1249:     y[8*i+5] = sum6;
1250:     y[8*i+6] = sum7;
1251:     y[8*i+7] = sum8;
1252:   }

1254:   PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);
1255:   VecRestoreArrayRead(xx,&x);
1256:   VecRestoreArray(yy,&y);
1257:   return(0);
1258: }

1260: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1261: {
1262:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1263:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1264:   const PetscScalar *x,*v;
1265:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1266:   PetscErrorCode    ierr;
1267:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1268:   PetscInt          n,i;

1271:   VecSet(yy,0.0);
1272:   VecGetArrayRead(xx,&x);
1273:   VecGetArray(yy,&y);

1275:   for (i=0; i<m; i++) {
1276:     idx    = a->j + a->i[i];
1277:     v      = a->a + a->i[i];
1278:     n      = a->i[i+1] - a->i[i];
1279:     alpha1 = x[8*i];
1280:     alpha2 = x[8*i+1];
1281:     alpha3 = x[8*i+2];
1282:     alpha4 = x[8*i+3];
1283:     alpha5 = x[8*i+4];
1284:     alpha6 = x[8*i+5];
1285:     alpha7 = x[8*i+6];
1286:     alpha8 = x[8*i+7];
1287:     while (n-->0) {
1288:       y[8*(*idx)]   += alpha1*(*v);
1289:       y[8*(*idx)+1] += alpha2*(*v);
1290:       y[8*(*idx)+2] += alpha3*(*v);
1291:       y[8*(*idx)+3] += alpha4*(*v);
1292:       y[8*(*idx)+4] += alpha5*(*v);
1293:       y[8*(*idx)+5] += alpha6*(*v);
1294:       y[8*(*idx)+6] += alpha7*(*v);
1295:       y[8*(*idx)+7] += alpha8*(*v);
1296:       idx++; v++;
1297:     }
1298:   }
1299:   PetscLogFlops(16.0*a->nz);
1300:   VecRestoreArrayRead(xx,&x);
1301:   VecRestoreArray(yy,&y);
1302:   return(0);
1303: }

1305: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1306: {
1307:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1308:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1309:   const PetscScalar *x,*v;
1310:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1311:   PetscErrorCode    ierr;
1312:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1313:   PetscInt          n,i,jrow,j;

1316:   if (yy != zz) {VecCopy(yy,zz);}
1317:   VecGetArrayRead(xx,&x);
1318:   VecGetArray(zz,&y);
1319:   idx  = a->j;
1320:   v    = a->a;
1321:   ii   = a->i;

1323:   for (i=0; i<m; i++) {
1324:     jrow = ii[i];
1325:     n    = ii[i+1] - jrow;
1326:     sum1 = 0.0;
1327:     sum2 = 0.0;
1328:     sum3 = 0.0;
1329:     sum4 = 0.0;
1330:     sum5 = 0.0;
1331:     sum6 = 0.0;
1332:     sum7 = 0.0;
1333:     sum8 = 0.0;
1334:     for (j=0; j<n; j++) {
1335:       sum1 += v[jrow]*x[8*idx[jrow]];
1336:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1337:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1338:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1339:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1340:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1341:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1342:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1343:       jrow++;
1344:     }
1345:     y[8*i]   += sum1;
1346:     y[8*i+1] += sum2;
1347:     y[8*i+2] += sum3;
1348:     y[8*i+3] += sum4;
1349:     y[8*i+4] += sum5;
1350:     y[8*i+5] += sum6;
1351:     y[8*i+6] += sum7;
1352:     y[8*i+7] += sum8;
1353:   }

1355:   PetscLogFlops(16.0*a->nz);
1356:   VecRestoreArrayRead(xx,&x);
1357:   VecRestoreArray(zz,&y);
1358:   return(0);
1359: }

1361: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1362: {
1363:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1364:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1365:   const PetscScalar *x,*v;
1366:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1367:   PetscErrorCode    ierr;
1368:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1369:   PetscInt          n,i;

1372:   if (yy != zz) {VecCopy(yy,zz);}
1373:   VecGetArrayRead(xx,&x);
1374:   VecGetArray(zz,&y);
1375:   for (i=0; i<m; i++) {
1376:     idx    = a->j + a->i[i];
1377:     v      = a->a + a->i[i];
1378:     n      = a->i[i+1] - a->i[i];
1379:     alpha1 = x[8*i];
1380:     alpha2 = x[8*i+1];
1381:     alpha3 = x[8*i+2];
1382:     alpha4 = x[8*i+3];
1383:     alpha5 = x[8*i+4];
1384:     alpha6 = x[8*i+5];
1385:     alpha7 = x[8*i+6];
1386:     alpha8 = x[8*i+7];
1387:     while (n-->0) {
1388:       y[8*(*idx)]   += alpha1*(*v);
1389:       y[8*(*idx)+1] += alpha2*(*v);
1390:       y[8*(*idx)+2] += alpha3*(*v);
1391:       y[8*(*idx)+3] += alpha4*(*v);
1392:       y[8*(*idx)+4] += alpha5*(*v);
1393:       y[8*(*idx)+5] += alpha6*(*v);
1394:       y[8*(*idx)+6] += alpha7*(*v);
1395:       y[8*(*idx)+7] += alpha8*(*v);
1396:       idx++; v++;
1397:     }
1398:   }
1399:   PetscLogFlops(16.0*a->nz);
1400:   VecRestoreArrayRead(xx,&x);
1401:   VecRestoreArray(zz,&y);
1402:   return(0);
1403: }

1405: /* ------------------------------------------------------------------------------*/
1406: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1407: {
1408:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1409:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1410:   const PetscScalar *x,*v;
1411:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1412:   PetscErrorCode    ierr;
1413:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1414:   PetscInt          nonzerorow=0,n,i,jrow,j;

1417:   VecGetArrayRead(xx,&x);
1418:   VecGetArray(yy,&y);
1419:   idx  = a->j;
1420:   v    = a->a;
1421:   ii   = a->i;

1423:   for (i=0; i<m; i++) {
1424:     jrow  = ii[i];
1425:     n     = ii[i+1] - jrow;
1426:     sum1  = 0.0;
1427:     sum2  = 0.0;
1428:     sum3  = 0.0;
1429:     sum4  = 0.0;
1430:     sum5  = 0.0;
1431:     sum6  = 0.0;
1432:     sum7  = 0.0;
1433:     sum8  = 0.0;
1434:     sum9  = 0.0;

1436:     nonzerorow += (n>0);
1437:     for (j=0; j<n; j++) {
1438:       sum1 += v[jrow]*x[9*idx[jrow]];
1439:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1440:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1441:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1442:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1443:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1444:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1445:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1446:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1447:       jrow++;
1448:     }
1449:     y[9*i]   = sum1;
1450:     y[9*i+1] = sum2;
1451:     y[9*i+2] = sum3;
1452:     y[9*i+3] = sum4;
1453:     y[9*i+4] = sum5;
1454:     y[9*i+5] = sum6;
1455:     y[9*i+6] = sum7;
1456:     y[9*i+7] = sum8;
1457:     y[9*i+8] = sum9;
1458:   }

1460:   PetscLogFlops(18.0*a->nz - 9*nonzerorow);
1461:   VecRestoreArrayRead(xx,&x);
1462:   VecRestoreArray(yy,&y);
1463:   return(0);
1464: }

1466: /* ------------------------------------------------------------------------------*/

1468: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1469: {
1470:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1471:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1472:   const PetscScalar *x,*v;
1473:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1474:   PetscErrorCode    ierr;
1475:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1476:   PetscInt          n,i;

1479:   VecSet(yy,0.0);
1480:   VecGetArrayRead(xx,&x);
1481:   VecGetArray(yy,&y);

1483:   for (i=0; i<m; i++) {
1484:     idx    = a->j + a->i[i];
1485:     v      = a->a + a->i[i];
1486:     n      = a->i[i+1] - a->i[i];
1487:     alpha1 = x[9*i];
1488:     alpha2 = x[9*i+1];
1489:     alpha3 = x[9*i+2];
1490:     alpha4 = x[9*i+3];
1491:     alpha5 = x[9*i+4];
1492:     alpha6 = x[9*i+5];
1493:     alpha7 = x[9*i+6];
1494:     alpha8 = x[9*i+7];
1495:     alpha9 = x[9*i+8];
1496:     while (n-->0) {
1497:       y[9*(*idx)]   += alpha1*(*v);
1498:       y[9*(*idx)+1] += alpha2*(*v);
1499:       y[9*(*idx)+2] += alpha3*(*v);
1500:       y[9*(*idx)+3] += alpha4*(*v);
1501:       y[9*(*idx)+4] += alpha5*(*v);
1502:       y[9*(*idx)+5] += alpha6*(*v);
1503:       y[9*(*idx)+6] += alpha7*(*v);
1504:       y[9*(*idx)+7] += alpha8*(*v);
1505:       y[9*(*idx)+8] += alpha9*(*v);
1506:       idx++; v++;
1507:     }
1508:   }
1509:   PetscLogFlops(18.0*a->nz);
1510:   VecRestoreArrayRead(xx,&x);
1511:   VecRestoreArray(yy,&y);
1512:   return(0);
1513: }

1515: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1516: {
1517:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1518:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1519:   const PetscScalar *x,*v;
1520:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1521:   PetscErrorCode    ierr;
1522:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1523:   PetscInt          n,i,jrow,j;

1526:   if (yy != zz) {VecCopy(yy,zz);}
1527:   VecGetArrayRead(xx,&x);
1528:   VecGetArray(zz,&y);
1529:   idx  = a->j;
1530:   v    = a->a;
1531:   ii   = a->i;

1533:   for (i=0; i<m; i++) {
1534:     jrow = ii[i];
1535:     n    = ii[i+1] - jrow;
1536:     sum1 = 0.0;
1537:     sum2 = 0.0;
1538:     sum3 = 0.0;
1539:     sum4 = 0.0;
1540:     sum5 = 0.0;
1541:     sum6 = 0.0;
1542:     sum7 = 0.0;
1543:     sum8 = 0.0;
1544:     sum9 = 0.0;
1545:     for (j=0; j<n; j++) {
1546:       sum1 += v[jrow]*x[9*idx[jrow]];
1547:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1548:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1549:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1550:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1551:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1552:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1553:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1554:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1555:       jrow++;
1556:     }
1557:     y[9*i]   += sum1;
1558:     y[9*i+1] += sum2;
1559:     y[9*i+2] += sum3;
1560:     y[9*i+3] += sum4;
1561:     y[9*i+4] += sum5;
1562:     y[9*i+5] += sum6;
1563:     y[9*i+6] += sum7;
1564:     y[9*i+7] += sum8;
1565:     y[9*i+8] += sum9;
1566:   }

1568:   PetscLogFlops(18.0*a->nz);
1569:   VecRestoreArrayRead(xx,&x);
1570:   VecRestoreArray(zz,&y);
1571:   return(0);
1572: }

1574: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1575: {
1576:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1577:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1578:   const PetscScalar *x,*v;
1579:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1580:   PetscErrorCode    ierr;
1581:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1582:   PetscInt          n,i;

1585:   if (yy != zz) {VecCopy(yy,zz);}
1586:   VecGetArrayRead(xx,&x);
1587:   VecGetArray(zz,&y);
1588:   for (i=0; i<m; i++) {
1589:     idx    = a->j + a->i[i];
1590:     v      = a->a + a->i[i];
1591:     n      = a->i[i+1] - a->i[i];
1592:     alpha1 = x[9*i];
1593:     alpha2 = x[9*i+1];
1594:     alpha3 = x[9*i+2];
1595:     alpha4 = x[9*i+3];
1596:     alpha5 = x[9*i+4];
1597:     alpha6 = x[9*i+5];
1598:     alpha7 = x[9*i+6];
1599:     alpha8 = x[9*i+7];
1600:     alpha9 = x[9*i+8];
1601:     while (n-->0) {
1602:       y[9*(*idx)]   += alpha1*(*v);
1603:       y[9*(*idx)+1] += alpha2*(*v);
1604:       y[9*(*idx)+2] += alpha3*(*v);
1605:       y[9*(*idx)+3] += alpha4*(*v);
1606:       y[9*(*idx)+4] += alpha5*(*v);
1607:       y[9*(*idx)+5] += alpha6*(*v);
1608:       y[9*(*idx)+6] += alpha7*(*v);
1609:       y[9*(*idx)+7] += alpha8*(*v);
1610:       y[9*(*idx)+8] += alpha9*(*v);
1611:       idx++; v++;
1612:     }
1613:   }
1614:   PetscLogFlops(18.0*a->nz);
1615:   VecRestoreArrayRead(xx,&x);
1616:   VecRestoreArray(zz,&y);
1617:   return(0);
1618: }
1619: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1620: {
1621:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1622:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1623:   const PetscScalar *x,*v;
1624:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1625:   PetscErrorCode    ierr;
1626:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1627:   PetscInt          nonzerorow=0,n,i,jrow,j;

1630:   VecGetArrayRead(xx,&x);
1631:   VecGetArray(yy,&y);
1632:   idx  = a->j;
1633:   v    = a->a;
1634:   ii   = a->i;

1636:   for (i=0; i<m; i++) {
1637:     jrow  = ii[i];
1638:     n     = ii[i+1] - jrow;
1639:     sum1  = 0.0;
1640:     sum2  = 0.0;
1641:     sum3  = 0.0;
1642:     sum4  = 0.0;
1643:     sum5  = 0.0;
1644:     sum6  = 0.0;
1645:     sum7  = 0.0;
1646:     sum8  = 0.0;
1647:     sum9  = 0.0;
1648:     sum10 = 0.0;

1650:     nonzerorow += (n>0);
1651:     for (j=0; j<n; j++) {
1652:       sum1  += v[jrow]*x[10*idx[jrow]];
1653:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1654:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1655:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1656:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1657:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1658:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1659:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1660:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1661:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1662:       jrow++;
1663:     }
1664:     y[10*i]   = sum1;
1665:     y[10*i+1] = sum2;
1666:     y[10*i+2] = sum3;
1667:     y[10*i+3] = sum4;
1668:     y[10*i+4] = sum5;
1669:     y[10*i+5] = sum6;
1670:     y[10*i+6] = sum7;
1671:     y[10*i+7] = sum8;
1672:     y[10*i+8] = sum9;
1673:     y[10*i+9] = sum10;
1674:   }

1676:   PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);
1677:   VecRestoreArrayRead(xx,&x);
1678:   VecRestoreArray(yy,&y);
1679:   return(0);
1680: }

1682: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1683: {
1684:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1685:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1686:   const PetscScalar *x,*v;
1687:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1688:   PetscErrorCode    ierr;
1689:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1690:   PetscInt          n,i,jrow,j;

1693:   if (yy != zz) {VecCopy(yy,zz);}
1694:   VecGetArrayRead(xx,&x);
1695:   VecGetArray(zz,&y);
1696:   idx  = a->j;
1697:   v    = a->a;
1698:   ii   = a->i;

1700:   for (i=0; i<m; i++) {
1701:     jrow  = ii[i];
1702:     n     = ii[i+1] - jrow;
1703:     sum1  = 0.0;
1704:     sum2  = 0.0;
1705:     sum3  = 0.0;
1706:     sum4  = 0.0;
1707:     sum5  = 0.0;
1708:     sum6  = 0.0;
1709:     sum7  = 0.0;
1710:     sum8  = 0.0;
1711:     sum9  = 0.0;
1712:     sum10 = 0.0;
1713:     for (j=0; j<n; j++) {
1714:       sum1  += v[jrow]*x[10*idx[jrow]];
1715:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1716:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1717:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1718:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1719:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1720:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1721:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1722:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1723:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1724:       jrow++;
1725:     }
1726:     y[10*i]   += sum1;
1727:     y[10*i+1] += sum2;
1728:     y[10*i+2] += sum3;
1729:     y[10*i+3] += sum4;
1730:     y[10*i+4] += sum5;
1731:     y[10*i+5] += sum6;
1732:     y[10*i+6] += sum7;
1733:     y[10*i+7] += sum8;
1734:     y[10*i+8] += sum9;
1735:     y[10*i+9] += sum10;
1736:   }

1738:   PetscLogFlops(20.0*a->nz);
1739:   VecRestoreArrayRead(xx,&x);
1740:   VecRestoreArray(yy,&y);
1741:   return(0);
1742: }

1744: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1745: {
1746:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1747:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1748:   const PetscScalar *x,*v;
1749:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1750:   PetscErrorCode    ierr;
1751:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1752:   PetscInt          n,i;

1755:   VecSet(yy,0.0);
1756:   VecGetArrayRead(xx,&x);
1757:   VecGetArray(yy,&y);

1759:   for (i=0; i<m; i++) {
1760:     idx     = a->j + a->i[i];
1761:     v       = a->a + a->i[i];
1762:     n       = a->i[i+1] - a->i[i];
1763:     alpha1  = x[10*i];
1764:     alpha2  = x[10*i+1];
1765:     alpha3  = x[10*i+2];
1766:     alpha4  = x[10*i+3];
1767:     alpha5  = x[10*i+4];
1768:     alpha6  = x[10*i+5];
1769:     alpha7  = x[10*i+6];
1770:     alpha8  = x[10*i+7];
1771:     alpha9  = x[10*i+8];
1772:     alpha10 = x[10*i+9];
1773:     while (n-->0) {
1774:       y[10*(*idx)]   += alpha1*(*v);
1775:       y[10*(*idx)+1] += alpha2*(*v);
1776:       y[10*(*idx)+2] += alpha3*(*v);
1777:       y[10*(*idx)+3] += alpha4*(*v);
1778:       y[10*(*idx)+4] += alpha5*(*v);
1779:       y[10*(*idx)+5] += alpha6*(*v);
1780:       y[10*(*idx)+6] += alpha7*(*v);
1781:       y[10*(*idx)+7] += alpha8*(*v);
1782:       y[10*(*idx)+8] += alpha9*(*v);
1783:       y[10*(*idx)+9] += alpha10*(*v);
1784:       idx++; v++;
1785:     }
1786:   }
1787:   PetscLogFlops(20.0*a->nz);
1788:   VecRestoreArrayRead(xx,&x);
1789:   VecRestoreArray(yy,&y);
1790:   return(0);
1791: }

1793: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1794: {
1795:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1796:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1797:   const PetscScalar *x,*v;
1798:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1799:   PetscErrorCode    ierr;
1800:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1801:   PetscInt          n,i;

1804:   if (yy != zz) {VecCopy(yy,zz);}
1805:   VecGetArrayRead(xx,&x);
1806:   VecGetArray(zz,&y);
1807:   for (i=0; i<m; i++) {
1808:     idx     = a->j + a->i[i];
1809:     v       = a->a + a->i[i];
1810:     n       = a->i[i+1] - a->i[i];
1811:     alpha1  = x[10*i];
1812:     alpha2  = x[10*i+1];
1813:     alpha3  = x[10*i+2];
1814:     alpha4  = x[10*i+3];
1815:     alpha5  = x[10*i+4];
1816:     alpha6  = x[10*i+5];
1817:     alpha7  = x[10*i+6];
1818:     alpha8  = x[10*i+7];
1819:     alpha9  = x[10*i+8];
1820:     alpha10 = x[10*i+9];
1821:     while (n-->0) {
1822:       y[10*(*idx)]   += alpha1*(*v);
1823:       y[10*(*idx)+1] += alpha2*(*v);
1824:       y[10*(*idx)+2] += alpha3*(*v);
1825:       y[10*(*idx)+3] += alpha4*(*v);
1826:       y[10*(*idx)+4] += alpha5*(*v);
1827:       y[10*(*idx)+5] += alpha6*(*v);
1828:       y[10*(*idx)+6] += alpha7*(*v);
1829:       y[10*(*idx)+7] += alpha8*(*v);
1830:       y[10*(*idx)+8] += alpha9*(*v);
1831:       y[10*(*idx)+9] += alpha10*(*v);
1832:       idx++; v++;
1833:     }
1834:   }
1835:   PetscLogFlops(20.0*a->nz);
1836:   VecRestoreArrayRead(xx,&x);
1837:   VecRestoreArray(zz,&y);
1838:   return(0);
1839: }


1842: /*--------------------------------------------------------------------------------------------*/
1843: PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1844: {
1845:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1846:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1847:   const PetscScalar *x,*v;
1848:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1849:   PetscErrorCode    ierr;
1850:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1851:   PetscInt          nonzerorow=0,n,i,jrow,j;

1854:   VecGetArrayRead(xx,&x);
1855:   VecGetArray(yy,&y);
1856:   idx  = a->j;
1857:   v    = a->a;
1858:   ii   = a->i;

1860:   for (i=0; i<m; i++) {
1861:     jrow  = ii[i];
1862:     n     = ii[i+1] - jrow;
1863:     sum1  = 0.0;
1864:     sum2  = 0.0;
1865:     sum3  = 0.0;
1866:     sum4  = 0.0;
1867:     sum5  = 0.0;
1868:     sum6  = 0.0;
1869:     sum7  = 0.0;
1870:     sum8  = 0.0;
1871:     sum9  = 0.0;
1872:     sum10 = 0.0;
1873:     sum11 = 0.0;

1875:     nonzerorow += (n>0);
1876:     for (j=0; j<n; j++) {
1877:       sum1  += v[jrow]*x[11*idx[jrow]];
1878:       sum2  += v[jrow]*x[11*idx[jrow]+1];
1879:       sum3  += v[jrow]*x[11*idx[jrow]+2];
1880:       sum4  += v[jrow]*x[11*idx[jrow]+3];
1881:       sum5  += v[jrow]*x[11*idx[jrow]+4];
1882:       sum6  += v[jrow]*x[11*idx[jrow]+5];
1883:       sum7  += v[jrow]*x[11*idx[jrow]+6];
1884:       sum8  += v[jrow]*x[11*idx[jrow]+7];
1885:       sum9  += v[jrow]*x[11*idx[jrow]+8];
1886:       sum10 += v[jrow]*x[11*idx[jrow]+9];
1887:       sum11 += v[jrow]*x[11*idx[jrow]+10];
1888:       jrow++;
1889:     }
1890:     y[11*i]    = sum1;
1891:     y[11*i+1]  = sum2;
1892:     y[11*i+2]  = sum3;
1893:     y[11*i+3]  = sum4;
1894:     y[11*i+4]  = sum5;
1895:     y[11*i+5]  = sum6;
1896:     y[11*i+6]  = sum7;
1897:     y[11*i+7]  = sum8;
1898:     y[11*i+8]  = sum9;
1899:     y[11*i+9]  = sum10;
1900:     y[11*i+10] = sum11;
1901:   }

1903:   PetscLogFlops(22.0*a->nz - 11*nonzerorow);
1904:   VecRestoreArrayRead(xx,&x);
1905:   VecRestoreArray(yy,&y);
1906:   return(0);
1907: }

1909: PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1910: {
1911:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1912:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1913:   const PetscScalar *x,*v;
1914:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1915:   PetscErrorCode    ierr;
1916:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1917:   PetscInt          n,i,jrow,j;

1920:   if (yy != zz) {VecCopy(yy,zz);}
1921:   VecGetArrayRead(xx,&x);
1922:   VecGetArray(zz,&y);
1923:   idx  = a->j;
1924:   v    = a->a;
1925:   ii   = a->i;

1927:   for (i=0; i<m; i++) {
1928:     jrow  = ii[i];
1929:     n     = ii[i+1] - jrow;
1930:     sum1  = 0.0;
1931:     sum2  = 0.0;
1932:     sum3  = 0.0;
1933:     sum4  = 0.0;
1934:     sum5  = 0.0;
1935:     sum6  = 0.0;
1936:     sum7  = 0.0;
1937:     sum8  = 0.0;
1938:     sum9  = 0.0;
1939:     sum10 = 0.0;
1940:     sum11 = 0.0;
1941:     for (j=0; j<n; j++) {
1942:       sum1  += v[jrow]*x[11*idx[jrow]];
1943:       sum2  += v[jrow]*x[11*idx[jrow]+1];
1944:       sum3  += v[jrow]*x[11*idx[jrow]+2];
1945:       sum4  += v[jrow]*x[11*idx[jrow]+3];
1946:       sum5  += v[jrow]*x[11*idx[jrow]+4];
1947:       sum6  += v[jrow]*x[11*idx[jrow]+5];
1948:       sum7  += v[jrow]*x[11*idx[jrow]+6];
1949:       sum8  += v[jrow]*x[11*idx[jrow]+7];
1950:       sum9  += v[jrow]*x[11*idx[jrow]+8];
1951:       sum10 += v[jrow]*x[11*idx[jrow]+9];
1952:       sum11 += v[jrow]*x[11*idx[jrow]+10];
1953:       jrow++;
1954:     }
1955:     y[11*i]    += sum1;
1956:     y[11*i+1]  += sum2;
1957:     y[11*i+2]  += sum3;
1958:     y[11*i+3]  += sum4;
1959:     y[11*i+4]  += sum5;
1960:     y[11*i+5]  += sum6;
1961:     y[11*i+6]  += sum7;
1962:     y[11*i+7]  += sum8;
1963:     y[11*i+8]  += sum9;
1964:     y[11*i+9]  += sum10;
1965:     y[11*i+10] += sum11;
1966:   }

1968:   PetscLogFlops(22.0*a->nz);
1969:   VecRestoreArrayRead(xx,&x);
1970:   VecRestoreArray(yy,&y);
1971:   return(0);
1972: }

1974: PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1975: {
1976:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1977:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1978:   const PetscScalar *x,*v;
1979:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1980:   PetscErrorCode    ierr;
1981:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1982:   PetscInt          n,i;

1985:   VecSet(yy,0.0);
1986:   VecGetArrayRead(xx,&x);
1987:   VecGetArray(yy,&y);

1989:   for (i=0; i<m; i++) {
1990:     idx     = a->j + a->i[i];
1991:     v       = a->a + a->i[i];
1992:     n       = a->i[i+1] - a->i[i];
1993:     alpha1  = x[11*i];
1994:     alpha2  = x[11*i+1];
1995:     alpha3  = x[11*i+2];
1996:     alpha4  = x[11*i+3];
1997:     alpha5  = x[11*i+4];
1998:     alpha6  = x[11*i+5];
1999:     alpha7  = x[11*i+6];
2000:     alpha8  = x[11*i+7];
2001:     alpha9  = x[11*i+8];
2002:     alpha10 = x[11*i+9];
2003:     alpha11 = x[11*i+10];
2004:     while (n-->0) {
2005:       y[11*(*idx)]    += alpha1*(*v);
2006:       y[11*(*idx)+1]  += alpha2*(*v);
2007:       y[11*(*idx)+2]  += alpha3*(*v);
2008:       y[11*(*idx)+3]  += alpha4*(*v);
2009:       y[11*(*idx)+4]  += alpha5*(*v);
2010:       y[11*(*idx)+5]  += alpha6*(*v);
2011:       y[11*(*idx)+6]  += alpha7*(*v);
2012:       y[11*(*idx)+7]  += alpha8*(*v);
2013:       y[11*(*idx)+8]  += alpha9*(*v);
2014:       y[11*(*idx)+9]  += alpha10*(*v);
2015:       y[11*(*idx)+10] += alpha11*(*v);
2016:       idx++; v++;
2017:     }
2018:   }
2019:   PetscLogFlops(22.0*a->nz);
2020:   VecRestoreArrayRead(xx,&x);
2021:   VecRestoreArray(yy,&y);
2022:   return(0);
2023: }

2025: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2026: {
2027:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2028:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2029:   const PetscScalar *x,*v;
2030:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2031:   PetscErrorCode    ierr;
2032:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2033:   PetscInt          n,i;

2036:   if (yy != zz) {VecCopy(yy,zz);}
2037:   VecGetArrayRead(xx,&x);
2038:   VecGetArray(zz,&y);
2039:   for (i=0; i<m; i++) {
2040:     idx     = a->j + a->i[i];
2041:     v       = a->a + a->i[i];
2042:     n       = a->i[i+1] - a->i[i];
2043:     alpha1  = x[11*i];
2044:     alpha2  = x[11*i+1];
2045:     alpha3  = x[11*i+2];
2046:     alpha4  = x[11*i+3];
2047:     alpha5  = x[11*i+4];
2048:     alpha6  = x[11*i+5];
2049:     alpha7  = x[11*i+6];
2050:     alpha8  = x[11*i+7];
2051:     alpha9  = x[11*i+8];
2052:     alpha10 = x[11*i+9];
2053:     alpha11 = x[11*i+10];
2054:     while (n-->0) {
2055:       y[11*(*idx)]    += alpha1*(*v);
2056:       y[11*(*idx)+1]  += alpha2*(*v);
2057:       y[11*(*idx)+2]  += alpha3*(*v);
2058:       y[11*(*idx)+3]  += alpha4*(*v);
2059:       y[11*(*idx)+4]  += alpha5*(*v);
2060:       y[11*(*idx)+5]  += alpha6*(*v);
2061:       y[11*(*idx)+6]  += alpha7*(*v);
2062:       y[11*(*idx)+7]  += alpha8*(*v);
2063:       y[11*(*idx)+8]  += alpha9*(*v);
2064:       y[11*(*idx)+9]  += alpha10*(*v);
2065:       y[11*(*idx)+10] += alpha11*(*v);
2066:       idx++; v++;
2067:     }
2068:   }
2069:   PetscLogFlops(22.0*a->nz);
2070:   VecRestoreArrayRead(xx,&x);
2071:   VecRestoreArray(zz,&y);
2072:   return(0);
2073: }


2076: /*--------------------------------------------------------------------------------------------*/
2077: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2078: {
2079:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2080:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2081:   const PetscScalar *x,*v;
2082:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2083:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2084:   PetscErrorCode    ierr;
2085:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2086:   PetscInt          nonzerorow=0,n,i,jrow,j;

2089:   VecGetArrayRead(xx,&x);
2090:   VecGetArray(yy,&y);
2091:   idx  = a->j;
2092:   v    = a->a;
2093:   ii   = a->i;

2095:   for (i=0; i<m; i++) {
2096:     jrow  = ii[i];
2097:     n     = ii[i+1] - jrow;
2098:     sum1  = 0.0;
2099:     sum2  = 0.0;
2100:     sum3  = 0.0;
2101:     sum4  = 0.0;
2102:     sum5  = 0.0;
2103:     sum6  = 0.0;
2104:     sum7  = 0.0;
2105:     sum8  = 0.0;
2106:     sum9  = 0.0;
2107:     sum10 = 0.0;
2108:     sum11 = 0.0;
2109:     sum12 = 0.0;
2110:     sum13 = 0.0;
2111:     sum14 = 0.0;
2112:     sum15 = 0.0;
2113:     sum16 = 0.0;

2115:     nonzerorow += (n>0);
2116:     for (j=0; j<n; j++) {
2117:       sum1  += v[jrow]*x[16*idx[jrow]];
2118:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2119:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2120:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2121:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2122:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2123:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2124:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2125:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2126:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2127:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2128:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2129:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2130:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2131:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2132:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2133:       jrow++;
2134:     }
2135:     y[16*i]    = sum1;
2136:     y[16*i+1]  = sum2;
2137:     y[16*i+2]  = sum3;
2138:     y[16*i+3]  = sum4;
2139:     y[16*i+4]  = sum5;
2140:     y[16*i+5]  = sum6;
2141:     y[16*i+6]  = sum7;
2142:     y[16*i+7]  = sum8;
2143:     y[16*i+8]  = sum9;
2144:     y[16*i+9]  = sum10;
2145:     y[16*i+10] = sum11;
2146:     y[16*i+11] = sum12;
2147:     y[16*i+12] = sum13;
2148:     y[16*i+13] = sum14;
2149:     y[16*i+14] = sum15;
2150:     y[16*i+15] = sum16;
2151:   }

2153:   PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);
2154:   VecRestoreArrayRead(xx,&x);
2155:   VecRestoreArray(yy,&y);
2156:   return(0);
2157: }

2159: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2160: {
2161:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2162:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2163:   const PetscScalar *x,*v;
2164:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2165:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2166:   PetscErrorCode    ierr;
2167:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2168:   PetscInt          n,i;

2171:   VecSet(yy,0.0);
2172:   VecGetArrayRead(xx,&x);
2173:   VecGetArray(yy,&y);

2175:   for (i=0; i<m; i++) {
2176:     idx     = a->j + a->i[i];
2177:     v       = a->a + a->i[i];
2178:     n       = a->i[i+1] - a->i[i];
2179:     alpha1  = x[16*i];
2180:     alpha2  = x[16*i+1];
2181:     alpha3  = x[16*i+2];
2182:     alpha4  = x[16*i+3];
2183:     alpha5  = x[16*i+4];
2184:     alpha6  = x[16*i+5];
2185:     alpha7  = x[16*i+6];
2186:     alpha8  = x[16*i+7];
2187:     alpha9  = x[16*i+8];
2188:     alpha10 = x[16*i+9];
2189:     alpha11 = x[16*i+10];
2190:     alpha12 = x[16*i+11];
2191:     alpha13 = x[16*i+12];
2192:     alpha14 = x[16*i+13];
2193:     alpha15 = x[16*i+14];
2194:     alpha16 = x[16*i+15];
2195:     while (n-->0) {
2196:       y[16*(*idx)]    += alpha1*(*v);
2197:       y[16*(*idx)+1]  += alpha2*(*v);
2198:       y[16*(*idx)+2]  += alpha3*(*v);
2199:       y[16*(*idx)+3]  += alpha4*(*v);
2200:       y[16*(*idx)+4]  += alpha5*(*v);
2201:       y[16*(*idx)+5]  += alpha6*(*v);
2202:       y[16*(*idx)+6]  += alpha7*(*v);
2203:       y[16*(*idx)+7]  += alpha8*(*v);
2204:       y[16*(*idx)+8]  += alpha9*(*v);
2205:       y[16*(*idx)+9]  += alpha10*(*v);
2206:       y[16*(*idx)+10] += alpha11*(*v);
2207:       y[16*(*idx)+11] += alpha12*(*v);
2208:       y[16*(*idx)+12] += alpha13*(*v);
2209:       y[16*(*idx)+13] += alpha14*(*v);
2210:       y[16*(*idx)+14] += alpha15*(*v);
2211:       y[16*(*idx)+15] += alpha16*(*v);
2212:       idx++; v++;
2213:     }
2214:   }
2215:   PetscLogFlops(32.0*a->nz);
2216:   VecRestoreArrayRead(xx,&x);
2217:   VecRestoreArray(yy,&y);
2218:   return(0);
2219: }

2221: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2222: {
2223:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2224:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2225:   const PetscScalar *x,*v;
2226:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2227:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2228:   PetscErrorCode    ierr;
2229:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2230:   PetscInt          n,i,jrow,j;

2233:   if (yy != zz) {VecCopy(yy,zz);}
2234:   VecGetArrayRead(xx,&x);
2235:   VecGetArray(zz,&y);
2236:   idx  = a->j;
2237:   v    = a->a;
2238:   ii   = a->i;

2240:   for (i=0; i<m; i++) {
2241:     jrow  = ii[i];
2242:     n     = ii[i+1] - jrow;
2243:     sum1  = 0.0;
2244:     sum2  = 0.0;
2245:     sum3  = 0.0;
2246:     sum4  = 0.0;
2247:     sum5  = 0.0;
2248:     sum6  = 0.0;
2249:     sum7  = 0.0;
2250:     sum8  = 0.0;
2251:     sum9  = 0.0;
2252:     sum10 = 0.0;
2253:     sum11 = 0.0;
2254:     sum12 = 0.0;
2255:     sum13 = 0.0;
2256:     sum14 = 0.0;
2257:     sum15 = 0.0;
2258:     sum16 = 0.0;
2259:     for (j=0; j<n; j++) {
2260:       sum1  += v[jrow]*x[16*idx[jrow]];
2261:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2262:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2263:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2264:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2265:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2266:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2267:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2268:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2269:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2270:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2271:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2272:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2273:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2274:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2275:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2276:       jrow++;
2277:     }
2278:     y[16*i]    += sum1;
2279:     y[16*i+1]  += sum2;
2280:     y[16*i+2]  += sum3;
2281:     y[16*i+3]  += sum4;
2282:     y[16*i+4]  += sum5;
2283:     y[16*i+5]  += sum6;
2284:     y[16*i+6]  += sum7;
2285:     y[16*i+7]  += sum8;
2286:     y[16*i+8]  += sum9;
2287:     y[16*i+9]  += sum10;
2288:     y[16*i+10] += sum11;
2289:     y[16*i+11] += sum12;
2290:     y[16*i+12] += sum13;
2291:     y[16*i+13] += sum14;
2292:     y[16*i+14] += sum15;
2293:     y[16*i+15] += sum16;
2294:   }

2296:   PetscLogFlops(32.0*a->nz);
2297:   VecRestoreArrayRead(xx,&x);
2298:   VecRestoreArray(zz,&y);
2299:   return(0);
2300: }

2302: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2303: {
2304:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2305:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2306:   const PetscScalar *x,*v;
2307:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2308:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2309:   PetscErrorCode    ierr;
2310:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2311:   PetscInt          n,i;

2314:   if (yy != zz) {VecCopy(yy,zz);}
2315:   VecGetArrayRead(xx,&x);
2316:   VecGetArray(zz,&y);
2317:   for (i=0; i<m; i++) {
2318:     idx     = a->j + a->i[i];
2319:     v       = a->a + a->i[i];
2320:     n       = a->i[i+1] - a->i[i];
2321:     alpha1  = x[16*i];
2322:     alpha2  = x[16*i+1];
2323:     alpha3  = x[16*i+2];
2324:     alpha4  = x[16*i+3];
2325:     alpha5  = x[16*i+4];
2326:     alpha6  = x[16*i+5];
2327:     alpha7  = x[16*i+6];
2328:     alpha8  = x[16*i+7];
2329:     alpha9  = x[16*i+8];
2330:     alpha10 = x[16*i+9];
2331:     alpha11 = x[16*i+10];
2332:     alpha12 = x[16*i+11];
2333:     alpha13 = x[16*i+12];
2334:     alpha14 = x[16*i+13];
2335:     alpha15 = x[16*i+14];
2336:     alpha16 = x[16*i+15];
2337:     while (n-->0) {
2338:       y[16*(*idx)]    += alpha1*(*v);
2339:       y[16*(*idx)+1]  += alpha2*(*v);
2340:       y[16*(*idx)+2]  += alpha3*(*v);
2341:       y[16*(*idx)+3]  += alpha4*(*v);
2342:       y[16*(*idx)+4]  += alpha5*(*v);
2343:       y[16*(*idx)+5]  += alpha6*(*v);
2344:       y[16*(*idx)+6]  += alpha7*(*v);
2345:       y[16*(*idx)+7]  += alpha8*(*v);
2346:       y[16*(*idx)+8]  += alpha9*(*v);
2347:       y[16*(*idx)+9]  += alpha10*(*v);
2348:       y[16*(*idx)+10] += alpha11*(*v);
2349:       y[16*(*idx)+11] += alpha12*(*v);
2350:       y[16*(*idx)+12] += alpha13*(*v);
2351:       y[16*(*idx)+13] += alpha14*(*v);
2352:       y[16*(*idx)+14] += alpha15*(*v);
2353:       y[16*(*idx)+15] += alpha16*(*v);
2354:       idx++; v++;
2355:     }
2356:   }
2357:   PetscLogFlops(32.0*a->nz);
2358:   VecRestoreArrayRead(xx,&x);
2359:   VecRestoreArray(zz,&y);
2360:   return(0);
2361: }

2363: /*--------------------------------------------------------------------------------------------*/
2364: PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2365: {
2366:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2367:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2368:   const PetscScalar *x,*v;
2369:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2370:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2371:   PetscErrorCode    ierr;
2372:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2373:   PetscInt          nonzerorow=0,n,i,jrow,j;

2376:   VecGetArrayRead(xx,&x);
2377:   VecGetArray(yy,&y);
2378:   idx  = a->j;
2379:   v    = a->a;
2380:   ii   = a->i;

2382:   for (i=0; i<m; i++) {
2383:     jrow  = ii[i];
2384:     n     = ii[i+1] - jrow;
2385:     sum1  = 0.0;
2386:     sum2  = 0.0;
2387:     sum3  = 0.0;
2388:     sum4  = 0.0;
2389:     sum5  = 0.0;
2390:     sum6  = 0.0;
2391:     sum7  = 0.0;
2392:     sum8  = 0.0;
2393:     sum9  = 0.0;
2394:     sum10 = 0.0;
2395:     sum11 = 0.0;
2396:     sum12 = 0.0;
2397:     sum13 = 0.0;
2398:     sum14 = 0.0;
2399:     sum15 = 0.0;
2400:     sum16 = 0.0;
2401:     sum17 = 0.0;
2402:     sum18 = 0.0;

2404:     nonzerorow += (n>0);
2405:     for (j=0; j<n; j++) {
2406:       sum1  += v[jrow]*x[18*idx[jrow]];
2407:       sum2  += v[jrow]*x[18*idx[jrow]+1];
2408:       sum3  += v[jrow]*x[18*idx[jrow]+2];
2409:       sum4  += v[jrow]*x[18*idx[jrow]+3];
2410:       sum5  += v[jrow]*x[18*idx[jrow]+4];
2411:       sum6  += v[jrow]*x[18*idx[jrow]+5];
2412:       sum7  += v[jrow]*x[18*idx[jrow]+6];
2413:       sum8  += v[jrow]*x[18*idx[jrow]+7];
2414:       sum9  += v[jrow]*x[18*idx[jrow]+8];
2415:       sum10 += v[jrow]*x[18*idx[jrow]+9];
2416:       sum11 += v[jrow]*x[18*idx[jrow]+10];
2417:       sum12 += v[jrow]*x[18*idx[jrow]+11];
2418:       sum13 += v[jrow]*x[18*idx[jrow]+12];
2419:       sum14 += v[jrow]*x[18*idx[jrow]+13];
2420:       sum15 += v[jrow]*x[18*idx[jrow]+14];
2421:       sum16 += v[jrow]*x[18*idx[jrow]+15];
2422:       sum17 += v[jrow]*x[18*idx[jrow]+16];
2423:       sum18 += v[jrow]*x[18*idx[jrow]+17];
2424:       jrow++;
2425:     }
2426:     y[18*i]    = sum1;
2427:     y[18*i+1]  = sum2;
2428:     y[18*i+2]  = sum3;
2429:     y[18*i+3]  = sum4;
2430:     y[18*i+4]  = sum5;
2431:     y[18*i+5]  = sum6;
2432:     y[18*i+6]  = sum7;
2433:     y[18*i+7]  = sum8;
2434:     y[18*i+8]  = sum9;
2435:     y[18*i+9]  = sum10;
2436:     y[18*i+10] = sum11;
2437:     y[18*i+11] = sum12;
2438:     y[18*i+12] = sum13;
2439:     y[18*i+13] = sum14;
2440:     y[18*i+14] = sum15;
2441:     y[18*i+15] = sum16;
2442:     y[18*i+16] = sum17;
2443:     y[18*i+17] = sum18;
2444:   }

2446:   PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);
2447:   VecRestoreArrayRead(xx,&x);
2448:   VecRestoreArray(yy,&y);
2449:   return(0);
2450: }

2452: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2453: {
2454:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2455:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2456:   const PetscScalar *x,*v;
2457:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2458:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2459:   PetscErrorCode    ierr;
2460:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2461:   PetscInt          n,i;

2464:   VecSet(yy,0.0);
2465:   VecGetArrayRead(xx,&x);
2466:   VecGetArray(yy,&y);

2468:   for (i=0; i<m; i++) {
2469:     idx     = a->j + a->i[i];
2470:     v       = a->a + a->i[i];
2471:     n       = a->i[i+1] - a->i[i];
2472:     alpha1  = x[18*i];
2473:     alpha2  = x[18*i+1];
2474:     alpha3  = x[18*i+2];
2475:     alpha4  = x[18*i+3];
2476:     alpha5  = x[18*i+4];
2477:     alpha6  = x[18*i+5];
2478:     alpha7  = x[18*i+6];
2479:     alpha8  = x[18*i+7];
2480:     alpha9  = x[18*i+8];
2481:     alpha10 = x[18*i+9];
2482:     alpha11 = x[18*i+10];
2483:     alpha12 = x[18*i+11];
2484:     alpha13 = x[18*i+12];
2485:     alpha14 = x[18*i+13];
2486:     alpha15 = x[18*i+14];
2487:     alpha16 = x[18*i+15];
2488:     alpha17 = x[18*i+16];
2489:     alpha18 = x[18*i+17];
2490:     while (n-->0) {
2491:       y[18*(*idx)]    += alpha1*(*v);
2492:       y[18*(*idx)+1]  += alpha2*(*v);
2493:       y[18*(*idx)+2]  += alpha3*(*v);
2494:       y[18*(*idx)+3]  += alpha4*(*v);
2495:       y[18*(*idx)+4]  += alpha5*(*v);
2496:       y[18*(*idx)+5]  += alpha6*(*v);
2497:       y[18*(*idx)+6]  += alpha7*(*v);
2498:       y[18*(*idx)+7]  += alpha8*(*v);
2499:       y[18*(*idx)+8]  += alpha9*(*v);
2500:       y[18*(*idx)+9]  += alpha10*(*v);
2501:       y[18*(*idx)+10] += alpha11*(*v);
2502:       y[18*(*idx)+11] += alpha12*(*v);
2503:       y[18*(*idx)+12] += alpha13*(*v);
2504:       y[18*(*idx)+13] += alpha14*(*v);
2505:       y[18*(*idx)+14] += alpha15*(*v);
2506:       y[18*(*idx)+15] += alpha16*(*v);
2507:       y[18*(*idx)+16] += alpha17*(*v);
2508:       y[18*(*idx)+17] += alpha18*(*v);
2509:       idx++; v++;
2510:     }
2511:   }
2512:   PetscLogFlops(36.0*a->nz);
2513:   VecRestoreArrayRead(xx,&x);
2514:   VecRestoreArray(yy,&y);
2515:   return(0);
2516: }

2518: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2519: {
2520:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2521:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2522:   const PetscScalar *x,*v;
2523:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2524:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2525:   PetscErrorCode    ierr;
2526:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2527:   PetscInt          n,i,jrow,j;

2530:   if (yy != zz) {VecCopy(yy,zz);}
2531:   VecGetArrayRead(xx,&x);
2532:   VecGetArray(zz,&y);
2533:   idx  = a->j;
2534:   v    = a->a;
2535:   ii   = a->i;

2537:   for (i=0; i<m; i++) {
2538:     jrow  = ii[i];
2539:     n     = ii[i+1] - jrow;
2540:     sum1  = 0.0;
2541:     sum2  = 0.0;
2542:     sum3  = 0.0;
2543:     sum4  = 0.0;
2544:     sum5  = 0.0;
2545:     sum6  = 0.0;
2546:     sum7  = 0.0;
2547:     sum8  = 0.0;
2548:     sum9  = 0.0;
2549:     sum10 = 0.0;
2550:     sum11 = 0.0;
2551:     sum12 = 0.0;
2552:     sum13 = 0.0;
2553:     sum14 = 0.0;
2554:     sum15 = 0.0;
2555:     sum16 = 0.0;
2556:     sum17 = 0.0;
2557:     sum18 = 0.0;
2558:     for (j=0; j<n; j++) {
2559:       sum1  += v[jrow]*x[18*idx[jrow]];
2560:       sum2  += v[jrow]*x[18*idx[jrow]+1];
2561:       sum3  += v[jrow]*x[18*idx[jrow]+2];
2562:       sum4  += v[jrow]*x[18*idx[jrow]+3];
2563:       sum5  += v[jrow]*x[18*idx[jrow]+4];
2564:       sum6  += v[jrow]*x[18*idx[jrow]+5];
2565:       sum7  += v[jrow]*x[18*idx[jrow]+6];
2566:       sum8  += v[jrow]*x[18*idx[jrow]+7];
2567:       sum9  += v[jrow]*x[18*idx[jrow]+8];
2568:       sum10 += v[jrow]*x[18*idx[jrow]+9];
2569:       sum11 += v[jrow]*x[18*idx[jrow]+10];
2570:       sum12 += v[jrow]*x[18*idx[jrow]+11];
2571:       sum13 += v[jrow]*x[18*idx[jrow]+12];
2572:       sum14 += v[jrow]*x[18*idx[jrow]+13];
2573:       sum15 += v[jrow]*x[18*idx[jrow]+14];
2574:       sum16 += v[jrow]*x[18*idx[jrow]+15];
2575:       sum17 += v[jrow]*x[18*idx[jrow]+16];
2576:       sum18 += v[jrow]*x[18*idx[jrow]+17];
2577:       jrow++;
2578:     }
2579:     y[18*i]    += sum1;
2580:     y[18*i+1]  += sum2;
2581:     y[18*i+2]  += sum3;
2582:     y[18*i+3]  += sum4;
2583:     y[18*i+4]  += sum5;
2584:     y[18*i+5]  += sum6;
2585:     y[18*i+6]  += sum7;
2586:     y[18*i+7]  += sum8;
2587:     y[18*i+8]  += sum9;
2588:     y[18*i+9]  += sum10;
2589:     y[18*i+10] += sum11;
2590:     y[18*i+11] += sum12;
2591:     y[18*i+12] += sum13;
2592:     y[18*i+13] += sum14;
2593:     y[18*i+14] += sum15;
2594:     y[18*i+15] += sum16;
2595:     y[18*i+16] += sum17;
2596:     y[18*i+17] += sum18;
2597:   }

2599:   PetscLogFlops(36.0*a->nz);
2600:   VecRestoreArrayRead(xx,&x);
2601:   VecRestoreArray(zz,&y);
2602:   return(0);
2603: }

2605: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2606: {
2607:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2608:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2609:   const PetscScalar *x,*v;
2610:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2611:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2612:   PetscErrorCode    ierr;
2613:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2614:   PetscInt          n,i;

2617:   if (yy != zz) {VecCopy(yy,zz);}
2618:   VecGetArrayRead(xx,&x);
2619:   VecGetArray(zz,&y);
2620:   for (i=0; i<m; i++) {
2621:     idx     = a->j + a->i[i];
2622:     v       = a->a + a->i[i];
2623:     n       = a->i[i+1] - a->i[i];
2624:     alpha1  = x[18*i];
2625:     alpha2  = x[18*i+1];
2626:     alpha3  = x[18*i+2];
2627:     alpha4  = x[18*i+3];
2628:     alpha5  = x[18*i+4];
2629:     alpha6  = x[18*i+5];
2630:     alpha7  = x[18*i+6];
2631:     alpha8  = x[18*i+7];
2632:     alpha9  = x[18*i+8];
2633:     alpha10 = x[18*i+9];
2634:     alpha11 = x[18*i+10];
2635:     alpha12 = x[18*i+11];
2636:     alpha13 = x[18*i+12];
2637:     alpha14 = x[18*i+13];
2638:     alpha15 = x[18*i+14];
2639:     alpha16 = x[18*i+15];
2640:     alpha17 = x[18*i+16];
2641:     alpha18 = x[18*i+17];
2642:     while (n-->0) {
2643:       y[18*(*idx)]    += alpha1*(*v);
2644:       y[18*(*idx)+1]  += alpha2*(*v);
2645:       y[18*(*idx)+2]  += alpha3*(*v);
2646:       y[18*(*idx)+3]  += alpha4*(*v);
2647:       y[18*(*idx)+4]  += alpha5*(*v);
2648:       y[18*(*idx)+5]  += alpha6*(*v);
2649:       y[18*(*idx)+6]  += alpha7*(*v);
2650:       y[18*(*idx)+7]  += alpha8*(*v);
2651:       y[18*(*idx)+8]  += alpha9*(*v);
2652:       y[18*(*idx)+9]  += alpha10*(*v);
2653:       y[18*(*idx)+10] += alpha11*(*v);
2654:       y[18*(*idx)+11] += alpha12*(*v);
2655:       y[18*(*idx)+12] += alpha13*(*v);
2656:       y[18*(*idx)+13] += alpha14*(*v);
2657:       y[18*(*idx)+14] += alpha15*(*v);
2658:       y[18*(*idx)+15] += alpha16*(*v);
2659:       y[18*(*idx)+16] += alpha17*(*v);
2660:       y[18*(*idx)+17] += alpha18*(*v);
2661:       idx++; v++;
2662:     }
2663:   }
2664:   PetscLogFlops(36.0*a->nz);
2665:   VecRestoreArrayRead(xx,&x);
2666:   VecRestoreArray(zz,&y);
2667:   return(0);
2668: }

2670: PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2671: {
2672:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2673:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2674:   const PetscScalar *x,*v;
2675:   PetscScalar       *y,*sums;
2676:   PetscErrorCode    ierr;
2677:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2678:   PetscInt          n,i,jrow,j,dof = b->dof,k;

2681:   VecGetArrayRead(xx,&x);
2682:   VecSet(yy,0.0);
2683:   VecGetArray(yy,&y);
2684:   idx  = a->j;
2685:   v    = a->a;
2686:   ii   = a->i;

2688:   for (i=0; i<m; i++) {
2689:     jrow = ii[i];
2690:     n    = ii[i+1] - jrow;
2691:     sums = y + dof*i;
2692:     for (j=0; j<n; j++) {
2693:       for (k=0; k<dof; k++) {
2694:         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2695:       }
2696:       jrow++;
2697:     }
2698:   }

2700:   PetscLogFlops(2.0*dof*a->nz);
2701:   VecRestoreArrayRead(xx,&x);
2702:   VecRestoreArray(yy,&y);
2703:   return(0);
2704: }

2706: PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2707: {
2708:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2709:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2710:   const PetscScalar *x,*v;
2711:   PetscScalar       *y,*sums;
2712:   PetscErrorCode    ierr;
2713:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2714:   PetscInt          n,i,jrow,j,dof = b->dof,k;

2717:   if (yy != zz) {VecCopy(yy,zz);}
2718:   VecGetArrayRead(xx,&x);
2719:   VecGetArray(zz,&y);
2720:   idx  = a->j;
2721:   v    = a->a;
2722:   ii   = a->i;

2724:   for (i=0; i<m; i++) {
2725:     jrow = ii[i];
2726:     n    = ii[i+1] - jrow;
2727:     sums = y + dof*i;
2728:     for (j=0; j<n; j++) {
2729:       for (k=0; k<dof; k++) {
2730:         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2731:       }
2732:       jrow++;
2733:     }
2734:   }

2736:   PetscLogFlops(2.0*dof*a->nz);
2737:   VecRestoreArrayRead(xx,&x);
2738:   VecRestoreArray(zz,&y);
2739:   return(0);
2740: }

2742: PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2743: {
2744:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2745:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2746:   const PetscScalar *x,*v,*alpha;
2747:   PetscScalar       *y;
2748:   PetscErrorCode    ierr;
2749:   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2750:   PetscInt          n,i,k;

2753:   VecGetArrayRead(xx,&x);
2754:   VecSet(yy,0.0);
2755:   VecGetArray(yy,&y);
2756:   for (i=0; i<m; i++) {
2757:     idx   = a->j + a->i[i];
2758:     v     = a->a + a->i[i];
2759:     n     = a->i[i+1] - a->i[i];
2760:     alpha = x + dof*i;
2761:     while (n-->0) {
2762:       for (k=0; k<dof; k++) {
2763:         y[dof*(*idx)+k] += alpha[k]*(*v);
2764:       }
2765:       idx++; v++;
2766:     }
2767:   }
2768:   PetscLogFlops(2.0*dof*a->nz);
2769:   VecRestoreArrayRead(xx,&x);
2770:   VecRestoreArray(yy,&y);
2771:   return(0);
2772: }

2774: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2775: {
2776:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2777:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2778:   const PetscScalar *x,*v,*alpha;
2779:   PetscScalar       *y;
2780:   PetscErrorCode    ierr;
2781:   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2782:   PetscInt          n,i,k;

2785:   if (yy != zz) {VecCopy(yy,zz);}
2786:   VecGetArrayRead(xx,&x);
2787:   VecGetArray(zz,&y);
2788:   for (i=0; i<m; i++) {
2789:     idx   = a->j + a->i[i];
2790:     v     = a->a + a->i[i];
2791:     n     = a->i[i+1] - a->i[i];
2792:     alpha = x + dof*i;
2793:     while (n-->0) {
2794:       for (k=0; k<dof; k++) {
2795:         y[dof*(*idx)+k] += alpha[k]*(*v);
2796:       }
2797:       idx++; v++;
2798:     }
2799:   }
2800:   PetscLogFlops(2.0*dof*a->nz);
2801:   VecRestoreArrayRead(xx,&x);
2802:   VecRestoreArray(zz,&y);
2803:   return(0);
2804: }

2806: /*===================================================================================*/
2807: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2808: {
2809:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2813:   /* start the scatter */
2814:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2815:   (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2816:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2817:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2818:   return(0);
2819: }

2821: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2822: {
2823:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2827:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2828:   (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2829:   VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2830:   VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2831:   return(0);
2832: }

2834: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2835: {
2836:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2840:   /* start the scatter */
2841:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2842:   (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2843:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2844:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2845:   return(0);
2846: }

2848: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2849: {
2850:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2854:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2855:   (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2856:   VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2857:   VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2858:   return(0);
2859: }

2861: /* ----------------------------------------------------------------*/
2862: PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
2863: {
2864:   Mat_Product    *product = C->product;

2867:   if (product->type == MATPRODUCT_PtAP) {
2868:     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
2869:   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices",MatProductTypes[product->type]);
2870:   return(0);
2871: }

2873: PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
2874: {
2876:   Mat_Product    *product = C->product;
2877:   PetscBool      flg = PETSC_FALSE;
2878:   Mat            A=product->A,P=product->B;
2879:   PetscInt       alg=1; /* set default algorithm */
2880: #if !defined(PETSC_HAVE_HYPRE)
2881:   const char     *algTypes[4] = {"scalable","nonscalable","allatonce","allatonce_merged"};
2882:   PetscInt       nalg=4;
2883: #else
2884:   const char     *algTypes[5] = {"scalable","nonscalable","allatonce","allatonce_merged","hypre"};
2885:   PetscInt       nalg=5;
2886: #endif

2889:   if (product->type != MATPRODUCT_PtAP) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices",MatProductTypes[product->type]);

2891:   /* PtAP */
2892:   /* Check matrix local sizes */
2893:   if (A->rmap->rstart != P->rmap->rstart || A->rmap->rend != P->rmap->rend) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Arow (%D, %D) != Prow (%D,%D)",A->rmap->rstart,A->rmap->rend,P->rmap->rstart,P->rmap->rend);
2894:   if (A->cmap->rstart != P->rmap->rstart || A->cmap->rend != P->rmap->rend) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Acol (%D, %D) != Prow (%D,%D)",A->cmap->rstart,A->cmap->rend,P->rmap->rstart,P->rmap->rend);

2896:   /* Set the default algorithm */
2897:   PetscStrcmp(C->product->alg,"default",&flg);
2898:   if (flg) {
2899:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2900:   }

2902:   /* Get runtime option */
2903:   PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
2904:   PetscOptionsEList("-matproduct_ptap_via","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[alg],&alg,&flg);
2905:   if (flg) {
2906:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2907:   }
2908:   PetscOptionsEnd();

2910:   PetscStrcmp(C->product->alg,"allatonce",&flg);
2911:   if (flg) {
2912:     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2913:     return(0);
2914:   }

2916:   PetscStrcmp(C->product->alg,"allatonce_merged",&flg);
2917:   if (flg) {
2918:     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2919:     return(0);
2920:   }

2922:   /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
2923:   PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n");
2924:   MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P);
2925:   MatProductSetFromOptions(C);
2926:   return(0);
2927: }

2929: /* ----------------------------------------------------------------*/
2930: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat C)
2931: {
2932:   PetscErrorCode     ierr;
2933:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2934:   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
2935:   Mat                P         =pp->AIJ;
2936:   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2937:   PetscInt           *pti,*ptj,*ptJ;
2938:   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2939:   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2940:   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2941:   MatScalar          *ca;
2942:   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;

2945:   /* Get ij structure of P^T */
2946:   MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

2948:   cn = pn*ppdof;
2949:   /* Allocate ci array, arrays for fill computation and */
2950:   /* free space for accumulating nonzero column info */
2951:   PetscMalloc1(cn+1,&ci);
2952:   ci[0] = 0;

2954:   /* Work arrays for rows of P^T*A */
2955:   PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);
2956:   PetscArrayzero(ptadenserow,an);
2957:   PetscArrayzero(denserow,cn);

2959:   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2960:   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2961:   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2962:   PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);
2963:   current_space = free_space;

2965:   /* Determine symbolic info for each row of C: */
2966:   for (i=0; i<pn; i++) {
2967:     ptnzi = pti[i+1] - pti[i];
2968:     ptJ   = ptj + pti[i];
2969:     for (dof=0; dof<ppdof; dof++) {
2970:       ptanzi = 0;
2971:       /* Determine symbolic row of PtA: */
2972:       for (j=0; j<ptnzi; j++) {
2973:         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2974:         arow = ptJ[j]*ppdof + dof;
2975:         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2976:         anzj = ai[arow+1] - ai[arow];
2977:         ajj  = aj + ai[arow];
2978:         for (k=0; k<anzj; k++) {
2979:           if (!ptadenserow[ajj[k]]) {
2980:             ptadenserow[ajj[k]]    = -1;
2981:             ptasparserow[ptanzi++] = ajj[k];
2982:           }
2983:         }
2984:       }
2985:       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2986:       ptaj = ptasparserow;
2987:       cnzi = 0;
2988:       for (j=0; j<ptanzi; j++) {
2989:         /* Get offset within block of P */
2990:         pshift = *ptaj%ppdof;
2991:         /* Get block row of P */
2992:         prow = (*ptaj++)/ppdof; /* integer division */
2993:         /* P has same number of nonzeros per row as the compressed form */
2994:         pnzj = pi[prow+1] - pi[prow];
2995:         pjj  = pj + pi[prow];
2996:         for (k=0;k<pnzj;k++) {
2997:           /* Locations in C are shifted by the offset within the block */
2998:           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2999:           if (!denserow[pjj[k]*ppdof+pshift]) {
3000:             denserow[pjj[k]*ppdof+pshift] = -1;
3001:             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
3002:           }
3003:         }
3004:       }

3006:       /* sort sparserow */
3007:       PetscSortInt(cnzi,sparserow);

3009:       /* If free space is not available, make more free space */
3010:       /* Double the amount of total space in the list */
3011:       if (current_space->local_remaining<cnzi) {
3012:         PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);
3013:       }

3015:       /* Copy data into free space, and zero out denserows */
3016:       PetscArraycpy(current_space->array,sparserow,cnzi);

3018:       current_space->array           += cnzi;
3019:       current_space->local_used      += cnzi;
3020:       current_space->local_remaining -= cnzi;

3022:       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
3023:       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;

3025:       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3026:       /*        For now, we will recompute what is needed. */
3027:       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
3028:     }
3029:   }
3030:   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3031:   /* Allocate space for cj, initialize cj, and */
3032:   /* destroy list of free space and other temporary array(s) */
3033:   PetscMalloc1(ci[cn]+1,&cj);
3034:   PetscFreeSpaceContiguous(&free_space,cj);
3035:   PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);

3037:   /* Allocate space for ca */
3038:   PetscCalloc1(ci[cn]+1,&ca);

3040:   /* put together the new matrix */
3041:   MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,NULL,C);
3042:   MatSetBlockSize(C,pp->dof);

3044:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3045:   /* Since these are PETSc arrays, change flags to free them as necessary. */
3046:   c          = (Mat_SeqAIJ*)(C->data);
3047:   c->free_a  = PETSC_TRUE;
3048:   c->free_ij = PETSC_TRUE;
3049:   c->nonew   = 0;

3051:   C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
3052:   C->ops->productnumeric = MatProductNumeric_PtAP;

3054:   /* Clean up. */
3055:   MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
3056:   return(0);
3057: }

3059: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
3060: {
3061:   /* This routine requires testing -- first draft only */
3062:   PetscErrorCode  ierr;
3063:   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
3064:   Mat             P  =pp->AIJ;
3065:   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
3066:   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
3067:   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
3068:   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3069:   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3070:   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3071:   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3072:   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3073:   MatScalar       *ca=c->a,*caj,*apa;

3076:   /* Allocate temporary array for storage of one row of A*P */
3077:   PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);

3079:   /* Clear old values in C */
3080:   PetscArrayzero(ca,ci[cm]);

3082:   for (i=0; i<am; i++) {
3083:     /* Form sparse row of A*P */
3084:     anzi  = ai[i+1] - ai[i];
3085:     apnzj = 0;
3086:     for (j=0; j<anzi; j++) {
3087:       /* Get offset within block of P */
3088:       pshift = *aj%ppdof;
3089:       /* Get block row of P */
3090:       prow = *aj++/ppdof;   /* integer division */
3091:       pnzj = pi[prow+1] - pi[prow];
3092:       pjj  = pj + pi[prow];
3093:       paj  = pa + pi[prow];
3094:       for (k=0; k<pnzj; k++) {
3095:         poffset = pjj[k]*ppdof+pshift;
3096:         if (!apjdense[poffset]) {
3097:           apjdense[poffset] = -1;
3098:           apj[apnzj++]      = poffset;
3099:         }
3100:         apa[poffset] += (*aa)*paj[k];
3101:       }
3102:       PetscLogFlops(2.0*pnzj);
3103:       aa++;
3104:     }

3106:     /* Sort the j index array for quick sparse axpy. */
3107:     /* Note: a array does not need sorting as it is in dense storage locations. */
3108:     PetscSortInt(apnzj,apj);

3110:     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3111:     prow    = i/ppdof; /* integer division */
3112:     pshift  = i%ppdof;
3113:     poffset = pi[prow];
3114:     pnzi    = pi[prow+1] - poffset;
3115:     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3116:     pJ = pj+poffset;
3117:     pA = pa+poffset;
3118:     for (j=0; j<pnzi; j++) {
3119:       crow = (*pJ)*ppdof+pshift;
3120:       cjj  = cj + ci[crow];
3121:       caj  = ca + ci[crow];
3122:       pJ++;
3123:       /* Perform sparse axpy operation.  Note cjj includes apj. */
3124:       for (k=0,nextap=0; nextap<apnzj; k++) {
3125:         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
3126:       }
3127:       PetscLogFlops(2.0*apnzj);
3128:       pA++;
3129:     }

3131:     /* Zero the current row info for A*P */
3132:     for (j=0; j<apnzj; j++) {
3133:       apa[apj[j]]      = 0.;
3134:       apjdense[apj[j]] = 0;
3135:     }
3136:   }

3138:   /* Assemble the final matrix and clean up */
3139:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
3140:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
3141:   PetscFree3(apa,apj,apjdense);
3142:   return(0);
3143: }

3145: PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
3146: {
3147:   PetscErrorCode      ierr;
3148:   Mat_Product         *product = C->product;
3149:   Mat                 A=product->A,P=product->B;

3152:   MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,product->fill,C);
3153:   return(0);
3154: }

3156: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3157: {
3159:   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
3160: }

3162: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3163: {
3165:   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3166: }

3168: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,Mat);

3170: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C)
3171: {
3172:   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3173:   PetscErrorCode  ierr;


3177:   MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,C);
3178:   return(0);
3179: }

3181: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,PetscReal,Mat);

3183: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C)
3184: {
3185:   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3186:   PetscErrorCode  ierr;

3189:   MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,fill,C);
3190:   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3191:   return(0);
3192: }

3194: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,Mat);

3196: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C)
3197: {
3198:   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3199:   PetscErrorCode  ierr;


3203:   MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,C);
3204:   return(0);
3205: }

3207: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,PetscReal,Mat);

3209: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C)
3210: {
3211:   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3212:   PetscErrorCode  ierr;


3216:   MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,fill,C);
3217:   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3218:   return(0);
3219: }

3221: PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
3222: {
3223:   PetscErrorCode      ierr;
3224:   Mat_Product         *product = C->product;
3225:   Mat                 A=product->A,P=product->B;
3226:   PetscBool           flg;

3229:   PetscStrcmp(product->alg,"allatonce",&flg);
3230:   if (flg) {
3231:     MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A,P,product->fill,C);
3232:     C->ops->productnumeric = MatProductNumeric_PtAP;
3233:     return(0);
3234:   }

3236:   PetscStrcmp(product->alg,"allatonce_merged",&flg);
3237:   if (flg) {
3238:     MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A,P,product->fill,C);
3239:     C->ops->productnumeric = MatProductNumeric_PtAP;
3240:     return(0);
3241:   }

3243:   SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
3244: }

3246: PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3247: {
3248:   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3249:   Mat            a    = b->AIJ,B;
3250:   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
3252:   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3253:   PetscInt       *cols;
3254:   PetscScalar    *vals;

3257:   MatGetSize(a,&m,&n);
3258:   PetscMalloc1(dof*m,&ilen);
3259:   for (i=0; i<m; i++) {
3260:     nmax = PetscMax(nmax,aij->ilen[i]);
3261:     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3262:   }
3263:   MatCreate(PETSC_COMM_SELF,&B);
3264:   MatSetSizes(B,dof*m,dof*n,dof*m,dof*n);
3265:   MatSetType(B,newtype);
3266:   MatSeqAIJSetPreallocation(B,0,ilen);
3267:   PetscFree(ilen);
3268:   PetscMalloc1(nmax,&icols);
3269:   ii   = 0;
3270:   for (i=0; i<m; i++) {
3271:     MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3272:     for (j=0; j<dof; j++) {
3273:       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3274:       MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3275:       ii++;
3276:     }
3277:     MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3278:   }
3279:   PetscFree(icols);
3280:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3281:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

3283:   if (reuse == MAT_INPLACE_MATRIX) {
3284:     MatHeaderReplace(A,&B);
3285:   } else {
3286:     *newmat = B;
3287:   }
3288:   return(0);
3289: }

3291: #include <../src/mat/impls/aij/mpi/mpiaij.h>

3293: PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3294: {
3295:   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3296:   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3297:   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3298:   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
3299:   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
3300:   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3301:   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3302:   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3303:   PetscInt       rstart,cstart,*garray,ii,k;
3305:   PetscScalar    *vals,*ovals;

3308:   PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);
3309:   for (i=0; i<A->rmap->n/dof; i++) {
3310:     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3311:     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3312:     for (j=0; j<dof; j++) {
3313:       dnz[dof*i+j] = AIJ->ilen[i];
3314:       onz[dof*i+j] = OAIJ->ilen[i];
3315:     }
3316:   }
3317:   MatCreate(PetscObjectComm((PetscObject)A),&B);
3318:   MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
3319:   MatSetType(B,newtype);
3320:   MatMPIAIJSetPreallocation(B,0,dnz,0,onz);
3321:   MatSetBlockSize(B,dof);
3322:   PetscFree2(dnz,onz);

3324:   PetscMalloc2(nmax,&icols,onmax,&oicols);
3325:   rstart = dof*maij->A->rmap->rstart;
3326:   cstart = dof*maij->A->cmap->rstart;
3327:   garray = mpiaij->garray;

3329:   ii = rstart;
3330:   for (i=0; i<A->rmap->n/dof; i++) {
3331:     MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3332:     MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3333:     for (j=0; j<dof; j++) {
3334:       for (k=0; k<ncols; k++) {
3335:         icols[k] = cstart + dof*cols[k]+j;
3336:       }
3337:       for (k=0; k<oncols; k++) {
3338:         oicols[k] = dof*garray[ocols[k]]+j;
3339:       }
3340:       MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3341:       MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
3342:       ii++;
3343:     }
3344:     MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3345:     MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3346:   }
3347:   PetscFree2(icols,oicols);

3349:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3350:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

3352:   if (reuse == MAT_INPLACE_MATRIX) {
3353:     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3354:     ((PetscObject)A)->refct = 1;

3356:     MatHeaderReplace(A,&B);

3358:     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3359:   } else {
3360:     *newmat = B;
3361:   }
3362:   return(0);
3363: }

3365: PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3366: {
3368:   Mat            A;

3371:   MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3372:   MatCreateSubMatrix(A,isrow,iscol,cll,newmat);
3373:   MatDestroy(&A);
3374:   return(0);
3375: }

3377: PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[])
3378: {
3380:   Mat            A;

3383:   MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3384:   MatCreateSubMatrices(A,n,irow,icol,scall,submat);
3385:   MatDestroy(&A);
3386:   return(0);
3387: }

3389: /* ---------------------------------------------------------------------------------- */
3390: /*@
3391:   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3392:   operations for multicomponent problems.  It interpolates each component the same
3393:   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3394:   and MATMPIAIJ for distributed matrices.

3396:   Collective

3398:   Input Parameters:
3399: + A - the AIJ matrix describing the action on blocks
3400: - dof - the block size (number of components per node)

3402:   Output Parameter:
3403: . maij - the new MAIJ matrix

3405:   Operations provided:
3406: + MatMult
3407: . MatMultTranspose
3408: . MatMultAdd
3409: . MatMultTransposeAdd
3410: - MatView

3412:   Level: advanced

3414: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3415: @*/
3416: PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3417: {
3419:   PetscMPIInt    size;
3420:   PetscInt       n;
3421:   Mat            B;
3422: #if defined(PETSC_HAVE_CUDA)
3423:   /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3424:   PetscBool      convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3425: #endif

3428:   dof  = PetscAbs(dof);
3429:   PetscObjectReference((PetscObject)A);

3431:   if (dof == 1) *maij = A;
3432:   else {
3433:     MatCreate(PetscObjectComm((PetscObject)A),&B);
3434:     /* propagate vec type */
3435:     MatSetVecType(B,A->defaultvectype);
3436:     MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);
3437:     PetscLayoutSetBlockSize(B->rmap,dof);
3438:     PetscLayoutSetBlockSize(B->cmap,dof);
3439:     PetscLayoutSetUp(B->rmap);
3440:     PetscLayoutSetUp(B->cmap);

3442:     B->assembled = PETSC_TRUE;

3444:     MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3445:     if (size == 1) {
3446:       Mat_SeqMAIJ *b;

3448:       MatSetType(B,MATSEQMAIJ);

3450:       B->ops->setup   = NULL;
3451:       B->ops->destroy = MatDestroy_SeqMAIJ;
3452:       B->ops->view    = MatView_SeqMAIJ;

3454:       b               = (Mat_SeqMAIJ*)B->data;
3455:       b->dof          = dof;
3456:       b->AIJ          = A;

3458:       if (dof == 2) {
3459:         B->ops->mult             = MatMult_SeqMAIJ_2;
3460:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3461:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3462:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3463:       } else if (dof == 3) {
3464:         B->ops->mult             = MatMult_SeqMAIJ_3;
3465:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3466:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3467:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3468:       } else if (dof == 4) {
3469:         B->ops->mult             = MatMult_SeqMAIJ_4;
3470:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3471:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3472:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3473:       } else if (dof == 5) {
3474:         B->ops->mult             = MatMult_SeqMAIJ_5;
3475:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3476:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3477:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3478:       } else if (dof == 6) {
3479:         B->ops->mult             = MatMult_SeqMAIJ_6;
3480:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3481:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3482:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3483:       } else if (dof == 7) {
3484:         B->ops->mult             = MatMult_SeqMAIJ_7;
3485:         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3486:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3487:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3488:       } else if (dof == 8) {
3489:         B->ops->mult             = MatMult_SeqMAIJ_8;
3490:         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3491:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3492:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3493:       } else if (dof == 9) {
3494:         B->ops->mult             = MatMult_SeqMAIJ_9;
3495:         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3496:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3497:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3498:       } else if (dof == 10) {
3499:         B->ops->mult             = MatMult_SeqMAIJ_10;
3500:         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3501:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3502:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3503:       } else if (dof == 11) {
3504:         B->ops->mult             = MatMult_SeqMAIJ_11;
3505:         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3506:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3507:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3508:       } else if (dof == 16) {
3509:         B->ops->mult             = MatMult_SeqMAIJ_16;
3510:         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3511:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3512:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3513:       } else if (dof == 18) {
3514:         B->ops->mult             = MatMult_SeqMAIJ_18;
3515:         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3516:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3517:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3518:       } else {
3519:         B->ops->mult             = MatMult_SeqMAIJ_N;
3520:         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3521:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3522:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3523:       }
3524: #if defined(PETSC_HAVE_CUDA)
3525:       PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ);
3526: #endif
3527:       PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);
3528:       PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaij_seqmaij_C",MatProductSetFromOptions_SeqAIJ_SeqMAIJ);
3529:     } else {
3530:       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3531:       Mat_MPIMAIJ *b;
3532:       IS          from,to;
3533:       Vec         gvec;

3535:       MatSetType(B,MATMPIMAIJ);

3537:       B->ops->setup            = NULL;
3538:       B->ops->destroy          = MatDestroy_MPIMAIJ;
3539:       B->ops->view             = MatView_MPIMAIJ;

3541:       b      = (Mat_MPIMAIJ*)B->data;
3542:       b->dof = dof;
3543:       b->A   = A;

3545:       MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ);
3546:       MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ);

3548:       VecGetSize(mpiaij->lvec,&n);
3549:       VecCreate(PETSC_COMM_SELF,&b->w);
3550:       VecSetSizes(b->w,n*dof,n*dof);
3551:       VecSetBlockSize(b->w,dof);
3552:       VecSetType(b->w,VECSEQ);

3554:       /* create two temporary Index sets for build scatter gather */
3555:       ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);
3556:       ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);

3558:       /* create temporary global vector to generate scatter context */
3559:       VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);

3561:       /* generate the scatter context */
3562:       VecScatterCreate(gvec,from,b->w,to,&b->ctx);

3564:       ISDestroy(&from);
3565:       ISDestroy(&to);
3566:       VecDestroy(&gvec);

3568:       B->ops->mult             = MatMult_MPIMAIJ_dof;
3569:       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3570:       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3571:       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;

3573: #if defined(PETSC_HAVE_CUDA)
3574:       PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ);
3575: #endif
3576:       PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);
3577:       PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpimaij_C",MatProductSetFromOptions_MPIAIJ_MPIMAIJ);
3578:     }
3579:     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
3580:     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
3581:     MatSetFromOptions(B);
3582:     MatSetUp(B);
3583: #if defined(PETSC_HAVE_CUDA)
3584:     /* temporary until we have CUDA implementation of MAIJ */
3585:     {
3586:       PetscBool flg;
3587:       if (convert) {
3588:         PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,"");
3589:         if (flg) {
3590:           MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B);
3591:         }
3592:       }
3593:     }
3594: #endif
3595:     *maij = B;
3596:     MatViewFromOptions(B,NULL,"-mat_view");
3597:   }
3598:   return(0);
3599: }