Actual source code: maij.c

petsc-master 2020-08-05
Report Typos and Errors

  2: /*
  3:     Defines the basic matrix operations for the MAIJ  matrix storage format.
  4:   This format is used for restriction and interpolation operations for
  5:   multicomponent problems. It interpolates each component the same way
  6:   independently.

  8:      We provide:
  9:          MatMult()
 10:          MatMultTranspose()
 11:          MatMultTransposeAdd()
 12:          MatMultAdd()
 13:           and
 14:          MatCreateMAIJ(Mat,dof,Mat*)

 16:      This single directory handles both the sequential and parallel codes
 17: */

 19:  #include <../src/mat/impls/maij/maij.h>
 20:  #include <../src/mat/utils/freespace.h>

 22: /*@
 23:    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix

 25:    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel

 27:    Input Parameter:
 28: .  A - the MAIJ matrix

 30:    Output Parameter:
 31: .  B - the AIJ matrix

 33:    Level: advanced

 35:    Notes:
 36:     The reference count on the AIJ matrix is not increased so you should not destroy it.

 38: .seealso: MatCreateMAIJ()
 39: @*/
 40: PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
 41: {
 43:   PetscBool      ismpimaij,isseqmaij;

 46:   PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
 47:   PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
 48:   if (ismpimaij) {
 49:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 51:     *B = b->A;
 52:   } else if (isseqmaij) {
 53:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 55:     *B = b->AIJ;
 56:   } else {
 57:     *B = A;
 58:   }
 59:   return(0);
 60: }

 62: /*@
 63:    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size

 65:    Logically Collective

 67:    Input Parameter:
 68: +  A - the MAIJ matrix
 69: -  dof - the block size for the new matrix

 71:    Output Parameter:
 72: .  B - the new MAIJ matrix

 74:    Level: advanced

 76: .seealso: MatCreateMAIJ()
 77: @*/
 78: PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
 79: {
 81:   Mat            Aij = NULL;

 85:   MatMAIJGetAIJ(A,&Aij);
 86:   MatCreateMAIJ(Aij,dof,B);
 87:   return(0);
 88: }

 90: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
 91: {
 93:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;

 96:   MatDestroy(&b->AIJ);
 97:   PetscFree(A->data);
 98:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);
 99:   PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaij_seqmaij_C",NULL);
100:   return(0);
101: }

103: PetscErrorCode MatSetUp_MAIJ(Mat A)
104: {
106:   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
107:   return(0);
108: }

110: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
111: {
113:   Mat            B;

116:   MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
117:   MatView(B,viewer);
118:   MatDestroy(&B);
119:   return(0);
120: }

122: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
123: {
125:   Mat            B;

128:   MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
129:   MatView(B,viewer);
130:   MatDestroy(&B);
131:   return(0);
132: }

134: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
135: {
137:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

140:   MatDestroy(&b->AIJ);
141:   MatDestroy(&b->OAIJ);
142:   MatDestroy(&b->A);
143:   VecScatterDestroy(&b->ctx);
144:   VecDestroy(&b->w);
145:   PetscFree(A->data);
146:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);
147:   PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_mpiaij_mpimaij_C",NULL);
148:   PetscObjectChangeTypeName((PetscObject)A,0);
149:   return(0);
150: }

152: /*MC
153:   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
154:   multicomponent problems, interpolating or restricting each component the same way independently.
155:   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.

157:   Operations provided:
158: + MatMult
159: . MatMultTranspose
160: . MatMultAdd
161: - MatMultTransposeAdd

163:   Level: advanced

165: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
166: M*/

168: PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
169: {
171:   Mat_MPIMAIJ    *b;
172:   PetscMPIInt    size;

175:   PetscNewLog(A,&b);
176:   A->data  = (void*)b;

178:   PetscMemzero(A->ops,sizeof(struct _MatOps));

180:   A->ops->setup = MatSetUp_MAIJ;

182:   b->AIJ  = 0;
183:   b->dof  = 0;
184:   b->OAIJ = 0;
185:   b->ctx  = 0;
186:   b->w    = 0;
187:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
188:   if (size == 1) {
189:     PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
190:   } else {
191:     PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
192:   }
193:   A->preallocated  = PETSC_TRUE;
194:   A->assembled     = PETSC_TRUE;
195:   return(0);
196: }

198: /* --------------------------------------------------------------------------------------*/
199: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
200: {
201:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
202:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
203:   const PetscScalar *x,*v;
204:   PetscScalar       *y, sum1, sum2;
205:   PetscErrorCode    ierr;
206:   PetscInt          nonzerorow=0,n,i,jrow,j;
207:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

210:   VecGetArrayRead(xx,&x);
211:   VecGetArray(yy,&y);
212:   idx  = a->j;
213:   v    = a->a;
214:   ii   = a->i;

216:   for (i=0; i<m; i++) {
217:     jrow  = ii[i];
218:     n     = ii[i+1] - jrow;
219:     sum1  = 0.0;
220:     sum2  = 0.0;

222:     nonzerorow += (n>0);
223:     for (j=0; j<n; j++) {
224:       sum1 += v[jrow]*x[2*idx[jrow]];
225:       sum2 += v[jrow]*x[2*idx[jrow]+1];
226:       jrow++;
227:     }
228:     y[2*i]   = sum1;
229:     y[2*i+1] = sum2;
230:   }

232:   PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);
233:   VecRestoreArrayRead(xx,&x);
234:   VecRestoreArray(yy,&y);
235:   return(0);
236: }

238: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
239: {
240:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
241:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
242:   const PetscScalar *x,*v;
243:   PetscScalar       *y,alpha1,alpha2;
244:   PetscErrorCode    ierr;
245:   PetscInt          n,i;
246:   const PetscInt    m = b->AIJ->rmap->n,*idx;

249:   VecSet(yy,0.0);
250:   VecGetArrayRead(xx,&x);
251:   VecGetArray(yy,&y);

253:   for (i=0; i<m; i++) {
254:     idx    = a->j + a->i[i];
255:     v      = a->a + a->i[i];
256:     n      = a->i[i+1] - a->i[i];
257:     alpha1 = x[2*i];
258:     alpha2 = x[2*i+1];
259:     while (n-->0) {
260:       y[2*(*idx)]   += alpha1*(*v);
261:       y[2*(*idx)+1] += alpha2*(*v);
262:       idx++; v++;
263:     }
264:   }
265:   PetscLogFlops(4.0*a->nz);
266:   VecRestoreArrayRead(xx,&x);
267:   VecRestoreArray(yy,&y);
268:   return(0);
269: }

271: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
272: {
273:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
274:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
275:   const PetscScalar *x,*v;
276:   PetscScalar       *y,sum1, sum2;
277:   PetscErrorCode    ierr;
278:   PetscInt          n,i,jrow,j;
279:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

282:   if (yy != zz) {VecCopy(yy,zz);}
283:   VecGetArrayRead(xx,&x);
284:   VecGetArray(zz,&y);
285:   idx  = a->j;
286:   v    = a->a;
287:   ii   = a->i;

289:   for (i=0; i<m; i++) {
290:     jrow = ii[i];
291:     n    = ii[i+1] - jrow;
292:     sum1 = 0.0;
293:     sum2 = 0.0;
294:     for (j=0; j<n; j++) {
295:       sum1 += v[jrow]*x[2*idx[jrow]];
296:       sum2 += v[jrow]*x[2*idx[jrow]+1];
297:       jrow++;
298:     }
299:     y[2*i]   += sum1;
300:     y[2*i+1] += sum2;
301:   }

303:   PetscLogFlops(4.0*a->nz);
304:   VecRestoreArrayRead(xx,&x);
305:   VecRestoreArray(zz,&y);
306:   return(0);
307: }
308: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
309: {
310:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
311:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
312:   const PetscScalar *x,*v;
313:   PetscScalar       *y,alpha1,alpha2;
314:   PetscErrorCode    ierr;
315:   PetscInt          n,i;
316:   const PetscInt    m = b->AIJ->rmap->n,*idx;

319:   if (yy != zz) {VecCopy(yy,zz);}
320:   VecGetArrayRead(xx,&x);
321:   VecGetArray(zz,&y);

323:   for (i=0; i<m; i++) {
324:     idx    = a->j + a->i[i];
325:     v      = a->a + a->i[i];
326:     n      = a->i[i+1] - a->i[i];
327:     alpha1 = x[2*i];
328:     alpha2 = x[2*i+1];
329:     while (n-->0) {
330:       y[2*(*idx)]   += alpha1*(*v);
331:       y[2*(*idx)+1] += alpha2*(*v);
332:       idx++; v++;
333:     }
334:   }
335:   PetscLogFlops(4.0*a->nz);
336:   VecRestoreArrayRead(xx,&x);
337:   VecRestoreArray(zz,&y);
338:   return(0);
339: }
340: /* --------------------------------------------------------------------------------------*/
341: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
342: {
343:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
344:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
345:   const PetscScalar *x,*v;
346:   PetscScalar       *y,sum1, sum2, sum3;
347:   PetscErrorCode    ierr;
348:   PetscInt          nonzerorow=0,n,i,jrow,j;
349:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

352:   VecGetArrayRead(xx,&x);
353:   VecGetArray(yy,&y);
354:   idx  = a->j;
355:   v    = a->a;
356:   ii   = a->i;

358:   for (i=0; i<m; i++) {
359:     jrow  = ii[i];
360:     n     = ii[i+1] - jrow;
361:     sum1  = 0.0;
362:     sum2  = 0.0;
363:     sum3  = 0.0;

365:     nonzerorow += (n>0);
366:     for (j=0; j<n; j++) {
367:       sum1 += v[jrow]*x[3*idx[jrow]];
368:       sum2 += v[jrow]*x[3*idx[jrow]+1];
369:       sum3 += v[jrow]*x[3*idx[jrow]+2];
370:       jrow++;
371:      }
372:     y[3*i]   = sum1;
373:     y[3*i+1] = sum2;
374:     y[3*i+2] = sum3;
375:   }

377:   PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);
378:   VecRestoreArrayRead(xx,&x);
379:   VecRestoreArray(yy,&y);
380:   return(0);
381: }

383: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
384: {
385:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
386:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
387:   const PetscScalar *x,*v;
388:   PetscScalar       *y,alpha1,alpha2,alpha3;
389:   PetscErrorCode    ierr;
390:   PetscInt          n,i;
391:   const PetscInt    m = b->AIJ->rmap->n,*idx;

394:   VecSet(yy,0.0);
395:   VecGetArrayRead(xx,&x);
396:   VecGetArray(yy,&y);

398:   for (i=0; i<m; i++) {
399:     idx    = a->j + a->i[i];
400:     v      = a->a + a->i[i];
401:     n      = a->i[i+1] - a->i[i];
402:     alpha1 = x[3*i];
403:     alpha2 = x[3*i+1];
404:     alpha3 = x[3*i+2];
405:     while (n-->0) {
406:       y[3*(*idx)]   += alpha1*(*v);
407:       y[3*(*idx)+1] += alpha2*(*v);
408:       y[3*(*idx)+2] += alpha3*(*v);
409:       idx++; v++;
410:     }
411:   }
412:   PetscLogFlops(6.0*a->nz);
413:   VecRestoreArrayRead(xx,&x);
414:   VecRestoreArray(yy,&y);
415:   return(0);
416: }

418: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
419: {
420:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
421:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
422:   const PetscScalar *x,*v;
423:   PetscScalar       *y,sum1, sum2, sum3;
424:   PetscErrorCode    ierr;
425:   PetscInt          n,i,jrow,j;
426:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

429:   if (yy != zz) {VecCopy(yy,zz);}
430:   VecGetArrayRead(xx,&x);
431:   VecGetArray(zz,&y);
432:   idx  = a->j;
433:   v    = a->a;
434:   ii   = a->i;

436:   for (i=0; i<m; i++) {
437:     jrow = ii[i];
438:     n    = ii[i+1] - jrow;
439:     sum1 = 0.0;
440:     sum2 = 0.0;
441:     sum3 = 0.0;
442:     for (j=0; j<n; j++) {
443:       sum1 += v[jrow]*x[3*idx[jrow]];
444:       sum2 += v[jrow]*x[3*idx[jrow]+1];
445:       sum3 += v[jrow]*x[3*idx[jrow]+2];
446:       jrow++;
447:     }
448:     y[3*i]   += sum1;
449:     y[3*i+1] += sum2;
450:     y[3*i+2] += sum3;
451:   }

453:   PetscLogFlops(6.0*a->nz);
454:   VecRestoreArrayRead(xx,&x);
455:   VecRestoreArray(zz,&y);
456:   return(0);
457: }
458: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
459: {
460:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
461:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
462:   const PetscScalar *x,*v;
463:   PetscScalar       *y,alpha1,alpha2,alpha3;
464:   PetscErrorCode    ierr;
465:   PetscInt          n,i;
466:   const PetscInt    m = b->AIJ->rmap->n,*idx;

469:   if (yy != zz) {VecCopy(yy,zz);}
470:   VecGetArrayRead(xx,&x);
471:   VecGetArray(zz,&y);
472:   for (i=0; i<m; i++) {
473:     idx    = a->j + a->i[i];
474:     v      = a->a + a->i[i];
475:     n      = a->i[i+1] - a->i[i];
476:     alpha1 = x[3*i];
477:     alpha2 = x[3*i+1];
478:     alpha3 = x[3*i+2];
479:     while (n-->0) {
480:       y[3*(*idx)]   += alpha1*(*v);
481:       y[3*(*idx)+1] += alpha2*(*v);
482:       y[3*(*idx)+2] += alpha3*(*v);
483:       idx++; v++;
484:     }
485:   }
486:   PetscLogFlops(6.0*a->nz);
487:   VecRestoreArrayRead(xx,&x);
488:   VecRestoreArray(zz,&y);
489:   return(0);
490: }

492: /* ------------------------------------------------------------------------------*/
493: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
494: {
495:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
496:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
497:   const PetscScalar *x,*v;
498:   PetscScalar       *y,sum1, sum2, sum3, sum4;
499:   PetscErrorCode    ierr;
500:   PetscInt          nonzerorow=0,n,i,jrow,j;
501:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

504:   VecGetArrayRead(xx,&x);
505:   VecGetArray(yy,&y);
506:   idx  = a->j;
507:   v    = a->a;
508:   ii   = a->i;

510:   for (i=0; i<m; i++) {
511:     jrow        = ii[i];
512:     n           = ii[i+1] - jrow;
513:     sum1        = 0.0;
514:     sum2        = 0.0;
515:     sum3        = 0.0;
516:     sum4        = 0.0;
517:     nonzerorow += (n>0);
518:     for (j=0; j<n; j++) {
519:       sum1 += v[jrow]*x[4*idx[jrow]];
520:       sum2 += v[jrow]*x[4*idx[jrow]+1];
521:       sum3 += v[jrow]*x[4*idx[jrow]+2];
522:       sum4 += v[jrow]*x[4*idx[jrow]+3];
523:       jrow++;
524:     }
525:     y[4*i]   = sum1;
526:     y[4*i+1] = sum2;
527:     y[4*i+2] = sum3;
528:     y[4*i+3] = sum4;
529:   }

531:   PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);
532:   VecRestoreArrayRead(xx,&x);
533:   VecRestoreArray(yy,&y);
534:   return(0);
535: }

537: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
538: {
539:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
540:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
541:   const PetscScalar *x,*v;
542:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
543:   PetscErrorCode    ierr;
544:   PetscInt          n,i;
545:   const PetscInt    m = b->AIJ->rmap->n,*idx;

548:   VecSet(yy,0.0);
549:   VecGetArrayRead(xx,&x);
550:   VecGetArray(yy,&y);
551:   for (i=0; i<m; i++) {
552:     idx    = a->j + a->i[i];
553:     v      = a->a + a->i[i];
554:     n      = a->i[i+1] - a->i[i];
555:     alpha1 = x[4*i];
556:     alpha2 = x[4*i+1];
557:     alpha3 = x[4*i+2];
558:     alpha4 = x[4*i+3];
559:     while (n-->0) {
560:       y[4*(*idx)]   += alpha1*(*v);
561:       y[4*(*idx)+1] += alpha2*(*v);
562:       y[4*(*idx)+2] += alpha3*(*v);
563:       y[4*(*idx)+3] += alpha4*(*v);
564:       idx++; v++;
565:     }
566:   }
567:   PetscLogFlops(8.0*a->nz);
568:   VecRestoreArrayRead(xx,&x);
569:   VecRestoreArray(yy,&y);
570:   return(0);
571: }

573: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
574: {
575:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
576:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
577:   const PetscScalar *x,*v;
578:   PetscScalar       *y,sum1, sum2, sum3, sum4;
579:   PetscErrorCode    ierr;
580:   PetscInt          n,i,jrow,j;
581:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

584:   if (yy != zz) {VecCopy(yy,zz);}
585:   VecGetArrayRead(xx,&x);
586:   VecGetArray(zz,&y);
587:   idx  = a->j;
588:   v    = a->a;
589:   ii   = a->i;

591:   for (i=0; i<m; i++) {
592:     jrow = ii[i];
593:     n    = ii[i+1] - jrow;
594:     sum1 = 0.0;
595:     sum2 = 0.0;
596:     sum3 = 0.0;
597:     sum4 = 0.0;
598:     for (j=0; j<n; j++) {
599:       sum1 += v[jrow]*x[4*idx[jrow]];
600:       sum2 += v[jrow]*x[4*idx[jrow]+1];
601:       sum3 += v[jrow]*x[4*idx[jrow]+2];
602:       sum4 += v[jrow]*x[4*idx[jrow]+3];
603:       jrow++;
604:     }
605:     y[4*i]   += sum1;
606:     y[4*i+1] += sum2;
607:     y[4*i+2] += sum3;
608:     y[4*i+3] += sum4;
609:   }

611:   PetscLogFlops(8.0*a->nz);
612:   VecRestoreArrayRead(xx,&x);
613:   VecRestoreArray(zz,&y);
614:   return(0);
615: }
616: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
617: {
618:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
619:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
620:   const PetscScalar *x,*v;
621:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
622:   PetscErrorCode    ierr;
623:   PetscInt          n,i;
624:   const PetscInt    m = b->AIJ->rmap->n,*idx;

627:   if (yy != zz) {VecCopy(yy,zz);}
628:   VecGetArrayRead(xx,&x);
629:   VecGetArray(zz,&y);

631:   for (i=0; i<m; i++) {
632:     idx    = a->j + a->i[i];
633:     v      = a->a + a->i[i];
634:     n      = a->i[i+1] - a->i[i];
635:     alpha1 = x[4*i];
636:     alpha2 = x[4*i+1];
637:     alpha3 = x[4*i+2];
638:     alpha4 = x[4*i+3];
639:     while (n-->0) {
640:       y[4*(*idx)]   += alpha1*(*v);
641:       y[4*(*idx)+1] += alpha2*(*v);
642:       y[4*(*idx)+2] += alpha3*(*v);
643:       y[4*(*idx)+3] += alpha4*(*v);
644:       idx++; v++;
645:     }
646:   }
647:   PetscLogFlops(8.0*a->nz);
648:   VecRestoreArrayRead(xx,&x);
649:   VecRestoreArray(zz,&y);
650:   return(0);
651: }
652: /* ------------------------------------------------------------------------------*/

654: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
655: {
656:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
657:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
658:   const PetscScalar *x,*v;
659:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
660:   PetscErrorCode    ierr;
661:   PetscInt          nonzerorow=0,n,i,jrow,j;
662:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

665:   VecGetArrayRead(xx,&x);
666:   VecGetArray(yy,&y);
667:   idx  = a->j;
668:   v    = a->a;
669:   ii   = a->i;

671:   for (i=0; i<m; i++) {
672:     jrow  = ii[i];
673:     n     = ii[i+1] - jrow;
674:     sum1  = 0.0;
675:     sum2  = 0.0;
676:     sum3  = 0.0;
677:     sum4  = 0.0;
678:     sum5  = 0.0;

680:     nonzerorow += (n>0);
681:     for (j=0; j<n; j++) {
682:       sum1 += v[jrow]*x[5*idx[jrow]];
683:       sum2 += v[jrow]*x[5*idx[jrow]+1];
684:       sum3 += v[jrow]*x[5*idx[jrow]+2];
685:       sum4 += v[jrow]*x[5*idx[jrow]+3];
686:       sum5 += v[jrow]*x[5*idx[jrow]+4];
687:       jrow++;
688:     }
689:     y[5*i]   = sum1;
690:     y[5*i+1] = sum2;
691:     y[5*i+2] = sum3;
692:     y[5*i+3] = sum4;
693:     y[5*i+4] = sum5;
694:   }

696:   PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);
697:   VecRestoreArrayRead(xx,&x);
698:   VecRestoreArray(yy,&y);
699:   return(0);
700: }

702: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
703: {
704:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
705:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
706:   const PetscScalar *x,*v;
707:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
708:   PetscErrorCode    ierr;
709:   PetscInt          n,i;
710:   const PetscInt    m = b->AIJ->rmap->n,*idx;

713:   VecSet(yy,0.0);
714:   VecGetArrayRead(xx,&x);
715:   VecGetArray(yy,&y);

717:   for (i=0; i<m; i++) {
718:     idx    = a->j + a->i[i];
719:     v      = a->a + a->i[i];
720:     n      = a->i[i+1] - a->i[i];
721:     alpha1 = x[5*i];
722:     alpha2 = x[5*i+1];
723:     alpha3 = x[5*i+2];
724:     alpha4 = x[5*i+3];
725:     alpha5 = x[5*i+4];
726:     while (n-->0) {
727:       y[5*(*idx)]   += alpha1*(*v);
728:       y[5*(*idx)+1] += alpha2*(*v);
729:       y[5*(*idx)+2] += alpha3*(*v);
730:       y[5*(*idx)+3] += alpha4*(*v);
731:       y[5*(*idx)+4] += alpha5*(*v);
732:       idx++; v++;
733:     }
734:   }
735:   PetscLogFlops(10.0*a->nz);
736:   VecRestoreArrayRead(xx,&x);
737:   VecRestoreArray(yy,&y);
738:   return(0);
739: }

741: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
742: {
743:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
744:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
745:   const PetscScalar *x,*v;
746:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
747:   PetscErrorCode    ierr;
748:   PetscInt          n,i,jrow,j;
749:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

752:   if (yy != zz) {VecCopy(yy,zz);}
753:   VecGetArrayRead(xx,&x);
754:   VecGetArray(zz,&y);
755:   idx  = a->j;
756:   v    = a->a;
757:   ii   = a->i;

759:   for (i=0; i<m; i++) {
760:     jrow = ii[i];
761:     n    = ii[i+1] - jrow;
762:     sum1 = 0.0;
763:     sum2 = 0.0;
764:     sum3 = 0.0;
765:     sum4 = 0.0;
766:     sum5 = 0.0;
767:     for (j=0; j<n; j++) {
768:       sum1 += v[jrow]*x[5*idx[jrow]];
769:       sum2 += v[jrow]*x[5*idx[jrow]+1];
770:       sum3 += v[jrow]*x[5*idx[jrow]+2];
771:       sum4 += v[jrow]*x[5*idx[jrow]+3];
772:       sum5 += v[jrow]*x[5*idx[jrow]+4];
773:       jrow++;
774:     }
775:     y[5*i]   += sum1;
776:     y[5*i+1] += sum2;
777:     y[5*i+2] += sum3;
778:     y[5*i+3] += sum4;
779:     y[5*i+4] += sum5;
780:   }

782:   PetscLogFlops(10.0*a->nz);
783:   VecRestoreArrayRead(xx,&x);
784:   VecRestoreArray(zz,&y);
785:   return(0);
786: }

788: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
789: {
790:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
791:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
792:   const PetscScalar *x,*v;
793:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
794:   PetscErrorCode    ierr;
795:   PetscInt          n,i;
796:   const PetscInt    m = b->AIJ->rmap->n,*idx;

799:   if (yy != zz) {VecCopy(yy,zz);}
800:   VecGetArrayRead(xx,&x);
801:   VecGetArray(zz,&y);

803:   for (i=0; i<m; i++) {
804:     idx    = a->j + a->i[i];
805:     v      = a->a + a->i[i];
806:     n      = a->i[i+1] - a->i[i];
807:     alpha1 = x[5*i];
808:     alpha2 = x[5*i+1];
809:     alpha3 = x[5*i+2];
810:     alpha4 = x[5*i+3];
811:     alpha5 = x[5*i+4];
812:     while (n-->0) {
813:       y[5*(*idx)]   += alpha1*(*v);
814:       y[5*(*idx)+1] += alpha2*(*v);
815:       y[5*(*idx)+2] += alpha3*(*v);
816:       y[5*(*idx)+3] += alpha4*(*v);
817:       y[5*(*idx)+4] += alpha5*(*v);
818:       idx++; v++;
819:     }
820:   }
821:   PetscLogFlops(10.0*a->nz);
822:   VecRestoreArrayRead(xx,&x);
823:   VecRestoreArray(zz,&y);
824:   return(0);
825: }

827: /* ------------------------------------------------------------------------------*/
828: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
829: {
830:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
831:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
832:   const PetscScalar *x,*v;
833:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
834:   PetscErrorCode    ierr;
835:   PetscInt          nonzerorow=0,n,i,jrow,j;
836:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

839:   VecGetArrayRead(xx,&x);
840:   VecGetArray(yy,&y);
841:   idx  = a->j;
842:   v    = a->a;
843:   ii   = a->i;

845:   for (i=0; i<m; i++) {
846:     jrow  = ii[i];
847:     n     = ii[i+1] - jrow;
848:     sum1  = 0.0;
849:     sum2  = 0.0;
850:     sum3  = 0.0;
851:     sum4  = 0.0;
852:     sum5  = 0.0;
853:     sum6  = 0.0;

855:     nonzerorow += (n>0);
856:     for (j=0; j<n; j++) {
857:       sum1 += v[jrow]*x[6*idx[jrow]];
858:       sum2 += v[jrow]*x[6*idx[jrow]+1];
859:       sum3 += v[jrow]*x[6*idx[jrow]+2];
860:       sum4 += v[jrow]*x[6*idx[jrow]+3];
861:       sum5 += v[jrow]*x[6*idx[jrow]+4];
862:       sum6 += v[jrow]*x[6*idx[jrow]+5];
863:       jrow++;
864:     }
865:     y[6*i]   = sum1;
866:     y[6*i+1] = sum2;
867:     y[6*i+2] = sum3;
868:     y[6*i+3] = sum4;
869:     y[6*i+4] = sum5;
870:     y[6*i+5] = sum6;
871:   }

873:   PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);
874:   VecRestoreArrayRead(xx,&x);
875:   VecRestoreArray(yy,&y);
876:   return(0);
877: }

879: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
880: {
881:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
882:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
883:   const PetscScalar *x,*v;
884:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
885:   PetscErrorCode    ierr;
886:   PetscInt          n,i;
887:   const PetscInt    m = b->AIJ->rmap->n,*idx;

890:   VecSet(yy,0.0);
891:   VecGetArrayRead(xx,&x);
892:   VecGetArray(yy,&y);

894:   for (i=0; i<m; i++) {
895:     idx    = a->j + a->i[i];
896:     v      = a->a + a->i[i];
897:     n      = a->i[i+1] - a->i[i];
898:     alpha1 = x[6*i];
899:     alpha2 = x[6*i+1];
900:     alpha3 = x[6*i+2];
901:     alpha4 = x[6*i+3];
902:     alpha5 = x[6*i+4];
903:     alpha6 = x[6*i+5];
904:     while (n-->0) {
905:       y[6*(*idx)]   += alpha1*(*v);
906:       y[6*(*idx)+1] += alpha2*(*v);
907:       y[6*(*idx)+2] += alpha3*(*v);
908:       y[6*(*idx)+3] += alpha4*(*v);
909:       y[6*(*idx)+4] += alpha5*(*v);
910:       y[6*(*idx)+5] += alpha6*(*v);
911:       idx++; v++;
912:     }
913:   }
914:   PetscLogFlops(12.0*a->nz);
915:   VecRestoreArrayRead(xx,&x);
916:   VecRestoreArray(yy,&y);
917:   return(0);
918: }

920: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
921: {
922:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
923:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
924:   const PetscScalar *x,*v;
925:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
926:   PetscErrorCode    ierr;
927:   PetscInt          n,i,jrow,j;
928:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

931:   if (yy != zz) {VecCopy(yy,zz);}
932:   VecGetArrayRead(xx,&x);
933:   VecGetArray(zz,&y);
934:   idx  = a->j;
935:   v    = a->a;
936:   ii   = a->i;

938:   for (i=0; i<m; i++) {
939:     jrow = ii[i];
940:     n    = ii[i+1] - jrow;
941:     sum1 = 0.0;
942:     sum2 = 0.0;
943:     sum3 = 0.0;
944:     sum4 = 0.0;
945:     sum5 = 0.0;
946:     sum6 = 0.0;
947:     for (j=0; j<n; j++) {
948:       sum1 += v[jrow]*x[6*idx[jrow]];
949:       sum2 += v[jrow]*x[6*idx[jrow]+1];
950:       sum3 += v[jrow]*x[6*idx[jrow]+2];
951:       sum4 += v[jrow]*x[6*idx[jrow]+3];
952:       sum5 += v[jrow]*x[6*idx[jrow]+4];
953:       sum6 += v[jrow]*x[6*idx[jrow]+5];
954:       jrow++;
955:     }
956:     y[6*i]   += sum1;
957:     y[6*i+1] += sum2;
958:     y[6*i+2] += sum3;
959:     y[6*i+3] += sum4;
960:     y[6*i+4] += sum5;
961:     y[6*i+5] += sum6;
962:   }

964:   PetscLogFlops(12.0*a->nz);
965:   VecRestoreArrayRead(xx,&x);
966:   VecRestoreArray(zz,&y);
967:   return(0);
968: }

970: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
971: {
972:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
973:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
974:   const PetscScalar *x,*v;
975:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
976:   PetscErrorCode    ierr;
977:   PetscInt          n,i;
978:   const PetscInt    m = b->AIJ->rmap->n,*idx;

981:   if (yy != zz) {VecCopy(yy,zz);}
982:   VecGetArrayRead(xx,&x);
983:   VecGetArray(zz,&y);

985:   for (i=0; i<m; i++) {
986:     idx    = a->j + a->i[i];
987:     v      = a->a + a->i[i];
988:     n      = a->i[i+1] - a->i[i];
989:     alpha1 = x[6*i];
990:     alpha2 = x[6*i+1];
991:     alpha3 = x[6*i+2];
992:     alpha4 = x[6*i+3];
993:     alpha5 = x[6*i+4];
994:     alpha6 = x[6*i+5];
995:     while (n-->0) {
996:       y[6*(*idx)]   += alpha1*(*v);
997:       y[6*(*idx)+1] += alpha2*(*v);
998:       y[6*(*idx)+2] += alpha3*(*v);
999:       y[6*(*idx)+3] += alpha4*(*v);
1000:       y[6*(*idx)+4] += alpha5*(*v);
1001:       y[6*(*idx)+5] += alpha6*(*v);
1002:       idx++; v++;
1003:     }
1004:   }
1005:   PetscLogFlops(12.0*a->nz);
1006:   VecRestoreArrayRead(xx,&x);
1007:   VecRestoreArray(zz,&y);
1008:   return(0);
1009: }

1011: /* ------------------------------------------------------------------------------*/
1012: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1013: {
1014:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1015:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1016:   const PetscScalar *x,*v;
1017:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1018:   PetscErrorCode    ierr;
1019:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1020:   PetscInt          nonzerorow=0,n,i,jrow,j;

1023:   VecGetArrayRead(xx,&x);
1024:   VecGetArray(yy,&y);
1025:   idx  = a->j;
1026:   v    = a->a;
1027:   ii   = a->i;

1029:   for (i=0; i<m; i++) {
1030:     jrow  = ii[i];
1031:     n     = ii[i+1] - jrow;
1032:     sum1  = 0.0;
1033:     sum2  = 0.0;
1034:     sum3  = 0.0;
1035:     sum4  = 0.0;
1036:     sum5  = 0.0;
1037:     sum6  = 0.0;
1038:     sum7  = 0.0;

1040:     nonzerorow += (n>0);
1041:     for (j=0; j<n; j++) {
1042:       sum1 += v[jrow]*x[7*idx[jrow]];
1043:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1044:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1045:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1046:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1047:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1048:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1049:       jrow++;
1050:     }
1051:     y[7*i]   = sum1;
1052:     y[7*i+1] = sum2;
1053:     y[7*i+2] = sum3;
1054:     y[7*i+3] = sum4;
1055:     y[7*i+4] = sum5;
1056:     y[7*i+5] = sum6;
1057:     y[7*i+6] = sum7;
1058:   }

1060:   PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);
1061:   VecRestoreArrayRead(xx,&x);
1062:   VecRestoreArray(yy,&y);
1063:   return(0);
1064: }

1066: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1067: {
1068:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1069:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1070:   const PetscScalar *x,*v;
1071:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1072:   PetscErrorCode    ierr;
1073:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1074:   PetscInt          n,i;

1077:   VecSet(yy,0.0);
1078:   VecGetArrayRead(xx,&x);
1079:   VecGetArray(yy,&y);

1081:   for (i=0; i<m; i++) {
1082:     idx    = a->j + a->i[i];
1083:     v      = a->a + a->i[i];
1084:     n      = a->i[i+1] - a->i[i];
1085:     alpha1 = x[7*i];
1086:     alpha2 = x[7*i+1];
1087:     alpha3 = x[7*i+2];
1088:     alpha4 = x[7*i+3];
1089:     alpha5 = x[7*i+4];
1090:     alpha6 = x[7*i+5];
1091:     alpha7 = x[7*i+6];
1092:     while (n-->0) {
1093:       y[7*(*idx)]   += alpha1*(*v);
1094:       y[7*(*idx)+1] += alpha2*(*v);
1095:       y[7*(*idx)+2] += alpha3*(*v);
1096:       y[7*(*idx)+3] += alpha4*(*v);
1097:       y[7*(*idx)+4] += alpha5*(*v);
1098:       y[7*(*idx)+5] += alpha6*(*v);
1099:       y[7*(*idx)+6] += alpha7*(*v);
1100:       idx++; v++;
1101:     }
1102:   }
1103:   PetscLogFlops(14.0*a->nz);
1104:   VecRestoreArrayRead(xx,&x);
1105:   VecRestoreArray(yy,&y);
1106:   return(0);
1107: }

1109: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1110: {
1111:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1112:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1113:   const PetscScalar *x,*v;
1114:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1115:   PetscErrorCode    ierr;
1116:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1117:   PetscInt          n,i,jrow,j;

1120:   if (yy != zz) {VecCopy(yy,zz);}
1121:   VecGetArrayRead(xx,&x);
1122:   VecGetArray(zz,&y);
1123:   idx  = a->j;
1124:   v    = a->a;
1125:   ii   = a->i;

1127:   for (i=0; i<m; i++) {
1128:     jrow = ii[i];
1129:     n    = ii[i+1] - jrow;
1130:     sum1 = 0.0;
1131:     sum2 = 0.0;
1132:     sum3 = 0.0;
1133:     sum4 = 0.0;
1134:     sum5 = 0.0;
1135:     sum6 = 0.0;
1136:     sum7 = 0.0;
1137:     for (j=0; j<n; j++) {
1138:       sum1 += v[jrow]*x[7*idx[jrow]];
1139:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1140:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1141:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1142:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1143:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1144:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1145:       jrow++;
1146:     }
1147:     y[7*i]   += sum1;
1148:     y[7*i+1] += sum2;
1149:     y[7*i+2] += sum3;
1150:     y[7*i+3] += sum4;
1151:     y[7*i+4] += sum5;
1152:     y[7*i+5] += sum6;
1153:     y[7*i+6] += sum7;
1154:   }

1156:   PetscLogFlops(14.0*a->nz);
1157:   VecRestoreArrayRead(xx,&x);
1158:   VecRestoreArray(zz,&y);
1159:   return(0);
1160: }

1162: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1163: {
1164:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1165:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1166:   const PetscScalar *x,*v;
1167:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1168:   PetscErrorCode    ierr;
1169:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1170:   PetscInt          n,i;

1173:   if (yy != zz) {VecCopy(yy,zz);}
1174:   VecGetArrayRead(xx,&x);
1175:   VecGetArray(zz,&y);
1176:   for (i=0; i<m; i++) {
1177:     idx    = a->j + a->i[i];
1178:     v      = a->a + a->i[i];
1179:     n      = a->i[i+1] - a->i[i];
1180:     alpha1 = x[7*i];
1181:     alpha2 = x[7*i+1];
1182:     alpha3 = x[7*i+2];
1183:     alpha4 = x[7*i+3];
1184:     alpha5 = x[7*i+4];
1185:     alpha6 = x[7*i+5];
1186:     alpha7 = x[7*i+6];
1187:     while (n-->0) {
1188:       y[7*(*idx)]   += alpha1*(*v);
1189:       y[7*(*idx)+1] += alpha2*(*v);
1190:       y[7*(*idx)+2] += alpha3*(*v);
1191:       y[7*(*idx)+3] += alpha4*(*v);
1192:       y[7*(*idx)+4] += alpha5*(*v);
1193:       y[7*(*idx)+5] += alpha6*(*v);
1194:       y[7*(*idx)+6] += alpha7*(*v);
1195:       idx++; v++;
1196:     }
1197:   }
1198:   PetscLogFlops(14.0*a->nz);
1199:   VecRestoreArrayRead(xx,&x);
1200:   VecRestoreArray(zz,&y);
1201:   return(0);
1202: }

1204: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1205: {
1206:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1207:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1208:   const PetscScalar *x,*v;
1209:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1210:   PetscErrorCode    ierr;
1211:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1212:   PetscInt          nonzerorow=0,n,i,jrow,j;

1215:   VecGetArrayRead(xx,&x);
1216:   VecGetArray(yy,&y);
1217:   idx  = a->j;
1218:   v    = a->a;
1219:   ii   = a->i;

1221:   for (i=0; i<m; i++) {
1222:     jrow  = ii[i];
1223:     n     = ii[i+1] - jrow;
1224:     sum1  = 0.0;
1225:     sum2  = 0.0;
1226:     sum3  = 0.0;
1227:     sum4  = 0.0;
1228:     sum5  = 0.0;
1229:     sum6  = 0.0;
1230:     sum7  = 0.0;
1231:     sum8  = 0.0;

1233:     nonzerorow += (n>0);
1234:     for (j=0; j<n; j++) {
1235:       sum1 += v[jrow]*x[8*idx[jrow]];
1236:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1237:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1238:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1239:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1240:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1241:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1242:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1243:       jrow++;
1244:     }
1245:     y[8*i]   = sum1;
1246:     y[8*i+1] = sum2;
1247:     y[8*i+2] = sum3;
1248:     y[8*i+3] = sum4;
1249:     y[8*i+4] = sum5;
1250:     y[8*i+5] = sum6;
1251:     y[8*i+6] = sum7;
1252:     y[8*i+7] = sum8;
1253:   }

1255:   PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);
1256:   VecRestoreArrayRead(xx,&x);
1257:   VecRestoreArray(yy,&y);
1258:   return(0);
1259: }

1261: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1262: {
1263:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1264:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1265:   const PetscScalar *x,*v;
1266:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1267:   PetscErrorCode    ierr;
1268:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1269:   PetscInt          n,i;

1272:   VecSet(yy,0.0);
1273:   VecGetArrayRead(xx,&x);
1274:   VecGetArray(yy,&y);

1276:   for (i=0; i<m; i++) {
1277:     idx    = a->j + a->i[i];
1278:     v      = a->a + a->i[i];
1279:     n      = a->i[i+1] - a->i[i];
1280:     alpha1 = x[8*i];
1281:     alpha2 = x[8*i+1];
1282:     alpha3 = x[8*i+2];
1283:     alpha4 = x[8*i+3];
1284:     alpha5 = x[8*i+4];
1285:     alpha6 = x[8*i+5];
1286:     alpha7 = x[8*i+6];
1287:     alpha8 = x[8*i+7];
1288:     while (n-->0) {
1289:       y[8*(*idx)]   += alpha1*(*v);
1290:       y[8*(*idx)+1] += alpha2*(*v);
1291:       y[8*(*idx)+2] += alpha3*(*v);
1292:       y[8*(*idx)+3] += alpha4*(*v);
1293:       y[8*(*idx)+4] += alpha5*(*v);
1294:       y[8*(*idx)+5] += alpha6*(*v);
1295:       y[8*(*idx)+6] += alpha7*(*v);
1296:       y[8*(*idx)+7] += alpha8*(*v);
1297:       idx++; v++;
1298:     }
1299:   }
1300:   PetscLogFlops(16.0*a->nz);
1301:   VecRestoreArrayRead(xx,&x);
1302:   VecRestoreArray(yy,&y);
1303:   return(0);
1304: }

1306: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1307: {
1308:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1309:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1310:   const PetscScalar *x,*v;
1311:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1312:   PetscErrorCode    ierr;
1313:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1314:   PetscInt          n,i,jrow,j;

1317:   if (yy != zz) {VecCopy(yy,zz);}
1318:   VecGetArrayRead(xx,&x);
1319:   VecGetArray(zz,&y);
1320:   idx  = a->j;
1321:   v    = a->a;
1322:   ii   = a->i;

1324:   for (i=0; i<m; i++) {
1325:     jrow = ii[i];
1326:     n    = ii[i+1] - jrow;
1327:     sum1 = 0.0;
1328:     sum2 = 0.0;
1329:     sum3 = 0.0;
1330:     sum4 = 0.0;
1331:     sum5 = 0.0;
1332:     sum6 = 0.0;
1333:     sum7 = 0.0;
1334:     sum8 = 0.0;
1335:     for (j=0; j<n; j++) {
1336:       sum1 += v[jrow]*x[8*idx[jrow]];
1337:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1338:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1339:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1340:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1341:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1342:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1343:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1344:       jrow++;
1345:     }
1346:     y[8*i]   += sum1;
1347:     y[8*i+1] += sum2;
1348:     y[8*i+2] += sum3;
1349:     y[8*i+3] += sum4;
1350:     y[8*i+4] += sum5;
1351:     y[8*i+5] += sum6;
1352:     y[8*i+6] += sum7;
1353:     y[8*i+7] += sum8;
1354:   }

1356:   PetscLogFlops(16.0*a->nz);
1357:   VecRestoreArrayRead(xx,&x);
1358:   VecRestoreArray(zz,&y);
1359:   return(0);
1360: }

1362: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1363: {
1364:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1365:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1366:   const PetscScalar *x,*v;
1367:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1368:   PetscErrorCode    ierr;
1369:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1370:   PetscInt          n,i;

1373:   if (yy != zz) {VecCopy(yy,zz);}
1374:   VecGetArrayRead(xx,&x);
1375:   VecGetArray(zz,&y);
1376:   for (i=0; i<m; i++) {
1377:     idx    = a->j + a->i[i];
1378:     v      = a->a + a->i[i];
1379:     n      = a->i[i+1] - a->i[i];
1380:     alpha1 = x[8*i];
1381:     alpha2 = x[8*i+1];
1382:     alpha3 = x[8*i+2];
1383:     alpha4 = x[8*i+3];
1384:     alpha5 = x[8*i+4];
1385:     alpha6 = x[8*i+5];
1386:     alpha7 = x[8*i+6];
1387:     alpha8 = x[8*i+7];
1388:     while (n-->0) {
1389:       y[8*(*idx)]   += alpha1*(*v);
1390:       y[8*(*idx)+1] += alpha2*(*v);
1391:       y[8*(*idx)+2] += alpha3*(*v);
1392:       y[8*(*idx)+3] += alpha4*(*v);
1393:       y[8*(*idx)+4] += alpha5*(*v);
1394:       y[8*(*idx)+5] += alpha6*(*v);
1395:       y[8*(*idx)+6] += alpha7*(*v);
1396:       y[8*(*idx)+7] += alpha8*(*v);
1397:       idx++; v++;
1398:     }
1399:   }
1400:   PetscLogFlops(16.0*a->nz);
1401:   VecRestoreArrayRead(xx,&x);
1402:   VecRestoreArray(zz,&y);
1403:   return(0);
1404: }

1406: /* ------------------------------------------------------------------------------*/
1407: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1408: {
1409:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1410:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1411:   const PetscScalar *x,*v;
1412:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1413:   PetscErrorCode    ierr;
1414:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1415:   PetscInt          nonzerorow=0,n,i,jrow,j;

1418:   VecGetArrayRead(xx,&x);
1419:   VecGetArray(yy,&y);
1420:   idx  = a->j;
1421:   v    = a->a;
1422:   ii   = a->i;

1424:   for (i=0; i<m; i++) {
1425:     jrow  = ii[i];
1426:     n     = ii[i+1] - jrow;
1427:     sum1  = 0.0;
1428:     sum2  = 0.0;
1429:     sum3  = 0.0;
1430:     sum4  = 0.0;
1431:     sum5  = 0.0;
1432:     sum6  = 0.0;
1433:     sum7  = 0.0;
1434:     sum8  = 0.0;
1435:     sum9  = 0.0;

1437:     nonzerorow += (n>0);
1438:     for (j=0; j<n; j++) {
1439:       sum1 += v[jrow]*x[9*idx[jrow]];
1440:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1441:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1442:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1443:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1444:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1445:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1446:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1447:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1448:       jrow++;
1449:     }
1450:     y[9*i]   = sum1;
1451:     y[9*i+1] = sum2;
1452:     y[9*i+2] = sum3;
1453:     y[9*i+3] = sum4;
1454:     y[9*i+4] = sum5;
1455:     y[9*i+5] = sum6;
1456:     y[9*i+6] = sum7;
1457:     y[9*i+7] = sum8;
1458:     y[9*i+8] = sum9;
1459:   }

1461:   PetscLogFlops(18.0*a->nz - 9*nonzerorow);
1462:   VecRestoreArrayRead(xx,&x);
1463:   VecRestoreArray(yy,&y);
1464:   return(0);
1465: }

1467: /* ------------------------------------------------------------------------------*/

1469: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1470: {
1471:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1472:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1473:   const PetscScalar *x,*v;
1474:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1475:   PetscErrorCode    ierr;
1476:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1477:   PetscInt          n,i;

1480:   VecSet(yy,0.0);
1481:   VecGetArrayRead(xx,&x);
1482:   VecGetArray(yy,&y);

1484:   for (i=0; i<m; i++) {
1485:     idx    = a->j + a->i[i];
1486:     v      = a->a + a->i[i];
1487:     n      = a->i[i+1] - a->i[i];
1488:     alpha1 = x[9*i];
1489:     alpha2 = x[9*i+1];
1490:     alpha3 = x[9*i+2];
1491:     alpha4 = x[9*i+3];
1492:     alpha5 = x[9*i+4];
1493:     alpha6 = x[9*i+5];
1494:     alpha7 = x[9*i+6];
1495:     alpha8 = x[9*i+7];
1496:     alpha9 = x[9*i+8];
1497:     while (n-->0) {
1498:       y[9*(*idx)]   += alpha1*(*v);
1499:       y[9*(*idx)+1] += alpha2*(*v);
1500:       y[9*(*idx)+2] += alpha3*(*v);
1501:       y[9*(*idx)+3] += alpha4*(*v);
1502:       y[9*(*idx)+4] += alpha5*(*v);
1503:       y[9*(*idx)+5] += alpha6*(*v);
1504:       y[9*(*idx)+6] += alpha7*(*v);
1505:       y[9*(*idx)+7] += alpha8*(*v);
1506:       y[9*(*idx)+8] += alpha9*(*v);
1507:       idx++; v++;
1508:     }
1509:   }
1510:   PetscLogFlops(18.0*a->nz);
1511:   VecRestoreArrayRead(xx,&x);
1512:   VecRestoreArray(yy,&y);
1513:   return(0);
1514: }

1516: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1517: {
1518:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1519:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1520:   const PetscScalar *x,*v;
1521:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1522:   PetscErrorCode    ierr;
1523:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1524:   PetscInt          n,i,jrow,j;

1527:   if (yy != zz) {VecCopy(yy,zz);}
1528:   VecGetArrayRead(xx,&x);
1529:   VecGetArray(zz,&y);
1530:   idx  = a->j;
1531:   v    = a->a;
1532:   ii   = a->i;

1534:   for (i=0; i<m; i++) {
1535:     jrow = ii[i];
1536:     n    = ii[i+1] - jrow;
1537:     sum1 = 0.0;
1538:     sum2 = 0.0;
1539:     sum3 = 0.0;
1540:     sum4 = 0.0;
1541:     sum5 = 0.0;
1542:     sum6 = 0.0;
1543:     sum7 = 0.0;
1544:     sum8 = 0.0;
1545:     sum9 = 0.0;
1546:     for (j=0; j<n; j++) {
1547:       sum1 += v[jrow]*x[9*idx[jrow]];
1548:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1549:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1550:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1551:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1552:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1553:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1554:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1555:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1556:       jrow++;
1557:     }
1558:     y[9*i]   += sum1;
1559:     y[9*i+1] += sum2;
1560:     y[9*i+2] += sum3;
1561:     y[9*i+3] += sum4;
1562:     y[9*i+4] += sum5;
1563:     y[9*i+5] += sum6;
1564:     y[9*i+6] += sum7;
1565:     y[9*i+7] += sum8;
1566:     y[9*i+8] += sum9;
1567:   }

1569:   PetscLogFlops(18.0*a->nz);
1570:   VecRestoreArrayRead(xx,&x);
1571:   VecRestoreArray(zz,&y);
1572:   return(0);
1573: }

1575: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1576: {
1577:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1578:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1579:   const PetscScalar *x,*v;
1580:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1581:   PetscErrorCode    ierr;
1582:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1583:   PetscInt          n,i;

1586:   if (yy != zz) {VecCopy(yy,zz);}
1587:   VecGetArrayRead(xx,&x);
1588:   VecGetArray(zz,&y);
1589:   for (i=0; i<m; i++) {
1590:     idx    = a->j + a->i[i];
1591:     v      = a->a + a->i[i];
1592:     n      = a->i[i+1] - a->i[i];
1593:     alpha1 = x[9*i];
1594:     alpha2 = x[9*i+1];
1595:     alpha3 = x[9*i+2];
1596:     alpha4 = x[9*i+3];
1597:     alpha5 = x[9*i+4];
1598:     alpha6 = x[9*i+5];
1599:     alpha7 = x[9*i+6];
1600:     alpha8 = x[9*i+7];
1601:     alpha9 = x[9*i+8];
1602:     while (n-->0) {
1603:       y[9*(*idx)]   += alpha1*(*v);
1604:       y[9*(*idx)+1] += alpha2*(*v);
1605:       y[9*(*idx)+2] += alpha3*(*v);
1606:       y[9*(*idx)+3] += alpha4*(*v);
1607:       y[9*(*idx)+4] += alpha5*(*v);
1608:       y[9*(*idx)+5] += alpha6*(*v);
1609:       y[9*(*idx)+6] += alpha7*(*v);
1610:       y[9*(*idx)+7] += alpha8*(*v);
1611:       y[9*(*idx)+8] += alpha9*(*v);
1612:       idx++; v++;
1613:     }
1614:   }
1615:   PetscLogFlops(18.0*a->nz);
1616:   VecRestoreArrayRead(xx,&x);
1617:   VecRestoreArray(zz,&y);
1618:   return(0);
1619: }
1620: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1621: {
1622:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1623:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1624:   const PetscScalar *x,*v;
1625:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1626:   PetscErrorCode    ierr;
1627:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1628:   PetscInt          nonzerorow=0,n,i,jrow,j;

1631:   VecGetArrayRead(xx,&x);
1632:   VecGetArray(yy,&y);
1633:   idx  = a->j;
1634:   v    = a->a;
1635:   ii   = a->i;

1637:   for (i=0; i<m; i++) {
1638:     jrow  = ii[i];
1639:     n     = ii[i+1] - jrow;
1640:     sum1  = 0.0;
1641:     sum2  = 0.0;
1642:     sum3  = 0.0;
1643:     sum4  = 0.0;
1644:     sum5  = 0.0;
1645:     sum6  = 0.0;
1646:     sum7  = 0.0;
1647:     sum8  = 0.0;
1648:     sum9  = 0.0;
1649:     sum10 = 0.0;

1651:     nonzerorow += (n>0);
1652:     for (j=0; j<n; j++) {
1653:       sum1  += v[jrow]*x[10*idx[jrow]];
1654:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1655:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1656:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1657:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1658:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1659:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1660:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1661:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1662:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1663:       jrow++;
1664:     }
1665:     y[10*i]   = sum1;
1666:     y[10*i+1] = sum2;
1667:     y[10*i+2] = sum3;
1668:     y[10*i+3] = sum4;
1669:     y[10*i+4] = sum5;
1670:     y[10*i+5] = sum6;
1671:     y[10*i+6] = sum7;
1672:     y[10*i+7] = sum8;
1673:     y[10*i+8] = sum9;
1674:     y[10*i+9] = sum10;
1675:   }

1677:   PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);
1678:   VecRestoreArrayRead(xx,&x);
1679:   VecRestoreArray(yy,&y);
1680:   return(0);
1681: }

1683: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1684: {
1685:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1686:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1687:   const PetscScalar *x,*v;
1688:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1689:   PetscErrorCode    ierr;
1690:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1691:   PetscInt          n,i,jrow,j;

1694:   if (yy != zz) {VecCopy(yy,zz);}
1695:   VecGetArrayRead(xx,&x);
1696:   VecGetArray(zz,&y);
1697:   idx  = a->j;
1698:   v    = a->a;
1699:   ii   = a->i;

1701:   for (i=0; i<m; i++) {
1702:     jrow  = ii[i];
1703:     n     = ii[i+1] - jrow;
1704:     sum1  = 0.0;
1705:     sum2  = 0.0;
1706:     sum3  = 0.0;
1707:     sum4  = 0.0;
1708:     sum5  = 0.0;
1709:     sum6  = 0.0;
1710:     sum7  = 0.0;
1711:     sum8  = 0.0;
1712:     sum9  = 0.0;
1713:     sum10 = 0.0;
1714:     for (j=0; j<n; j++) {
1715:       sum1  += v[jrow]*x[10*idx[jrow]];
1716:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1717:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1718:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1719:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1720:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1721:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1722:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1723:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1724:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1725:       jrow++;
1726:     }
1727:     y[10*i]   += sum1;
1728:     y[10*i+1] += sum2;
1729:     y[10*i+2] += sum3;
1730:     y[10*i+3] += sum4;
1731:     y[10*i+4] += sum5;
1732:     y[10*i+5] += sum6;
1733:     y[10*i+6] += sum7;
1734:     y[10*i+7] += sum8;
1735:     y[10*i+8] += sum9;
1736:     y[10*i+9] += sum10;
1737:   }

1739:   PetscLogFlops(20.0*a->nz);
1740:   VecRestoreArrayRead(xx,&x);
1741:   VecRestoreArray(yy,&y);
1742:   return(0);
1743: }

1745: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1746: {
1747:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1748:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1749:   const PetscScalar *x,*v;
1750:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1751:   PetscErrorCode    ierr;
1752:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1753:   PetscInt          n,i;

1756:   VecSet(yy,0.0);
1757:   VecGetArrayRead(xx,&x);
1758:   VecGetArray(yy,&y);

1760:   for (i=0; i<m; i++) {
1761:     idx     = a->j + a->i[i];
1762:     v       = a->a + a->i[i];
1763:     n       = a->i[i+1] - a->i[i];
1764:     alpha1  = x[10*i];
1765:     alpha2  = x[10*i+1];
1766:     alpha3  = x[10*i+2];
1767:     alpha4  = x[10*i+3];
1768:     alpha5  = x[10*i+4];
1769:     alpha6  = x[10*i+5];
1770:     alpha7  = x[10*i+6];
1771:     alpha8  = x[10*i+7];
1772:     alpha9  = x[10*i+8];
1773:     alpha10 = x[10*i+9];
1774:     while (n-->0) {
1775:       y[10*(*idx)]   += alpha1*(*v);
1776:       y[10*(*idx)+1] += alpha2*(*v);
1777:       y[10*(*idx)+2] += alpha3*(*v);
1778:       y[10*(*idx)+3] += alpha4*(*v);
1779:       y[10*(*idx)+4] += alpha5*(*v);
1780:       y[10*(*idx)+5] += alpha6*(*v);
1781:       y[10*(*idx)+6] += alpha7*(*v);
1782:       y[10*(*idx)+7] += alpha8*(*v);
1783:       y[10*(*idx)+8] += alpha9*(*v);
1784:       y[10*(*idx)+9] += alpha10*(*v);
1785:       idx++; v++;
1786:     }
1787:   }
1788:   PetscLogFlops(20.0*a->nz);
1789:   VecRestoreArrayRead(xx,&x);
1790:   VecRestoreArray(yy,&y);
1791:   return(0);
1792: }

1794: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1795: {
1796:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1797:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1798:   const PetscScalar *x,*v;
1799:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1800:   PetscErrorCode    ierr;
1801:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1802:   PetscInt          n,i;

1805:   if (yy != zz) {VecCopy(yy,zz);}
1806:   VecGetArrayRead(xx,&x);
1807:   VecGetArray(zz,&y);
1808:   for (i=0; i<m; i++) {
1809:     idx     = a->j + a->i[i];
1810:     v       = a->a + a->i[i];
1811:     n       = a->i[i+1] - a->i[i];
1812:     alpha1  = x[10*i];
1813:     alpha2  = x[10*i+1];
1814:     alpha3  = x[10*i+2];
1815:     alpha4  = x[10*i+3];
1816:     alpha5  = x[10*i+4];
1817:     alpha6  = x[10*i+5];
1818:     alpha7  = x[10*i+6];
1819:     alpha8  = x[10*i+7];
1820:     alpha9  = x[10*i+8];
1821:     alpha10 = x[10*i+9];
1822:     while (n-->0) {
1823:       y[10*(*idx)]   += alpha1*(*v);
1824:       y[10*(*idx)+1] += alpha2*(*v);
1825:       y[10*(*idx)+2] += alpha3*(*v);
1826:       y[10*(*idx)+3] += alpha4*(*v);
1827:       y[10*(*idx)+4] += alpha5*(*v);
1828:       y[10*(*idx)+5] += alpha6*(*v);
1829:       y[10*(*idx)+6] += alpha7*(*v);
1830:       y[10*(*idx)+7] += alpha8*(*v);
1831:       y[10*(*idx)+8] += alpha9*(*v);
1832:       y[10*(*idx)+9] += alpha10*(*v);
1833:       idx++; v++;
1834:     }
1835:   }
1836:   PetscLogFlops(20.0*a->nz);
1837:   VecRestoreArrayRead(xx,&x);
1838:   VecRestoreArray(zz,&y);
1839:   return(0);
1840: }


1843: /*--------------------------------------------------------------------------------------------*/
1844: PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1845: {
1846:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1847:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1848:   const PetscScalar *x,*v;
1849:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1850:   PetscErrorCode    ierr;
1851:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1852:   PetscInt          nonzerorow=0,n,i,jrow,j;

1855:   VecGetArrayRead(xx,&x);
1856:   VecGetArray(yy,&y);
1857:   idx  = a->j;
1858:   v    = a->a;
1859:   ii   = a->i;

1861:   for (i=0; i<m; i++) {
1862:     jrow  = ii[i];
1863:     n     = ii[i+1] - jrow;
1864:     sum1  = 0.0;
1865:     sum2  = 0.0;
1866:     sum3  = 0.0;
1867:     sum4  = 0.0;
1868:     sum5  = 0.0;
1869:     sum6  = 0.0;
1870:     sum7  = 0.0;
1871:     sum8  = 0.0;
1872:     sum9  = 0.0;
1873:     sum10 = 0.0;
1874:     sum11 = 0.0;

1876:     nonzerorow += (n>0);
1877:     for (j=0; j<n; j++) {
1878:       sum1  += v[jrow]*x[11*idx[jrow]];
1879:       sum2  += v[jrow]*x[11*idx[jrow]+1];
1880:       sum3  += v[jrow]*x[11*idx[jrow]+2];
1881:       sum4  += v[jrow]*x[11*idx[jrow]+3];
1882:       sum5  += v[jrow]*x[11*idx[jrow]+4];
1883:       sum6  += v[jrow]*x[11*idx[jrow]+5];
1884:       sum7  += v[jrow]*x[11*idx[jrow]+6];
1885:       sum8  += v[jrow]*x[11*idx[jrow]+7];
1886:       sum9  += v[jrow]*x[11*idx[jrow]+8];
1887:       sum10 += v[jrow]*x[11*idx[jrow]+9];
1888:       sum11 += v[jrow]*x[11*idx[jrow]+10];
1889:       jrow++;
1890:     }
1891:     y[11*i]    = sum1;
1892:     y[11*i+1]  = sum2;
1893:     y[11*i+2]  = sum3;
1894:     y[11*i+3]  = sum4;
1895:     y[11*i+4]  = sum5;
1896:     y[11*i+5]  = sum6;
1897:     y[11*i+6]  = sum7;
1898:     y[11*i+7]  = sum8;
1899:     y[11*i+8]  = sum9;
1900:     y[11*i+9]  = sum10;
1901:     y[11*i+10] = sum11;
1902:   }

1904:   PetscLogFlops(22.0*a->nz - 11*nonzerorow);
1905:   VecRestoreArrayRead(xx,&x);
1906:   VecRestoreArray(yy,&y);
1907:   return(0);
1908: }

1910: PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1911: {
1912:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1913:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1914:   const PetscScalar *x,*v;
1915:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1916:   PetscErrorCode    ierr;
1917:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1918:   PetscInt          n,i,jrow,j;

1921:   if (yy != zz) {VecCopy(yy,zz);}
1922:   VecGetArrayRead(xx,&x);
1923:   VecGetArray(zz,&y);
1924:   idx  = a->j;
1925:   v    = a->a;
1926:   ii   = a->i;

1928:   for (i=0; i<m; i++) {
1929:     jrow  = ii[i];
1930:     n     = ii[i+1] - jrow;
1931:     sum1  = 0.0;
1932:     sum2  = 0.0;
1933:     sum3  = 0.0;
1934:     sum4  = 0.0;
1935:     sum5  = 0.0;
1936:     sum6  = 0.0;
1937:     sum7  = 0.0;
1938:     sum8  = 0.0;
1939:     sum9  = 0.0;
1940:     sum10 = 0.0;
1941:     sum11 = 0.0;
1942:     for (j=0; j<n; j++) {
1943:       sum1  += v[jrow]*x[11*idx[jrow]];
1944:       sum2  += v[jrow]*x[11*idx[jrow]+1];
1945:       sum3  += v[jrow]*x[11*idx[jrow]+2];
1946:       sum4  += v[jrow]*x[11*idx[jrow]+3];
1947:       sum5  += v[jrow]*x[11*idx[jrow]+4];
1948:       sum6  += v[jrow]*x[11*idx[jrow]+5];
1949:       sum7  += v[jrow]*x[11*idx[jrow]+6];
1950:       sum8  += v[jrow]*x[11*idx[jrow]+7];
1951:       sum9  += v[jrow]*x[11*idx[jrow]+8];
1952:       sum10 += v[jrow]*x[11*idx[jrow]+9];
1953:       sum11 += v[jrow]*x[11*idx[jrow]+10];
1954:       jrow++;
1955:     }
1956:     y[11*i]    += sum1;
1957:     y[11*i+1]  += sum2;
1958:     y[11*i+2]  += sum3;
1959:     y[11*i+3]  += sum4;
1960:     y[11*i+4]  += sum5;
1961:     y[11*i+5]  += sum6;
1962:     y[11*i+6]  += sum7;
1963:     y[11*i+7]  += sum8;
1964:     y[11*i+8]  += sum9;
1965:     y[11*i+9]  += sum10;
1966:     y[11*i+10] += sum11;
1967:   }

1969:   PetscLogFlops(22.0*a->nz);
1970:   VecRestoreArrayRead(xx,&x);
1971:   VecRestoreArray(yy,&y);
1972:   return(0);
1973: }

1975: PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1976: {
1977:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1978:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1979:   const PetscScalar *x,*v;
1980:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1981:   PetscErrorCode    ierr;
1982:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1983:   PetscInt          n,i;

1986:   VecSet(yy,0.0);
1987:   VecGetArrayRead(xx,&x);
1988:   VecGetArray(yy,&y);

1990:   for (i=0; i<m; i++) {
1991:     idx     = a->j + a->i[i];
1992:     v       = a->a + a->i[i];
1993:     n       = a->i[i+1] - a->i[i];
1994:     alpha1  = x[11*i];
1995:     alpha2  = x[11*i+1];
1996:     alpha3  = x[11*i+2];
1997:     alpha4  = x[11*i+3];
1998:     alpha5  = x[11*i+4];
1999:     alpha6  = x[11*i+5];
2000:     alpha7  = x[11*i+6];
2001:     alpha8  = x[11*i+7];
2002:     alpha9  = x[11*i+8];
2003:     alpha10 = x[11*i+9];
2004:     alpha11 = x[11*i+10];
2005:     while (n-->0) {
2006:       y[11*(*idx)]    += alpha1*(*v);
2007:       y[11*(*idx)+1]  += alpha2*(*v);
2008:       y[11*(*idx)+2]  += alpha3*(*v);
2009:       y[11*(*idx)+3]  += alpha4*(*v);
2010:       y[11*(*idx)+4]  += alpha5*(*v);
2011:       y[11*(*idx)+5]  += alpha6*(*v);
2012:       y[11*(*idx)+6]  += alpha7*(*v);
2013:       y[11*(*idx)+7]  += alpha8*(*v);
2014:       y[11*(*idx)+8]  += alpha9*(*v);
2015:       y[11*(*idx)+9]  += alpha10*(*v);
2016:       y[11*(*idx)+10] += alpha11*(*v);
2017:       idx++; v++;
2018:     }
2019:   }
2020:   PetscLogFlops(22.0*a->nz);
2021:   VecRestoreArrayRead(xx,&x);
2022:   VecRestoreArray(yy,&y);
2023:   return(0);
2024: }

2026: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2027: {
2028:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2029:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2030:   const PetscScalar *x,*v;
2031:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2032:   PetscErrorCode    ierr;
2033:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2034:   PetscInt          n,i;

2037:   if (yy != zz) {VecCopy(yy,zz);}
2038:   VecGetArrayRead(xx,&x);
2039:   VecGetArray(zz,&y);
2040:   for (i=0; i<m; i++) {
2041:     idx     = a->j + a->i[i];
2042:     v       = a->a + a->i[i];
2043:     n       = a->i[i+1] - a->i[i];
2044:     alpha1  = x[11*i];
2045:     alpha2  = x[11*i+1];
2046:     alpha3  = x[11*i+2];
2047:     alpha4  = x[11*i+3];
2048:     alpha5  = x[11*i+4];
2049:     alpha6  = x[11*i+5];
2050:     alpha7  = x[11*i+6];
2051:     alpha8  = x[11*i+7];
2052:     alpha9  = x[11*i+8];
2053:     alpha10 = x[11*i+9];
2054:     alpha11 = x[11*i+10];
2055:     while (n-->0) {
2056:       y[11*(*idx)]    += alpha1*(*v);
2057:       y[11*(*idx)+1]  += alpha2*(*v);
2058:       y[11*(*idx)+2]  += alpha3*(*v);
2059:       y[11*(*idx)+3]  += alpha4*(*v);
2060:       y[11*(*idx)+4]  += alpha5*(*v);
2061:       y[11*(*idx)+5]  += alpha6*(*v);
2062:       y[11*(*idx)+6]  += alpha7*(*v);
2063:       y[11*(*idx)+7]  += alpha8*(*v);
2064:       y[11*(*idx)+8]  += alpha9*(*v);
2065:       y[11*(*idx)+9]  += alpha10*(*v);
2066:       y[11*(*idx)+10] += alpha11*(*v);
2067:       idx++; v++;
2068:     }
2069:   }
2070:   PetscLogFlops(22.0*a->nz);
2071:   VecRestoreArrayRead(xx,&x);
2072:   VecRestoreArray(zz,&y);
2073:   return(0);
2074: }


2077: /*--------------------------------------------------------------------------------------------*/
2078: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2079: {
2080:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2081:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2082:   const PetscScalar *x,*v;
2083:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2084:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2085:   PetscErrorCode    ierr;
2086:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2087:   PetscInt          nonzerorow=0,n,i,jrow,j;

2090:   VecGetArrayRead(xx,&x);
2091:   VecGetArray(yy,&y);
2092:   idx  = a->j;
2093:   v    = a->a;
2094:   ii   = a->i;

2096:   for (i=0; i<m; i++) {
2097:     jrow  = ii[i];
2098:     n     = ii[i+1] - jrow;
2099:     sum1  = 0.0;
2100:     sum2  = 0.0;
2101:     sum3  = 0.0;
2102:     sum4  = 0.0;
2103:     sum5  = 0.0;
2104:     sum6  = 0.0;
2105:     sum7  = 0.0;
2106:     sum8  = 0.0;
2107:     sum9  = 0.0;
2108:     sum10 = 0.0;
2109:     sum11 = 0.0;
2110:     sum12 = 0.0;
2111:     sum13 = 0.0;
2112:     sum14 = 0.0;
2113:     sum15 = 0.0;
2114:     sum16 = 0.0;

2116:     nonzerorow += (n>0);
2117:     for (j=0; j<n; j++) {
2118:       sum1  += v[jrow]*x[16*idx[jrow]];
2119:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2120:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2121:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2122:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2123:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2124:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2125:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2126:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2127:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2128:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2129:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2130:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2131:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2132:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2133:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2134:       jrow++;
2135:     }
2136:     y[16*i]    = sum1;
2137:     y[16*i+1]  = sum2;
2138:     y[16*i+2]  = sum3;
2139:     y[16*i+3]  = sum4;
2140:     y[16*i+4]  = sum5;
2141:     y[16*i+5]  = sum6;
2142:     y[16*i+6]  = sum7;
2143:     y[16*i+7]  = sum8;
2144:     y[16*i+8]  = sum9;
2145:     y[16*i+9]  = sum10;
2146:     y[16*i+10] = sum11;
2147:     y[16*i+11] = sum12;
2148:     y[16*i+12] = sum13;
2149:     y[16*i+13] = sum14;
2150:     y[16*i+14] = sum15;
2151:     y[16*i+15] = sum16;
2152:   }

2154:   PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);
2155:   VecRestoreArrayRead(xx,&x);
2156:   VecRestoreArray(yy,&y);
2157:   return(0);
2158: }

2160: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2161: {
2162:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2163:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2164:   const PetscScalar *x,*v;
2165:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2166:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2167:   PetscErrorCode    ierr;
2168:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2169:   PetscInt          n,i;

2172:   VecSet(yy,0.0);
2173:   VecGetArrayRead(xx,&x);
2174:   VecGetArray(yy,&y);

2176:   for (i=0; i<m; i++) {
2177:     idx     = a->j + a->i[i];
2178:     v       = a->a + a->i[i];
2179:     n       = a->i[i+1] - a->i[i];
2180:     alpha1  = x[16*i];
2181:     alpha2  = x[16*i+1];
2182:     alpha3  = x[16*i+2];
2183:     alpha4  = x[16*i+3];
2184:     alpha5  = x[16*i+4];
2185:     alpha6  = x[16*i+5];
2186:     alpha7  = x[16*i+6];
2187:     alpha8  = x[16*i+7];
2188:     alpha9  = x[16*i+8];
2189:     alpha10 = x[16*i+9];
2190:     alpha11 = x[16*i+10];
2191:     alpha12 = x[16*i+11];
2192:     alpha13 = x[16*i+12];
2193:     alpha14 = x[16*i+13];
2194:     alpha15 = x[16*i+14];
2195:     alpha16 = x[16*i+15];
2196:     while (n-->0) {
2197:       y[16*(*idx)]    += alpha1*(*v);
2198:       y[16*(*idx)+1]  += alpha2*(*v);
2199:       y[16*(*idx)+2]  += alpha3*(*v);
2200:       y[16*(*idx)+3]  += alpha4*(*v);
2201:       y[16*(*idx)+4]  += alpha5*(*v);
2202:       y[16*(*idx)+5]  += alpha6*(*v);
2203:       y[16*(*idx)+6]  += alpha7*(*v);
2204:       y[16*(*idx)+7]  += alpha8*(*v);
2205:       y[16*(*idx)+8]  += alpha9*(*v);
2206:       y[16*(*idx)+9]  += alpha10*(*v);
2207:       y[16*(*idx)+10] += alpha11*(*v);
2208:       y[16*(*idx)+11] += alpha12*(*v);
2209:       y[16*(*idx)+12] += alpha13*(*v);
2210:       y[16*(*idx)+13] += alpha14*(*v);
2211:       y[16*(*idx)+14] += alpha15*(*v);
2212:       y[16*(*idx)+15] += alpha16*(*v);
2213:       idx++; v++;
2214:     }
2215:   }
2216:   PetscLogFlops(32.0*a->nz);
2217:   VecRestoreArrayRead(xx,&x);
2218:   VecRestoreArray(yy,&y);
2219:   return(0);
2220: }

2222: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2223: {
2224:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2225:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2226:   const PetscScalar *x,*v;
2227:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2228:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2229:   PetscErrorCode    ierr;
2230:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2231:   PetscInt          n,i,jrow,j;

2234:   if (yy != zz) {VecCopy(yy,zz);}
2235:   VecGetArrayRead(xx,&x);
2236:   VecGetArray(zz,&y);
2237:   idx  = a->j;
2238:   v    = a->a;
2239:   ii   = a->i;

2241:   for (i=0; i<m; i++) {
2242:     jrow  = ii[i];
2243:     n     = ii[i+1] - jrow;
2244:     sum1  = 0.0;
2245:     sum2  = 0.0;
2246:     sum3  = 0.0;
2247:     sum4  = 0.0;
2248:     sum5  = 0.0;
2249:     sum6  = 0.0;
2250:     sum7  = 0.0;
2251:     sum8  = 0.0;
2252:     sum9  = 0.0;
2253:     sum10 = 0.0;
2254:     sum11 = 0.0;
2255:     sum12 = 0.0;
2256:     sum13 = 0.0;
2257:     sum14 = 0.0;
2258:     sum15 = 0.0;
2259:     sum16 = 0.0;
2260:     for (j=0; j<n; j++) {
2261:       sum1  += v[jrow]*x[16*idx[jrow]];
2262:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2263:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2264:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2265:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2266:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2267:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2268:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2269:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2270:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2271:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2272:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2273:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2274:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2275:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2276:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2277:       jrow++;
2278:     }
2279:     y[16*i]    += sum1;
2280:     y[16*i+1]  += sum2;
2281:     y[16*i+2]  += sum3;
2282:     y[16*i+3]  += sum4;
2283:     y[16*i+4]  += sum5;
2284:     y[16*i+5]  += sum6;
2285:     y[16*i+6]  += sum7;
2286:     y[16*i+7]  += sum8;
2287:     y[16*i+8]  += sum9;
2288:     y[16*i+9]  += sum10;
2289:     y[16*i+10] += sum11;
2290:     y[16*i+11] += sum12;
2291:     y[16*i+12] += sum13;
2292:     y[16*i+13] += sum14;
2293:     y[16*i+14] += sum15;
2294:     y[16*i+15] += sum16;
2295:   }

2297:   PetscLogFlops(32.0*a->nz);
2298:   VecRestoreArrayRead(xx,&x);
2299:   VecRestoreArray(zz,&y);
2300:   return(0);
2301: }

2303: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2304: {
2305:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2306:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2307:   const PetscScalar *x,*v;
2308:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2309:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2310:   PetscErrorCode    ierr;
2311:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2312:   PetscInt          n,i;

2315:   if (yy != zz) {VecCopy(yy,zz);}
2316:   VecGetArrayRead(xx,&x);
2317:   VecGetArray(zz,&y);
2318:   for (i=0; i<m; i++) {
2319:     idx     = a->j + a->i[i];
2320:     v       = a->a + a->i[i];
2321:     n       = a->i[i+1] - a->i[i];
2322:     alpha1  = x[16*i];
2323:     alpha2  = x[16*i+1];
2324:     alpha3  = x[16*i+2];
2325:     alpha4  = x[16*i+3];
2326:     alpha5  = x[16*i+4];
2327:     alpha6  = x[16*i+5];
2328:     alpha7  = x[16*i+6];
2329:     alpha8  = x[16*i+7];
2330:     alpha9  = x[16*i+8];
2331:     alpha10 = x[16*i+9];
2332:     alpha11 = x[16*i+10];
2333:     alpha12 = x[16*i+11];
2334:     alpha13 = x[16*i+12];
2335:     alpha14 = x[16*i+13];
2336:     alpha15 = x[16*i+14];
2337:     alpha16 = x[16*i+15];
2338:     while (n-->0) {
2339:       y[16*(*idx)]    += alpha1*(*v);
2340:       y[16*(*idx)+1]  += alpha2*(*v);
2341:       y[16*(*idx)+2]  += alpha3*(*v);
2342:       y[16*(*idx)+3]  += alpha4*(*v);
2343:       y[16*(*idx)+4]  += alpha5*(*v);
2344:       y[16*(*idx)+5]  += alpha6*(*v);
2345:       y[16*(*idx)+6]  += alpha7*(*v);
2346:       y[16*(*idx)+7]  += alpha8*(*v);
2347:       y[16*(*idx)+8]  += alpha9*(*v);
2348:       y[16*(*idx)+9]  += alpha10*(*v);
2349:       y[16*(*idx)+10] += alpha11*(*v);
2350:       y[16*(*idx)+11] += alpha12*(*v);
2351:       y[16*(*idx)+12] += alpha13*(*v);
2352:       y[16*(*idx)+13] += alpha14*(*v);
2353:       y[16*(*idx)+14] += alpha15*(*v);
2354:       y[16*(*idx)+15] += alpha16*(*v);
2355:       idx++; v++;
2356:     }
2357:   }
2358:   PetscLogFlops(32.0*a->nz);
2359:   VecRestoreArrayRead(xx,&x);
2360:   VecRestoreArray(zz,&y);
2361:   return(0);
2362: }

2364: /*--------------------------------------------------------------------------------------------*/
2365: PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2366: {
2367:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2368:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2369:   const PetscScalar *x,*v;
2370:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2371:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2372:   PetscErrorCode    ierr;
2373:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2374:   PetscInt          nonzerorow=0,n,i,jrow,j;

2377:   VecGetArrayRead(xx,&x);
2378:   VecGetArray(yy,&y);
2379:   idx  = a->j;
2380:   v    = a->a;
2381:   ii   = a->i;

2383:   for (i=0; i<m; i++) {
2384:     jrow  = ii[i];
2385:     n     = ii[i+1] - jrow;
2386:     sum1  = 0.0;
2387:     sum2  = 0.0;
2388:     sum3  = 0.0;
2389:     sum4  = 0.0;
2390:     sum5  = 0.0;
2391:     sum6  = 0.0;
2392:     sum7  = 0.0;
2393:     sum8  = 0.0;
2394:     sum9  = 0.0;
2395:     sum10 = 0.0;
2396:     sum11 = 0.0;
2397:     sum12 = 0.0;
2398:     sum13 = 0.0;
2399:     sum14 = 0.0;
2400:     sum15 = 0.0;
2401:     sum16 = 0.0;
2402:     sum17 = 0.0;
2403:     sum18 = 0.0;

2405:     nonzerorow += (n>0);
2406:     for (j=0; j<n; j++) {
2407:       sum1  += v[jrow]*x[18*idx[jrow]];
2408:       sum2  += v[jrow]*x[18*idx[jrow]+1];
2409:       sum3  += v[jrow]*x[18*idx[jrow]+2];
2410:       sum4  += v[jrow]*x[18*idx[jrow]+3];
2411:       sum5  += v[jrow]*x[18*idx[jrow]+4];
2412:       sum6  += v[jrow]*x[18*idx[jrow]+5];
2413:       sum7  += v[jrow]*x[18*idx[jrow]+6];
2414:       sum8  += v[jrow]*x[18*idx[jrow]+7];
2415:       sum9  += v[jrow]*x[18*idx[jrow]+8];
2416:       sum10 += v[jrow]*x[18*idx[jrow]+9];
2417:       sum11 += v[jrow]*x[18*idx[jrow]+10];
2418:       sum12 += v[jrow]*x[18*idx[jrow]+11];
2419:       sum13 += v[jrow]*x[18*idx[jrow]+12];
2420:       sum14 += v[jrow]*x[18*idx[jrow]+13];
2421:       sum15 += v[jrow]*x[18*idx[jrow]+14];
2422:       sum16 += v[jrow]*x[18*idx[jrow]+15];
2423:       sum17 += v[jrow]*x[18*idx[jrow]+16];
2424:       sum18 += v[jrow]*x[18*idx[jrow]+17];
2425:       jrow++;
2426:     }
2427:     y[18*i]    = sum1;
2428:     y[18*i+1]  = sum2;
2429:     y[18*i+2]  = sum3;
2430:     y[18*i+3]  = sum4;
2431:     y[18*i+4]  = sum5;
2432:     y[18*i+5]  = sum6;
2433:     y[18*i+6]  = sum7;
2434:     y[18*i+7]  = sum8;
2435:     y[18*i+8]  = sum9;
2436:     y[18*i+9]  = sum10;
2437:     y[18*i+10] = sum11;
2438:     y[18*i+11] = sum12;
2439:     y[18*i+12] = sum13;
2440:     y[18*i+13] = sum14;
2441:     y[18*i+14] = sum15;
2442:     y[18*i+15] = sum16;
2443:     y[18*i+16] = sum17;
2444:     y[18*i+17] = sum18;
2445:   }

2447:   PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);
2448:   VecRestoreArrayRead(xx,&x);
2449:   VecRestoreArray(yy,&y);
2450:   return(0);
2451: }

2453: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2454: {
2455:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2456:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2457:   const PetscScalar *x,*v;
2458:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2459:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2460:   PetscErrorCode    ierr;
2461:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2462:   PetscInt          n,i;

2465:   VecSet(yy,0.0);
2466:   VecGetArrayRead(xx,&x);
2467:   VecGetArray(yy,&y);

2469:   for (i=0; i<m; i++) {
2470:     idx     = a->j + a->i[i];
2471:     v       = a->a + a->i[i];
2472:     n       = a->i[i+1] - a->i[i];
2473:     alpha1  = x[18*i];
2474:     alpha2  = x[18*i+1];
2475:     alpha3  = x[18*i+2];
2476:     alpha4  = x[18*i+3];
2477:     alpha5  = x[18*i+4];
2478:     alpha6  = x[18*i+5];
2479:     alpha7  = x[18*i+6];
2480:     alpha8  = x[18*i+7];
2481:     alpha9  = x[18*i+8];
2482:     alpha10 = x[18*i+9];
2483:     alpha11 = x[18*i+10];
2484:     alpha12 = x[18*i+11];
2485:     alpha13 = x[18*i+12];
2486:     alpha14 = x[18*i+13];
2487:     alpha15 = x[18*i+14];
2488:     alpha16 = x[18*i+15];
2489:     alpha17 = x[18*i+16];
2490:     alpha18 = x[18*i+17];
2491:     while (n-->0) {
2492:       y[18*(*idx)]    += alpha1*(*v);
2493:       y[18*(*idx)+1]  += alpha2*(*v);
2494:       y[18*(*idx)+2]  += alpha3*(*v);
2495:       y[18*(*idx)+3]  += alpha4*(*v);
2496:       y[18*(*idx)+4]  += alpha5*(*v);
2497:       y[18*(*idx)+5]  += alpha6*(*v);
2498:       y[18*(*idx)+6]  += alpha7*(*v);
2499:       y[18*(*idx)+7]  += alpha8*(*v);
2500:       y[18*(*idx)+8]  += alpha9*(*v);
2501:       y[18*(*idx)+9]  += alpha10*(*v);
2502:       y[18*(*idx)+10] += alpha11*(*v);
2503:       y[18*(*idx)+11] += alpha12*(*v);
2504:       y[18*(*idx)+12] += alpha13*(*v);
2505:       y[18*(*idx)+13] += alpha14*(*v);
2506:       y[18*(*idx)+14] += alpha15*(*v);
2507:       y[18*(*idx)+15] += alpha16*(*v);
2508:       y[18*(*idx)+16] += alpha17*(*v);
2509:       y[18*(*idx)+17] += alpha18*(*v);
2510:       idx++; v++;
2511:     }
2512:   }
2513:   PetscLogFlops(36.0*a->nz);
2514:   VecRestoreArrayRead(xx,&x);
2515:   VecRestoreArray(yy,&y);
2516:   return(0);
2517: }

2519: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2520: {
2521:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2522:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2523:   const PetscScalar *x,*v;
2524:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2525:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2526:   PetscErrorCode    ierr;
2527:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2528:   PetscInt          n,i,jrow,j;

2531:   if (yy != zz) {VecCopy(yy,zz);}
2532:   VecGetArrayRead(xx,&x);
2533:   VecGetArray(zz,&y);
2534:   idx  = a->j;
2535:   v    = a->a;
2536:   ii   = a->i;

2538:   for (i=0; i<m; i++) {
2539:     jrow  = ii[i];
2540:     n     = ii[i+1] - jrow;
2541:     sum1  = 0.0;
2542:     sum2  = 0.0;
2543:     sum3  = 0.0;
2544:     sum4  = 0.0;
2545:     sum5  = 0.0;
2546:     sum6  = 0.0;
2547:     sum7  = 0.0;
2548:     sum8  = 0.0;
2549:     sum9  = 0.0;
2550:     sum10 = 0.0;
2551:     sum11 = 0.0;
2552:     sum12 = 0.0;
2553:     sum13 = 0.0;
2554:     sum14 = 0.0;
2555:     sum15 = 0.0;
2556:     sum16 = 0.0;
2557:     sum17 = 0.0;
2558:     sum18 = 0.0;
2559:     for (j=0; j<n; j++) {
2560:       sum1  += v[jrow]*x[18*idx[jrow]];
2561:       sum2  += v[jrow]*x[18*idx[jrow]+1];
2562:       sum3  += v[jrow]*x[18*idx[jrow]+2];
2563:       sum4  += v[jrow]*x[18*idx[jrow]+3];
2564:       sum5  += v[jrow]*x[18*idx[jrow]+4];
2565:       sum6  += v[jrow]*x[18*idx[jrow]+5];
2566:       sum7  += v[jrow]*x[18*idx[jrow]+6];
2567:       sum8  += v[jrow]*x[18*idx[jrow]+7];
2568:       sum9  += v[jrow]*x[18*idx[jrow]+8];
2569:       sum10 += v[jrow]*x[18*idx[jrow]+9];
2570:       sum11 += v[jrow]*x[18*idx[jrow]+10];
2571:       sum12 += v[jrow]*x[18*idx[jrow]+11];
2572:       sum13 += v[jrow]*x[18*idx[jrow]+12];
2573:       sum14 += v[jrow]*x[18*idx[jrow]+13];
2574:       sum15 += v[jrow]*x[18*idx[jrow]+14];
2575:       sum16 += v[jrow]*x[18*idx[jrow]+15];
2576:       sum17 += v[jrow]*x[18*idx[jrow]+16];
2577:       sum18 += v[jrow]*x[18*idx[jrow]+17];
2578:       jrow++;
2579:     }
2580:     y[18*i]    += sum1;
2581:     y[18*i+1]  += sum2;
2582:     y[18*i+2]  += sum3;
2583:     y[18*i+3]  += sum4;
2584:     y[18*i+4]  += sum5;
2585:     y[18*i+5]  += sum6;
2586:     y[18*i+6]  += sum7;
2587:     y[18*i+7]  += sum8;
2588:     y[18*i+8]  += sum9;
2589:     y[18*i+9]  += sum10;
2590:     y[18*i+10] += sum11;
2591:     y[18*i+11] += sum12;
2592:     y[18*i+12] += sum13;
2593:     y[18*i+13] += sum14;
2594:     y[18*i+14] += sum15;
2595:     y[18*i+15] += sum16;
2596:     y[18*i+16] += sum17;
2597:     y[18*i+17] += sum18;
2598:   }

2600:   PetscLogFlops(36.0*a->nz);
2601:   VecRestoreArrayRead(xx,&x);
2602:   VecRestoreArray(zz,&y);
2603:   return(0);
2604: }

2606: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2607: {
2608:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2609:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2610:   const PetscScalar *x,*v;
2611:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2612:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2613:   PetscErrorCode    ierr;
2614:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2615:   PetscInt          n,i;

2618:   if (yy != zz) {VecCopy(yy,zz);}
2619:   VecGetArrayRead(xx,&x);
2620:   VecGetArray(zz,&y);
2621:   for (i=0; i<m; i++) {
2622:     idx     = a->j + a->i[i];
2623:     v       = a->a + a->i[i];
2624:     n       = a->i[i+1] - a->i[i];
2625:     alpha1  = x[18*i];
2626:     alpha2  = x[18*i+1];
2627:     alpha3  = x[18*i+2];
2628:     alpha4  = x[18*i+3];
2629:     alpha5  = x[18*i+4];
2630:     alpha6  = x[18*i+5];
2631:     alpha7  = x[18*i+6];
2632:     alpha8  = x[18*i+7];
2633:     alpha9  = x[18*i+8];
2634:     alpha10 = x[18*i+9];
2635:     alpha11 = x[18*i+10];
2636:     alpha12 = x[18*i+11];
2637:     alpha13 = x[18*i+12];
2638:     alpha14 = x[18*i+13];
2639:     alpha15 = x[18*i+14];
2640:     alpha16 = x[18*i+15];
2641:     alpha17 = x[18*i+16];
2642:     alpha18 = x[18*i+17];
2643:     while (n-->0) {
2644:       y[18*(*idx)]    += alpha1*(*v);
2645:       y[18*(*idx)+1]  += alpha2*(*v);
2646:       y[18*(*idx)+2]  += alpha3*(*v);
2647:       y[18*(*idx)+3]  += alpha4*(*v);
2648:       y[18*(*idx)+4]  += alpha5*(*v);
2649:       y[18*(*idx)+5]  += alpha6*(*v);
2650:       y[18*(*idx)+6]  += alpha7*(*v);
2651:       y[18*(*idx)+7]  += alpha8*(*v);
2652:       y[18*(*idx)+8]  += alpha9*(*v);
2653:       y[18*(*idx)+9]  += alpha10*(*v);
2654:       y[18*(*idx)+10] += alpha11*(*v);
2655:       y[18*(*idx)+11] += alpha12*(*v);
2656:       y[18*(*idx)+12] += alpha13*(*v);
2657:       y[18*(*idx)+13] += alpha14*(*v);
2658:       y[18*(*idx)+14] += alpha15*(*v);
2659:       y[18*(*idx)+15] += alpha16*(*v);
2660:       y[18*(*idx)+16] += alpha17*(*v);
2661:       y[18*(*idx)+17] += alpha18*(*v);
2662:       idx++; v++;
2663:     }
2664:   }
2665:   PetscLogFlops(36.0*a->nz);
2666:   VecRestoreArrayRead(xx,&x);
2667:   VecRestoreArray(zz,&y);
2668:   return(0);
2669: }

2671: PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2672: {
2673:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2674:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2675:   const PetscScalar *x,*v;
2676:   PetscScalar       *y,*sums;
2677:   PetscErrorCode    ierr;
2678:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2679:   PetscInt          n,i,jrow,j,dof = b->dof,k;

2682:   VecGetArrayRead(xx,&x);
2683:   VecSet(yy,0.0);
2684:   VecGetArray(yy,&y);
2685:   idx  = a->j;
2686:   v    = a->a;
2687:   ii   = a->i;

2689:   for (i=0; i<m; i++) {
2690:     jrow = ii[i];
2691:     n    = ii[i+1] - jrow;
2692:     sums = y + dof*i;
2693:     for (j=0; j<n; j++) {
2694:       for (k=0; k<dof; k++) {
2695:         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2696:       }
2697:       jrow++;
2698:     }
2699:   }

2701:   PetscLogFlops(2.0*dof*a->nz);
2702:   VecRestoreArrayRead(xx,&x);
2703:   VecRestoreArray(yy,&y);
2704:   return(0);
2705: }

2707: PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2708: {
2709:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2710:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2711:   const PetscScalar *x,*v;
2712:   PetscScalar       *y,*sums;
2713:   PetscErrorCode    ierr;
2714:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2715:   PetscInt          n,i,jrow,j,dof = b->dof,k;

2718:   if (yy != zz) {VecCopy(yy,zz);}
2719:   VecGetArrayRead(xx,&x);
2720:   VecGetArray(zz,&y);
2721:   idx  = a->j;
2722:   v    = a->a;
2723:   ii   = a->i;

2725:   for (i=0; i<m; i++) {
2726:     jrow = ii[i];
2727:     n    = ii[i+1] - jrow;
2728:     sums = y + dof*i;
2729:     for (j=0; j<n; j++) {
2730:       for (k=0; k<dof; k++) {
2731:         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2732:       }
2733:       jrow++;
2734:     }
2735:   }

2737:   PetscLogFlops(2.0*dof*a->nz);
2738:   VecRestoreArrayRead(xx,&x);
2739:   VecRestoreArray(zz,&y);
2740:   return(0);
2741: }

2743: PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2744: {
2745:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2746:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2747:   const PetscScalar *x,*v,*alpha;
2748:   PetscScalar       *y;
2749:   PetscErrorCode    ierr;
2750:   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2751:   PetscInt          n,i,k;

2754:   VecGetArrayRead(xx,&x);
2755:   VecSet(yy,0.0);
2756:   VecGetArray(yy,&y);
2757:   for (i=0; i<m; i++) {
2758:     idx   = a->j + a->i[i];
2759:     v     = a->a + a->i[i];
2760:     n     = a->i[i+1] - a->i[i];
2761:     alpha = x + dof*i;
2762:     while (n-->0) {
2763:       for (k=0; k<dof; k++) {
2764:         y[dof*(*idx)+k] += alpha[k]*(*v);
2765:       }
2766:       idx++; v++;
2767:     }
2768:   }
2769:   PetscLogFlops(2.0*dof*a->nz);
2770:   VecRestoreArrayRead(xx,&x);
2771:   VecRestoreArray(yy,&y);
2772:   return(0);
2773: }

2775: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2776: {
2777:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2778:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2779:   const PetscScalar *x,*v,*alpha;
2780:   PetscScalar       *y;
2781:   PetscErrorCode    ierr;
2782:   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2783:   PetscInt          n,i,k;

2786:   if (yy != zz) {VecCopy(yy,zz);}
2787:   VecGetArrayRead(xx,&x);
2788:   VecGetArray(zz,&y);
2789:   for (i=0; i<m; i++) {
2790:     idx   = a->j + a->i[i];
2791:     v     = a->a + a->i[i];
2792:     n     = a->i[i+1] - a->i[i];
2793:     alpha = x + dof*i;
2794:     while (n-->0) {
2795:       for (k=0; k<dof; k++) {
2796:         y[dof*(*idx)+k] += alpha[k]*(*v);
2797:       }
2798:       idx++; v++;
2799:     }
2800:   }
2801:   PetscLogFlops(2.0*dof*a->nz);
2802:   VecRestoreArrayRead(xx,&x);
2803:   VecRestoreArray(zz,&y);
2804:   return(0);
2805: }

2807: /*===================================================================================*/
2808: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2809: {
2810:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2814:   /* start the scatter */
2815:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2816:   (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2817:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2818:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2819:   return(0);
2820: }

2822: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2823: {
2824:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2828:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2829:   (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2830:   VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2831:   VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2832:   return(0);
2833: }

2835: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2836: {
2837:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2841:   /* start the scatter */
2842:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2843:   (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2844:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2845:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2846:   return(0);
2847: }

2849: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2850: {
2851:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2855:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2856:   (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2857:   VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2858:   VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2859:   return(0);
2860: }

2862: /* ----------------------------------------------------------------*/
2863: PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
2864: {
2865:   Mat_Product    *product = C->product;

2868:   if (product->type == MATPRODUCT_PtAP) {
2869:     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
2870:   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices",MatProductTypes[product->type]);
2871:   return(0);
2872: }

2874: PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
2875: {
2877:   Mat_Product    *product = C->product;
2878:   PetscBool      flg = PETSC_FALSE;
2879:   Mat            A=product->A,P=product->B;
2880:   PetscInt       alg=1; /* set default algorithm */
2881: #if !defined(PETSC_HAVE_HYPRE)
2882:   const char     *algTypes[4] = {"scalable","nonscalable","allatonce","allatonce_merged"};
2883:   PetscInt       nalg=4;
2884: #else
2885:   const char     *algTypes[5] = {"scalable","nonscalable","allatonce","allatonce_merged","hypre"};
2886:   PetscInt       nalg=5;
2887: #endif

2890:   if (product->type != MATPRODUCT_PtAP) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices",MatProductTypes[product->type]);

2892:   /* PtAP */
2893:   /* Check matrix local sizes */
2894:   if (A->rmap->rstart != P->rmap->rstart || A->rmap->rend != P->rmap->rend) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Arow (%D, %D) != Prow (%D,%D)",A->rmap->rstart,A->rmap->rend,P->rmap->rstart,P->rmap->rend);
2895:   if (A->cmap->rstart != P->rmap->rstart || A->cmap->rend != P->rmap->rend) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Acol (%D, %D) != Prow (%D,%D)",A->cmap->rstart,A->cmap->rend,P->rmap->rstart,P->rmap->rend);

2897:   /* Set the default algorithm */
2898:   PetscStrcmp(C->product->alg,"default",&flg);
2899:   if (flg) {
2900:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2901:   }

2903:   /* Get runtime option */
2904:   PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
2905:   PetscOptionsEList("-matproduct_ptap_via","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[alg],&alg,&flg);
2906:   if (flg) {
2907:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2908:   }
2909:   PetscOptionsEnd();

2911:   PetscStrcmp(C->product->alg,"allatonce",&flg);
2912:   if (flg) {
2913:     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2914:     return(0);
2915:   }

2917:   PetscStrcmp(C->product->alg,"allatonce_merged",&flg);
2918:   if (flg) {
2919:     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2920:     return(0);
2921:   }

2923:   /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
2924:   PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n");
2925:   MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P);
2926:   MatProductSetFromOptions(C);
2927:   return(0);
2928: }

2930: /* ----------------------------------------------------------------*/
2931: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat C)
2932: {
2933:   PetscErrorCode     ierr;
2934:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2935:   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
2936:   Mat                P         =pp->AIJ;
2937:   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2938:   PetscInt           *pti,*ptj,*ptJ;
2939:   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2940:   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2941:   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2942:   MatScalar          *ca;
2943:   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;

2946:   /* Get ij structure of P^T */
2947:   MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

2949:   cn = pn*ppdof;
2950:   /* Allocate ci array, arrays for fill computation and */
2951:   /* free space for accumulating nonzero column info */
2952:   PetscMalloc1(cn+1,&ci);
2953:   ci[0] = 0;

2955:   /* Work arrays for rows of P^T*A */
2956:   PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);
2957:   PetscArrayzero(ptadenserow,an);
2958:   PetscArrayzero(denserow,cn);

2960:   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2961:   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2962:   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2963:   PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);
2964:   current_space = free_space;

2966:   /* Determine symbolic info for each row of C: */
2967:   for (i=0; i<pn; i++) {
2968:     ptnzi = pti[i+1] - pti[i];
2969:     ptJ   = ptj + pti[i];
2970:     for (dof=0; dof<ppdof; dof++) {
2971:       ptanzi = 0;
2972:       /* Determine symbolic row of PtA: */
2973:       for (j=0; j<ptnzi; j++) {
2974:         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2975:         arow = ptJ[j]*ppdof + dof;
2976:         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2977:         anzj = ai[arow+1] - ai[arow];
2978:         ajj  = aj + ai[arow];
2979:         for (k=0; k<anzj; k++) {
2980:           if (!ptadenserow[ajj[k]]) {
2981:             ptadenserow[ajj[k]]    = -1;
2982:             ptasparserow[ptanzi++] = ajj[k];
2983:           }
2984:         }
2985:       }
2986:       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2987:       ptaj = ptasparserow;
2988:       cnzi = 0;
2989:       for (j=0; j<ptanzi; j++) {
2990:         /* Get offset within block of P */
2991:         pshift = *ptaj%ppdof;
2992:         /* Get block row of P */
2993:         prow = (*ptaj++)/ppdof; /* integer division */
2994:         /* P has same number of nonzeros per row as the compressed form */
2995:         pnzj = pi[prow+1] - pi[prow];
2996:         pjj  = pj + pi[prow];
2997:         for (k=0;k<pnzj;k++) {
2998:           /* Locations in C are shifted by the offset within the block */
2999:           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
3000:           if (!denserow[pjj[k]*ppdof+pshift]) {
3001:             denserow[pjj[k]*ppdof+pshift] = -1;
3002:             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
3003:           }
3004:         }
3005:       }

3007:       /* sort sparserow */
3008:       PetscSortInt(cnzi,sparserow);

3010:       /* If free space is not available, make more free space */
3011:       /* Double the amount of total space in the list */
3012:       if (current_space->local_remaining<cnzi) {
3013:         PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);
3014:       }

3016:       /* Copy data into free space, and zero out denserows */
3017:       PetscArraycpy(current_space->array,sparserow,cnzi);

3019:       current_space->array           += cnzi;
3020:       current_space->local_used      += cnzi;
3021:       current_space->local_remaining -= cnzi;

3023:       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
3024:       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;

3026:       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3027:       /*        For now, we will recompute what is needed. */
3028:       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
3029:     }
3030:   }
3031:   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3032:   /* Allocate space for cj, initialize cj, and */
3033:   /* destroy list of free space and other temporary array(s) */
3034:   PetscMalloc1(ci[cn]+1,&cj);
3035:   PetscFreeSpaceContiguous(&free_space,cj);
3036:   PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);

3038:   /* Allocate space for ca */
3039:   PetscCalloc1(ci[cn]+1,&ca);

3041:   /* put together the new matrix */
3042:   MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,NULL,C);
3043:   MatSetBlockSize(C,pp->dof);

3045:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3046:   /* Since these are PETSc arrays, change flags to free them as necessary. */
3047:   c          = (Mat_SeqAIJ*)(C->data);
3048:   c->free_a  = PETSC_TRUE;
3049:   c->free_ij = PETSC_TRUE;
3050:   c->nonew   = 0;

3052:   C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
3053:   C->ops->productnumeric = MatProductNumeric_PtAP;

3055:   /* Clean up. */
3056:   MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
3057:   return(0);
3058: }

3060: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
3061: {
3062:   /* This routine requires testing -- first draft only */
3063:   PetscErrorCode  ierr;
3064:   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
3065:   Mat             P  =pp->AIJ;
3066:   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
3067:   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
3068:   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
3069:   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3070:   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3071:   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3072:   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3073:   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3074:   MatScalar       *ca=c->a,*caj,*apa;

3077:   /* Allocate temporary array for storage of one row of A*P */
3078:   PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);

3080:   /* Clear old values in C */
3081:   PetscArrayzero(ca,ci[cm]);

3083:   for (i=0; i<am; i++) {
3084:     /* Form sparse row of A*P */
3085:     anzi  = ai[i+1] - ai[i];
3086:     apnzj = 0;
3087:     for (j=0; j<anzi; j++) {
3088:       /* Get offset within block of P */
3089:       pshift = *aj%ppdof;
3090:       /* Get block row of P */
3091:       prow = *aj++/ppdof;   /* integer division */
3092:       pnzj = pi[prow+1] - pi[prow];
3093:       pjj  = pj + pi[prow];
3094:       paj  = pa + pi[prow];
3095:       for (k=0; k<pnzj; k++) {
3096:         poffset = pjj[k]*ppdof+pshift;
3097:         if (!apjdense[poffset]) {
3098:           apjdense[poffset] = -1;
3099:           apj[apnzj++]      = poffset;
3100:         }
3101:         apa[poffset] += (*aa)*paj[k];
3102:       }
3103:       PetscLogFlops(2.0*pnzj);
3104:       aa++;
3105:     }

3107:     /* Sort the j index array for quick sparse axpy. */
3108:     /* Note: a array does not need sorting as it is in dense storage locations. */
3109:     PetscSortInt(apnzj,apj);

3111:     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3112:     prow    = i/ppdof; /* integer division */
3113:     pshift  = i%ppdof;
3114:     poffset = pi[prow];
3115:     pnzi    = pi[prow+1] - poffset;
3116:     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3117:     pJ = pj+poffset;
3118:     pA = pa+poffset;
3119:     for (j=0; j<pnzi; j++) {
3120:       crow = (*pJ)*ppdof+pshift;
3121:       cjj  = cj + ci[crow];
3122:       caj  = ca + ci[crow];
3123:       pJ++;
3124:       /* Perform sparse axpy operation.  Note cjj includes apj. */
3125:       for (k=0,nextap=0; nextap<apnzj; k++) {
3126:         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
3127:       }
3128:       PetscLogFlops(2.0*apnzj);
3129:       pA++;
3130:     }

3132:     /* Zero the current row info for A*P */
3133:     for (j=0; j<apnzj; j++) {
3134:       apa[apj[j]]      = 0.;
3135:       apjdense[apj[j]] = 0;
3136:     }
3137:   }

3139:   /* Assemble the final matrix and clean up */
3140:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
3141:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
3142:   PetscFree3(apa,apj,apjdense);
3143:   return(0);
3144: }

3146: PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
3147: {
3148:   PetscErrorCode      ierr;
3149:   Mat_Product         *product = C->product;
3150:   Mat                 A=product->A,P=product->B;

3153:   MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,product->fill,C);
3154:   return(0);
3155: }

3157: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3158: {
3160:   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
3161:   return(0);
3162: }

3164: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3165: {
3167:   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3168:   return(0);
3169: }

3171: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,Mat);

3173: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C)
3174: {
3175:   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3176:   PetscErrorCode  ierr;


3180:   MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,C);
3181:   return(0);
3182: }

3184: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,PetscReal,Mat);

3186: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C)
3187: {
3188:   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3189:   PetscErrorCode  ierr;

3192:   MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,fill,C);
3193:   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3194:   return(0);
3195: }

3197: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,Mat);

3199: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C)
3200: {
3201:   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3202:   PetscErrorCode  ierr;


3206:   MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,C);
3207:   return(0);
3208: }

3210: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,PetscReal,Mat);

3212: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C)
3213: {
3214:   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3215:   PetscErrorCode  ierr;


3219:   MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,fill,C);
3220:   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3221:   return(0);
3222: }

3224: PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
3225: {
3226:   PetscErrorCode      ierr;
3227:   Mat_Product         *product = C->product;
3228:   Mat                 A=product->A,P=product->B;
3229:   PetscBool           flg;

3232:   PetscStrcmp(product->alg,"allatonce",&flg);
3233:   if (flg) {
3234:     MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A,P,product->fill,C);
3235:     C->ops->productnumeric = MatProductNumeric_PtAP;
3236:     return(0);
3237:   }

3239:   PetscStrcmp(product->alg,"allatonce_merged",&flg);
3240:   if (flg) {
3241:     MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A,P,product->fill,C);
3242:     C->ops->productnumeric = MatProductNumeric_PtAP;
3243:     return(0);
3244:   }

3246:   SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
3247:   return(0);
3248: }

3250: PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3251: {
3252:   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3253:   Mat            a    = b->AIJ,B;
3254:   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
3256:   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3257:   PetscInt       *cols;
3258:   PetscScalar    *vals;

3261:   MatGetSize(a,&m,&n);
3262:   PetscMalloc1(dof*m,&ilen);
3263:   for (i=0; i<m; i++) {
3264:     nmax = PetscMax(nmax,aij->ilen[i]);
3265:     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3266:   }
3267:   MatCreate(PETSC_COMM_SELF,&B);
3268:   MatSetSizes(B,dof*m,dof*n,dof*m,dof*n);
3269:   MatSetType(B,newtype);
3270:   MatSeqAIJSetPreallocation(B,0,ilen);
3271:   PetscFree(ilen);
3272:   PetscMalloc1(nmax,&icols);
3273:   ii   = 0;
3274:   for (i=0; i<m; i++) {
3275:     MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3276:     for (j=0; j<dof; j++) {
3277:       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3278:       MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3279:       ii++;
3280:     }
3281:     MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3282:   }
3283:   PetscFree(icols);
3284:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3285:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

3287:   if (reuse == MAT_INPLACE_MATRIX) {
3288:     MatHeaderReplace(A,&B);
3289:   } else {
3290:     *newmat = B;
3291:   }
3292:   return(0);
3293: }

3295:  #include <../src/mat/impls/aij/mpi/mpiaij.h>

3297: PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3298: {
3299:   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3300:   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3301:   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3302:   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
3303:   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
3304:   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3305:   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3306:   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3307:   PetscInt       rstart,cstart,*garray,ii,k;
3309:   PetscScalar    *vals,*ovals;

3312:   PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);
3313:   for (i=0; i<A->rmap->n/dof; i++) {
3314:     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3315:     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3316:     for (j=0; j<dof; j++) {
3317:       dnz[dof*i+j] = AIJ->ilen[i];
3318:       onz[dof*i+j] = OAIJ->ilen[i];
3319:     }
3320:   }
3321:   MatCreate(PetscObjectComm((PetscObject)A),&B);
3322:   MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
3323:   MatSetType(B,newtype);
3324:   MatMPIAIJSetPreallocation(B,0,dnz,0,onz);
3325:   MatSetBlockSize(B,dof);
3326:   PetscFree2(dnz,onz);

3328:   PetscMalloc2(nmax,&icols,onmax,&oicols);
3329:   rstart = dof*maij->A->rmap->rstart;
3330:   cstart = dof*maij->A->cmap->rstart;
3331:   garray = mpiaij->garray;

3333:   ii = rstart;
3334:   for (i=0; i<A->rmap->n/dof; i++) {
3335:     MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3336:     MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3337:     for (j=0; j<dof; j++) {
3338:       for (k=0; k<ncols; k++) {
3339:         icols[k] = cstart + dof*cols[k]+j;
3340:       }
3341:       for (k=0; k<oncols; k++) {
3342:         oicols[k] = dof*garray[ocols[k]]+j;
3343:       }
3344:       MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3345:       MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
3346:       ii++;
3347:     }
3348:     MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3349:     MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3350:   }
3351:   PetscFree2(icols,oicols);

3353:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3354:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

3356:   if (reuse == MAT_INPLACE_MATRIX) {
3357:     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3358:     ((PetscObject)A)->refct = 1;

3360:     MatHeaderReplace(A,&B);

3362:     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3363:   } else {
3364:     *newmat = B;
3365:   }
3366:   return(0);
3367: }

3369: PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3370: {
3372:   Mat            A;

3375:   MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3376:   MatCreateSubMatrix(A,isrow,iscol,cll,newmat);
3377:   MatDestroy(&A);
3378:   return(0);
3379: }

3381: PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[])
3382: {
3384:   Mat            A;

3387:   MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3388:   MatCreateSubMatrices(A,n,irow,icol,scall,submat);
3389:   MatDestroy(&A);
3390:   return(0);
3391: }

3393: /* ---------------------------------------------------------------------------------- */
3394: /*@
3395:   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3396:   operations for multicomponent problems.  It interpolates each component the same
3397:   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3398:   and MATMPIAIJ for distributed matrices.

3400:   Collective

3402:   Input Parameters:
3403: + A - the AIJ matrix describing the action on blocks
3404: - dof - the block size (number of components per node)

3406:   Output Parameter:
3407: . maij - the new MAIJ matrix

3409:   Operations provided:
3410: + MatMult
3411: . MatMultTranspose
3412: . MatMultAdd
3413: . MatMultTransposeAdd
3414: - MatView

3416:   Level: advanced

3418: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3419: @*/
3420: PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3421: {
3423:   PetscMPIInt    size;
3424:   PetscInt       n;
3425:   Mat            B;
3426: #if defined(PETSC_HAVE_CUDA)
3427:   /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3428:   PetscBool      convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3429: #endif

3432:   dof  = PetscAbs(dof);
3433:   PetscObjectReference((PetscObject)A);

3435:   if (dof == 1) *maij = A;
3436:   else {
3437:     MatCreate(PetscObjectComm((PetscObject)A),&B);
3438:     /* propagate vec type */
3439:     MatSetVecType(B,A->defaultvectype);
3440:     MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);
3441:     PetscLayoutSetBlockSize(B->rmap,dof);
3442:     PetscLayoutSetBlockSize(B->cmap,dof);
3443:     PetscLayoutSetUp(B->rmap);
3444:     PetscLayoutSetUp(B->cmap);

3446:     B->assembled = PETSC_TRUE;

3448:     MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3449:     if (size == 1) {
3450:       Mat_SeqMAIJ *b;

3452:       MatSetType(B,MATSEQMAIJ);

3454:       B->ops->setup   = NULL;
3455:       B->ops->destroy = MatDestroy_SeqMAIJ;
3456:       B->ops->view    = MatView_SeqMAIJ;

3458:       b               = (Mat_SeqMAIJ*)B->data;
3459:       b->dof          = dof;
3460:       b->AIJ          = A;

3462:       if (dof == 2) {
3463:         B->ops->mult             = MatMult_SeqMAIJ_2;
3464:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3465:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3466:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3467:       } else if (dof == 3) {
3468:         B->ops->mult             = MatMult_SeqMAIJ_3;
3469:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3470:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3471:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3472:       } else if (dof == 4) {
3473:         B->ops->mult             = MatMult_SeqMAIJ_4;
3474:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3475:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3476:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3477:       } else if (dof == 5) {
3478:         B->ops->mult             = MatMult_SeqMAIJ_5;
3479:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3480:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3481:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3482:       } else if (dof == 6) {
3483:         B->ops->mult             = MatMult_SeqMAIJ_6;
3484:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3485:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3486:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3487:       } else if (dof == 7) {
3488:         B->ops->mult             = MatMult_SeqMAIJ_7;
3489:         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3490:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3491:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3492:       } else if (dof == 8) {
3493:         B->ops->mult             = MatMult_SeqMAIJ_8;
3494:         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3495:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3496:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3497:       } else if (dof == 9) {
3498:         B->ops->mult             = MatMult_SeqMAIJ_9;
3499:         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3500:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3501:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3502:       } else if (dof == 10) {
3503:         B->ops->mult             = MatMult_SeqMAIJ_10;
3504:         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3505:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3506:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3507:       } else if (dof == 11) {
3508:         B->ops->mult             = MatMult_SeqMAIJ_11;
3509:         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3510:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3511:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3512:       } else if (dof == 16) {
3513:         B->ops->mult             = MatMult_SeqMAIJ_16;
3514:         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3515:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3516:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3517:       } else if (dof == 18) {
3518:         B->ops->mult             = MatMult_SeqMAIJ_18;
3519:         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3520:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3521:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3522:       } else {
3523:         B->ops->mult             = MatMult_SeqMAIJ_N;
3524:         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3525:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3526:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3527:       }
3528: #if defined(PETSC_HAVE_CUDA)
3529:       PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ);
3530: #endif
3531:       PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);
3532:       PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaij_seqmaij_C",MatProductSetFromOptions_SeqAIJ_SeqMAIJ);
3533:     } else {
3534:       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3535:       Mat_MPIMAIJ *b;
3536:       IS          from,to;
3537:       Vec         gvec;

3539:       MatSetType(B,MATMPIMAIJ);

3541:       B->ops->setup            = NULL;
3542:       B->ops->destroy          = MatDestroy_MPIMAIJ;
3543:       B->ops->view             = MatView_MPIMAIJ;

3545:       b      = (Mat_MPIMAIJ*)B->data;
3546:       b->dof = dof;
3547:       b->A   = A;

3549:       MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ);
3550:       MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ);

3552:       VecGetSize(mpiaij->lvec,&n);
3553:       VecCreate(PETSC_COMM_SELF,&b->w);
3554:       VecSetSizes(b->w,n*dof,n*dof);
3555:       VecSetBlockSize(b->w,dof);
3556:       VecSetType(b->w,VECSEQ);

3558:       /* create two temporary Index sets for build scatter gather */
3559:       ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);
3560:       ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);

3562:       /* create temporary global vector to generate scatter context */
3563:       VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);

3565:       /* generate the scatter context */
3566:       VecScatterCreate(gvec,from,b->w,to,&b->ctx);

3568:       ISDestroy(&from);
3569:       ISDestroy(&to);
3570:       VecDestroy(&gvec);

3572:       B->ops->mult             = MatMult_MPIMAIJ_dof;
3573:       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3574:       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3575:       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;

3577: #if defined(PETSC_HAVE_CUDA)
3578:       PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ);
3579: #endif
3580:       PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);
3581:       PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpimaij_C",MatProductSetFromOptions_MPIAIJ_MPIMAIJ);
3582:     }
3583:     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
3584:     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
3585:     MatSetFromOptions(B);
3586:     MatSetUp(B);
3587: #if defined(PETSC_HAVE_CUDA)
3588:     /* temporary until we have CUDA implementation of MAIJ */
3589:     {
3590:       PetscBool flg;
3591:       if (convert) {
3592:         PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,"");
3593:         if (flg) {
3594:           MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B);
3595:         }
3596:       }
3597:     }
3598: #endif
3599:     *maij = B;
3600:     MatViewFromOptions(B,NULL,"-mat_view");
3601:   }
3602:   return(0);
3603: }