Actual source code: maij.c

  1: #define PETSCMAT_DLL

  3: /*
  4:     Defines the basic matrix operations for the MAIJ  matrix storage format.
  5:   This format is used for restriction and interpolation operations for 
  6:   multicomponent problems. It interpolates each component the same way
  7:   independently.

  9:      We provide:
 10:          MatMult()
 11:          MatMultTranspose()
 12:          MatMultTransposeAdd()
 13:          MatMultAdd()
 14:           and
 15:          MatCreateMAIJ(Mat,dof,Mat*)

 17:      This single directory handles both the sequential and parallel codes
 18: */

 20:  #include ../src/mat/impls/maij/maij.h
 21:  #include ../src/mat/utils/freespace.h
 22:  #include private/vecimpl.h

 26: PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
 27: {
 29:   PetscTruth     ismpimaij,isseqmaij;

 32:   PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
 33:   PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
 34:   if (ismpimaij) {
 35:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 37:     *B = b->A;
 38:   } else if (isseqmaij) {
 39:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 41:     *B = b->AIJ;
 42:   } else {
 43:     *B = A;
 44:   }
 45:   return(0);
 46: }

 50: PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
 51: {
 53:   Mat            Aij;

 56:   MatMAIJGetAIJ(A,&Aij);
 57:   MatCreateMAIJ(Aij,dof,B);
 58:   return(0);
 59: }

 63: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
 64: {
 66:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;

 69:   if (b->AIJ) {
 70:     MatDestroy(b->AIJ);
 71:   }
 72:   PetscFree(b);
 73:   return(0);
 74: }

 78: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
 79: {
 81:   Mat            B;

 84:   MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
 85:   MatView(B,viewer);
 86:   MatDestroy(B);
 87:   return(0);
 88: }

 92: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
 93: {
 95:   Mat            B;

 98:   MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
 99:   MatView(B,viewer);
100:   MatDestroy(B);
101:   return(0);
102: }

106: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
107: {
109:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

112:   if (b->AIJ) {
113:     MatDestroy(b->AIJ);
114:   }
115:   if (b->OAIJ) {
116:     MatDestroy(b->OAIJ);
117:   }
118:   if (b->A) {
119:     MatDestroy(b->A);
120:   }
121:   if (b->ctx) {
122:     VecScatterDestroy(b->ctx);
123:   }
124:   if (b->w) {
125:     VecDestroy(b->w);
126:   }
127:   PetscFree(b);
128:   PetscObjectChangeTypeName((PetscObject)A,0);
129:   return(0);
130: }

132: /*MC
133:   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 
134:   multicomponent problems, interpolating or restricting each component the same way independently.
135:   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.

137:   Operations provided:
138: . MatMult
139: . MatMultTranspose
140: . MatMultAdd
141: . MatMultTransposeAdd

143:   Level: advanced

145: .seealso: MatCreateSeqDense
146: M*/

151: PetscErrorCode  MatCreate_MAIJ(Mat A)
152: {
154:   Mat_MPIMAIJ    *b;
155:   PetscMPIInt    size;

158:   PetscNewLog(A,Mat_MPIMAIJ,&b);
159:   A->data  = (void*)b;
160:   PetscMemzero(A->ops,sizeof(struct _MatOps));
161:   A->mapping          = 0;

163:   b->AIJ  = 0;
164:   b->dof  = 0;
165:   b->OAIJ = 0;
166:   b->ctx  = 0;
167:   b->w    = 0;
168:   MPI_Comm_size(((PetscObject)A)->comm,&size);
169:   if (size == 1){
170:     PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
171:   } else {
172:     PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
173:   }
174:   return(0);
175: }

178: /* --------------------------------------------------------------------------------------*/
181: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
182: {
183:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
184:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
185:   PetscScalar    *x,*y,*v,sum1, sum2;
187:   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
188:   PetscInt       n,i,jrow,j;

191:   VecGetArray(xx,&x);
192:   VecGetArray(yy,&y);
193:   idx  = a->j;
194:   v    = a->a;
195:   ii   = a->i;

197:   for (i=0; i<m; i++) {
198:     jrow = ii[i];
199:     n    = ii[i+1] - jrow;
200:     sum1  = 0.0;
201:     sum2  = 0.0;
202:     nonzerorow += (n>0);
203:     for (j=0; j<n; j++) {
204:       sum1 += v[jrow]*x[2*idx[jrow]];
205:       sum2 += v[jrow]*x[2*idx[jrow]+1];
206:       jrow++;
207:      }
208:     y[2*i]   = sum1;
209:     y[2*i+1] = sum2;
210:   }

212:   PetscLogFlops(4*a->nz - 2*nonzerorow);
213:   VecRestoreArray(xx,&x);
214:   VecRestoreArray(yy,&y);
215:   return(0);
216: }

220: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
221: {
222:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
223:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
224:   PetscScalar    *x,*y,*v,alpha1,alpha2,zero = 0.0;
226:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

229:   VecSet(yy,zero);
230:   VecGetArray(xx,&x);
231:   VecGetArray(yy,&y);
232: 
233:   for (i=0; i<m; i++) {
234:     idx    = a->j + a->i[i] ;
235:     v      = a->a + a->i[i] ;
236:     n      = a->i[i+1] - a->i[i];
237:     alpha1 = x[2*i];
238:     alpha2 = x[2*i+1];
239:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
240:   }
241:   PetscLogFlops(4*a->nz);
242:   VecRestoreArray(xx,&x);
243:   VecRestoreArray(yy,&y);
244:   return(0);
245: }

249: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
250: {
251:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
252:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
253:   PetscScalar    *x,*y,*v,sum1, sum2;
255:   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
256:   PetscInt       n,i,jrow,j;

259:   if (yy != zz) {VecCopy(yy,zz);}
260:   VecGetArray(xx,&x);
261:   VecGetArray(zz,&y);
262:   idx  = a->j;
263:   v    = a->a;
264:   ii   = a->i;

266:   for (i=0; i<m; i++) {
267:     jrow = ii[i];
268:     n    = ii[i+1] - jrow;
269:     sum1  = 0.0;
270:     sum2  = 0.0;
271:     for (j=0; j<n; j++) {
272:       sum1 += v[jrow]*x[2*idx[jrow]];
273:       sum2 += v[jrow]*x[2*idx[jrow]+1];
274:       jrow++;
275:      }
276:     y[2*i]   += sum1;
277:     y[2*i+1] += sum2;
278:   }

280:   PetscLogFlops(4*a->nz);
281:   VecRestoreArray(xx,&x);
282:   VecRestoreArray(zz,&y);
283:   return(0);
284: }
287: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
288: {
289:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
290:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
291:   PetscScalar    *x,*y,*v,alpha1,alpha2;
293:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

296:   if (yy != zz) {VecCopy(yy,zz);}
297:   VecGetArray(xx,&x);
298:   VecGetArray(zz,&y);
299: 
300:   for (i=0; i<m; i++) {
301:     idx   = a->j + a->i[i] ;
302:     v     = a->a + a->i[i] ;
303:     n     = a->i[i+1] - a->i[i];
304:     alpha1 = x[2*i];
305:     alpha2 = x[2*i+1];
306:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
307:   }
308:   PetscLogFlops(4*a->nz);
309:   VecRestoreArray(xx,&x);
310:   VecRestoreArray(zz,&y);
311:   return(0);
312: }
313: /* --------------------------------------------------------------------------------------*/
316: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
317: {
318:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
319:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
320:   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
322:   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
323:   PetscInt       n,i,jrow,j;

326:   VecGetArray(xx,&x);
327:   VecGetArray(yy,&y);
328:   idx  = a->j;
329:   v    = a->a;
330:   ii   = a->i;

332:   for (i=0; i<m; i++) {
333:     jrow = ii[i];
334:     n    = ii[i+1] - jrow;
335:     sum1  = 0.0;
336:     sum2  = 0.0;
337:     sum3  = 0.0;
338:     nonzerorow += (n>0);
339:     for (j=0; j<n; j++) {
340:       sum1 += v[jrow]*x[3*idx[jrow]];
341:       sum2 += v[jrow]*x[3*idx[jrow]+1];
342:       sum3 += v[jrow]*x[3*idx[jrow]+2];
343:       jrow++;
344:      }
345:     y[3*i]   = sum1;
346:     y[3*i+1] = sum2;
347:     y[3*i+2] = sum3;
348:   }

350:   PetscLogFlops(6*a->nz - 3*nonzerorow);
351:   VecRestoreArray(xx,&x);
352:   VecRestoreArray(yy,&y);
353:   return(0);
354: }

358: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
359: {
360:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
361:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
362:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
364:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

367:   VecSet(yy,zero);
368:   VecGetArray(xx,&x);
369:   VecGetArray(yy,&y);
370: 
371:   for (i=0; i<m; i++) {
372:     idx    = a->j + a->i[i];
373:     v      = a->a + a->i[i];
374:     n      = a->i[i+1] - a->i[i];
375:     alpha1 = x[3*i];
376:     alpha2 = x[3*i+1];
377:     alpha3 = x[3*i+2];
378:     while (n-->0) {
379:       y[3*(*idx)]   += alpha1*(*v);
380:       y[3*(*idx)+1] += alpha2*(*v);
381:       y[3*(*idx)+2] += alpha3*(*v);
382:       idx++; v++;
383:     }
384:   }
385:   PetscLogFlops(6*a->nz);
386:   VecRestoreArray(xx,&x);
387:   VecRestoreArray(yy,&y);
388:   return(0);
389: }

393: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
394: {
395:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
396:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
397:   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
399:   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
400:   PetscInt       n,i,jrow,j;

403:   if (yy != zz) {VecCopy(yy,zz);}
404:   VecGetArray(xx,&x);
405:   VecGetArray(zz,&y);
406:   idx  = a->j;
407:   v    = a->a;
408:   ii   = a->i;

410:   for (i=0; i<m; i++) {
411:     jrow = ii[i];
412:     n    = ii[i+1] - jrow;
413:     sum1  = 0.0;
414:     sum2  = 0.0;
415:     sum3  = 0.0;
416:     for (j=0; j<n; j++) {
417:       sum1 += v[jrow]*x[3*idx[jrow]];
418:       sum2 += v[jrow]*x[3*idx[jrow]+1];
419:       sum3 += v[jrow]*x[3*idx[jrow]+2];
420:       jrow++;
421:      }
422:     y[3*i]   += sum1;
423:     y[3*i+1] += sum2;
424:     y[3*i+2] += sum3;
425:   }

427:   PetscLogFlops(6*a->nz);
428:   VecRestoreArray(xx,&x);
429:   VecRestoreArray(zz,&y);
430:   return(0);
431: }
434: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
435: {
436:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
437:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
438:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3;
440:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

443:   if (yy != zz) {VecCopy(yy,zz);}
444:   VecGetArray(xx,&x);
445:   VecGetArray(zz,&y);
446:   for (i=0; i<m; i++) {
447:     idx    = a->j + a->i[i] ;
448:     v      = a->a + a->i[i] ;
449:     n      = a->i[i+1] - a->i[i];
450:     alpha1 = x[3*i];
451:     alpha2 = x[3*i+1];
452:     alpha3 = x[3*i+2];
453:     while (n-->0) {
454:       y[3*(*idx)]   += alpha1*(*v);
455:       y[3*(*idx)+1] += alpha2*(*v);
456:       y[3*(*idx)+2] += alpha3*(*v);
457:       idx++; v++;
458:     }
459:   }
460:   PetscLogFlops(6*a->nz);
461:   VecRestoreArray(xx,&x);
462:   VecRestoreArray(zz,&y);
463:   return(0);
464: }

466: /* ------------------------------------------------------------------------------*/
469: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
470: {
471:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
472:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
473:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
475:   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
476:   PetscInt       n,i,jrow,j;

479:   VecGetArray(xx,&x);
480:   VecGetArray(yy,&y);
481:   idx  = a->j;
482:   v    = a->a;
483:   ii   = a->i;

485:   for (i=0; i<m; i++) {
486:     jrow = ii[i];
487:     n    = ii[i+1] - jrow;
488:     sum1  = 0.0;
489:     sum2  = 0.0;
490:     sum3  = 0.0;
491:     sum4  = 0.0;
492:     nonzerorow += (n>0);
493:     for (j=0; j<n; j++) {
494:       sum1 += v[jrow]*x[4*idx[jrow]];
495:       sum2 += v[jrow]*x[4*idx[jrow]+1];
496:       sum3 += v[jrow]*x[4*idx[jrow]+2];
497:       sum4 += v[jrow]*x[4*idx[jrow]+3];
498:       jrow++;
499:      }
500:     y[4*i]   = sum1;
501:     y[4*i+1] = sum2;
502:     y[4*i+2] = sum3;
503:     y[4*i+3] = sum4;
504:   }

506:   PetscLogFlops(8*a->nz - 4*nonzerorow);
507:   VecRestoreArray(xx,&x);
508:   VecRestoreArray(yy,&y);
509:   return(0);
510: }

514: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
515: {
516:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
517:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
518:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
520:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

523:   VecSet(yy,zero);
524:   VecGetArray(xx,&x);
525:   VecGetArray(yy,&y);
526:   for (i=0; i<m; i++) {
527:     idx    = a->j + a->i[i] ;
528:     v      = a->a + a->i[i] ;
529:     n      = a->i[i+1] - a->i[i];
530:     alpha1 = x[4*i];
531:     alpha2 = x[4*i+1];
532:     alpha3 = x[4*i+2];
533:     alpha4 = x[4*i+3];
534:     while (n-->0) {
535:       y[4*(*idx)]   += alpha1*(*v);
536:       y[4*(*idx)+1] += alpha2*(*v);
537:       y[4*(*idx)+2] += alpha3*(*v);
538:       y[4*(*idx)+3] += alpha4*(*v);
539:       idx++; v++;
540:     }
541:   }
542:   PetscLogFlops(8*a->nz);
543:   VecRestoreArray(xx,&x);
544:   VecRestoreArray(yy,&y);
545:   return(0);
546: }

550: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
551: {
552:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
553:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
554:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
556:   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
557:   PetscInt       n,i,jrow,j;

560:   if (yy != zz) {VecCopy(yy,zz);}
561:   VecGetArray(xx,&x);
562:   VecGetArray(zz,&y);
563:   idx  = a->j;
564:   v    = a->a;
565:   ii   = a->i;

567:   for (i=0; i<m; i++) {
568:     jrow = ii[i];
569:     n    = ii[i+1] - jrow;
570:     sum1  = 0.0;
571:     sum2  = 0.0;
572:     sum3  = 0.0;
573:     sum4  = 0.0;
574:     for (j=0; j<n; j++) {
575:       sum1 += v[jrow]*x[4*idx[jrow]];
576:       sum2 += v[jrow]*x[4*idx[jrow]+1];
577:       sum3 += v[jrow]*x[4*idx[jrow]+2];
578:       sum4 += v[jrow]*x[4*idx[jrow]+3];
579:       jrow++;
580:      }
581:     y[4*i]   += sum1;
582:     y[4*i+1] += sum2;
583:     y[4*i+2] += sum3;
584:     y[4*i+3] += sum4;
585:   }

587:   PetscLogFlops(8*a->nz);
588:   VecRestoreArray(xx,&x);
589:   VecRestoreArray(zz,&y);
590:   return(0);
591: }
594: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
595: {
596:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
597:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
598:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
600:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

603:   if (yy != zz) {VecCopy(yy,zz);}
604:   VecGetArray(xx,&x);
605:   VecGetArray(zz,&y);
606: 
607:   for (i=0; i<m; i++) {
608:     idx    = a->j + a->i[i] ;
609:     v      = a->a + a->i[i] ;
610:     n      = a->i[i+1] - a->i[i];
611:     alpha1 = x[4*i];
612:     alpha2 = x[4*i+1];
613:     alpha3 = x[4*i+2];
614:     alpha4 = x[4*i+3];
615:     while (n-->0) {
616:       y[4*(*idx)]   += alpha1*(*v);
617:       y[4*(*idx)+1] += alpha2*(*v);
618:       y[4*(*idx)+2] += alpha3*(*v);
619:       y[4*(*idx)+3] += alpha4*(*v);
620:       idx++; v++;
621:     }
622:   }
623:   PetscLogFlops(8*a->nz);
624:   VecRestoreArray(xx,&x);
625:   VecRestoreArray(zz,&y);
626:   return(0);
627: }
628: /* ------------------------------------------------------------------------------*/

632: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
633: {
634:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
635:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
636:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
638:   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
639:   PetscInt       n,i,jrow,j;

642:   VecGetArray(xx,&x);
643:   VecGetArray(yy,&y);
644:   idx  = a->j;
645:   v    = a->a;
646:   ii   = a->i;

648:   for (i=0; i<m; i++) {
649:     jrow = ii[i];
650:     n    = ii[i+1] - jrow;
651:     sum1  = 0.0;
652:     sum2  = 0.0;
653:     sum3  = 0.0;
654:     sum4  = 0.0;
655:     sum5  = 0.0;
656:     nonzerorow += (n>0);
657:     for (j=0; j<n; j++) {
658:       sum1 += v[jrow]*x[5*idx[jrow]];
659:       sum2 += v[jrow]*x[5*idx[jrow]+1];
660:       sum3 += v[jrow]*x[5*idx[jrow]+2];
661:       sum4 += v[jrow]*x[5*idx[jrow]+3];
662:       sum5 += v[jrow]*x[5*idx[jrow]+4];
663:       jrow++;
664:      }
665:     y[5*i]   = sum1;
666:     y[5*i+1] = sum2;
667:     y[5*i+2] = sum3;
668:     y[5*i+3] = sum4;
669:     y[5*i+4] = sum5;
670:   }

672:   PetscLogFlops(10*a->nz - 5*nonzerorow);
673:   VecRestoreArray(xx,&x);
674:   VecRestoreArray(yy,&y);
675:   return(0);
676: }

680: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
681: {
682:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
683:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
684:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
686:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

689:   VecSet(yy,zero);
690:   VecGetArray(xx,&x);
691:   VecGetArray(yy,&y);
692: 
693:   for (i=0; i<m; i++) {
694:     idx    = a->j + a->i[i] ;
695:     v      = a->a + a->i[i] ;
696:     n      = a->i[i+1] - a->i[i];
697:     alpha1 = x[5*i];
698:     alpha2 = x[5*i+1];
699:     alpha3 = x[5*i+2];
700:     alpha4 = x[5*i+3];
701:     alpha5 = x[5*i+4];
702:     while (n-->0) {
703:       y[5*(*idx)]   += alpha1*(*v);
704:       y[5*(*idx)+1] += alpha2*(*v);
705:       y[5*(*idx)+2] += alpha3*(*v);
706:       y[5*(*idx)+3] += alpha4*(*v);
707:       y[5*(*idx)+4] += alpha5*(*v);
708:       idx++; v++;
709:     }
710:   }
711:   PetscLogFlops(10*a->nz);
712:   VecRestoreArray(xx,&x);
713:   VecRestoreArray(yy,&y);
714:   return(0);
715: }

719: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
720: {
721:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
722:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
723:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
725:   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
726:   PetscInt       n,i,jrow,j;

729:   if (yy != zz) {VecCopy(yy,zz);}
730:   VecGetArray(xx,&x);
731:   VecGetArray(zz,&y);
732:   idx  = a->j;
733:   v    = a->a;
734:   ii   = a->i;

736:   for (i=0; i<m; i++) {
737:     jrow = ii[i];
738:     n    = ii[i+1] - jrow;
739:     sum1  = 0.0;
740:     sum2  = 0.0;
741:     sum3  = 0.0;
742:     sum4  = 0.0;
743:     sum5  = 0.0;
744:     for (j=0; j<n; j++) {
745:       sum1 += v[jrow]*x[5*idx[jrow]];
746:       sum2 += v[jrow]*x[5*idx[jrow]+1];
747:       sum3 += v[jrow]*x[5*idx[jrow]+2];
748:       sum4 += v[jrow]*x[5*idx[jrow]+3];
749:       sum5 += v[jrow]*x[5*idx[jrow]+4];
750:       jrow++;
751:      }
752:     y[5*i]   += sum1;
753:     y[5*i+1] += sum2;
754:     y[5*i+2] += sum3;
755:     y[5*i+3] += sum4;
756:     y[5*i+4] += sum5;
757:   }

759:   PetscLogFlops(10*a->nz);
760:   VecRestoreArray(xx,&x);
761:   VecRestoreArray(zz,&y);
762:   return(0);
763: }

767: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
768: {
769:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
770:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
771:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
773:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

776:   if (yy != zz) {VecCopy(yy,zz);}
777:   VecGetArray(xx,&x);
778:   VecGetArray(zz,&y);
779: 
780:   for (i=0; i<m; i++) {
781:     idx    = a->j + a->i[i] ;
782:     v      = a->a + a->i[i] ;
783:     n      = a->i[i+1] - a->i[i];
784:     alpha1 = x[5*i];
785:     alpha2 = x[5*i+1];
786:     alpha3 = x[5*i+2];
787:     alpha4 = x[5*i+3];
788:     alpha5 = x[5*i+4];
789:     while (n-->0) {
790:       y[5*(*idx)]   += alpha1*(*v);
791:       y[5*(*idx)+1] += alpha2*(*v);
792:       y[5*(*idx)+2] += alpha3*(*v);
793:       y[5*(*idx)+3] += alpha4*(*v);
794:       y[5*(*idx)+4] += alpha5*(*v);
795:       idx++; v++;
796:     }
797:   }
798:   PetscLogFlops(10*a->nz);
799:   VecRestoreArray(xx,&x);
800:   VecRestoreArray(zz,&y);
801:   return(0);
802: }

804: /* ------------------------------------------------------------------------------*/
807: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
808: {
809:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
810:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
811:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
813:   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
814:   PetscInt       n,i,jrow,j;

817:   VecGetArray(xx,&x);
818:   VecGetArray(yy,&y);
819:   idx  = a->j;
820:   v    = a->a;
821:   ii   = a->i;

823:   for (i=0; i<m; i++) {
824:     jrow = ii[i];
825:     n    = ii[i+1] - jrow;
826:     sum1  = 0.0;
827:     sum2  = 0.0;
828:     sum3  = 0.0;
829:     sum4  = 0.0;
830:     sum5  = 0.0;
831:     sum6  = 0.0;
832:     nonzerorow += (n>0);
833:     for (j=0; j<n; j++) {
834:       sum1 += v[jrow]*x[6*idx[jrow]];
835:       sum2 += v[jrow]*x[6*idx[jrow]+1];
836:       sum3 += v[jrow]*x[6*idx[jrow]+2];
837:       sum4 += v[jrow]*x[6*idx[jrow]+3];
838:       sum5 += v[jrow]*x[6*idx[jrow]+4];
839:       sum6 += v[jrow]*x[6*idx[jrow]+5];
840:       jrow++;
841:      }
842:     y[6*i]   = sum1;
843:     y[6*i+1] = sum2;
844:     y[6*i+2] = sum3;
845:     y[6*i+3] = sum4;
846:     y[6*i+4] = sum5;
847:     y[6*i+5] = sum6;
848:   }

850:   PetscLogFlops(12*a->nz - 6*nonzerorow);
851:   VecRestoreArray(xx,&x);
852:   VecRestoreArray(yy,&y);
853:   return(0);
854: }

858: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
859: {
860:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
861:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
862:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
864:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

867:   VecSet(yy,zero);
868:   VecGetArray(xx,&x);
869:   VecGetArray(yy,&y);

871:   for (i=0; i<m; i++) {
872:     idx    = a->j + a->i[i] ;
873:     v      = a->a + a->i[i] ;
874:     n      = a->i[i+1] - a->i[i];
875:     alpha1 = x[6*i];
876:     alpha2 = x[6*i+1];
877:     alpha3 = x[6*i+2];
878:     alpha4 = x[6*i+3];
879:     alpha5 = x[6*i+4];
880:     alpha6 = x[6*i+5];
881:     while (n-->0) {
882:       y[6*(*idx)]   += alpha1*(*v);
883:       y[6*(*idx)+1] += alpha2*(*v);
884:       y[6*(*idx)+2] += alpha3*(*v);
885:       y[6*(*idx)+3] += alpha4*(*v);
886:       y[6*(*idx)+4] += alpha5*(*v);
887:       y[6*(*idx)+5] += alpha6*(*v);
888:       idx++; v++;
889:     }
890:   }
891:   PetscLogFlops(12*a->nz);
892:   VecRestoreArray(xx,&x);
893:   VecRestoreArray(yy,&y);
894:   return(0);
895: }

899: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
900: {
901:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
902:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
903:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
905:   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
906:   PetscInt       n,i,jrow,j;

909:   if (yy != zz) {VecCopy(yy,zz);}
910:   VecGetArray(xx,&x);
911:   VecGetArray(zz,&y);
912:   idx  = a->j;
913:   v    = a->a;
914:   ii   = a->i;

916:   for (i=0; i<m; i++) {
917:     jrow = ii[i];
918:     n    = ii[i+1] - jrow;
919:     sum1  = 0.0;
920:     sum2  = 0.0;
921:     sum3  = 0.0;
922:     sum4  = 0.0;
923:     sum5  = 0.0;
924:     sum6  = 0.0;
925:     for (j=0; j<n; j++) {
926:       sum1 += v[jrow]*x[6*idx[jrow]];
927:       sum2 += v[jrow]*x[6*idx[jrow]+1];
928:       sum3 += v[jrow]*x[6*idx[jrow]+2];
929:       sum4 += v[jrow]*x[6*idx[jrow]+3];
930:       sum5 += v[jrow]*x[6*idx[jrow]+4];
931:       sum6 += v[jrow]*x[6*idx[jrow]+5];
932:       jrow++;
933:      }
934:     y[6*i]   += sum1;
935:     y[6*i+1] += sum2;
936:     y[6*i+2] += sum3;
937:     y[6*i+3] += sum4;
938:     y[6*i+4] += sum5;
939:     y[6*i+5] += sum6;
940:   }

942:   PetscLogFlops(12*a->nz);
943:   VecRestoreArray(xx,&x);
944:   VecRestoreArray(zz,&y);
945:   return(0);
946: }

950: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
951: {
952:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
953:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
954:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
956:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

959:   if (yy != zz) {VecCopy(yy,zz);}
960:   VecGetArray(xx,&x);
961:   VecGetArray(zz,&y);
962: 
963:   for (i=0; i<m; i++) {
964:     idx    = a->j + a->i[i] ;
965:     v      = a->a + a->i[i] ;
966:     n      = a->i[i+1] - a->i[i];
967:     alpha1 = x[6*i];
968:     alpha2 = x[6*i+1];
969:     alpha3 = x[6*i+2];
970:     alpha4 = x[6*i+3];
971:     alpha5 = x[6*i+4];
972:     alpha6 = x[6*i+5];
973:     while (n-->0) {
974:       y[6*(*idx)]   += alpha1*(*v);
975:       y[6*(*idx)+1] += alpha2*(*v);
976:       y[6*(*idx)+2] += alpha3*(*v);
977:       y[6*(*idx)+3] += alpha4*(*v);
978:       y[6*(*idx)+4] += alpha5*(*v);
979:       y[6*(*idx)+5] += alpha6*(*v);
980:       idx++; v++;
981:     }
982:   }
983:   PetscLogFlops(12*a->nz);
984:   VecRestoreArray(xx,&x);
985:   VecRestoreArray(zz,&y);
986:   return(0);
987: }

989: /* ------------------------------------------------------------------------------*/
992: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
993: {
994:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
995:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
996:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
998:   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
999:   PetscInt       n,i,jrow,j;

1002:   VecGetArray(xx,&x);
1003:   VecGetArray(yy,&y);
1004:   idx  = a->j;
1005:   v    = a->a;
1006:   ii   = a->i;

1008:   for (i=0; i<m; i++) {
1009:     jrow = ii[i];
1010:     n    = ii[i+1] - jrow;
1011:     sum1  = 0.0;
1012:     sum2  = 0.0;
1013:     sum3  = 0.0;
1014:     sum4  = 0.0;
1015:     sum5  = 0.0;
1016:     sum6  = 0.0;
1017:     sum7  = 0.0;
1018:     nonzerorow += (n>0);
1019:     for (j=0; j<n; j++) {
1020:       sum1 += v[jrow]*x[7*idx[jrow]];
1021:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1022:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1023:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1024:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1025:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1026:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1027:       jrow++;
1028:      }
1029:     y[7*i]   = sum1;
1030:     y[7*i+1] = sum2;
1031:     y[7*i+2] = sum3;
1032:     y[7*i+3] = sum4;
1033:     y[7*i+4] = sum5;
1034:     y[7*i+5] = sum6;
1035:     y[7*i+6] = sum7;
1036:   }

1038:   PetscLogFlops(14*a->nz - 7*nonzerorow);
1039:   VecRestoreArray(xx,&x);
1040:   VecRestoreArray(yy,&y);
1041:   return(0);
1042: }

1046: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1047: {
1048:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1049:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1050:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0;
1052:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

1055:   VecSet(yy,zero);
1056:   VecGetArray(xx,&x);
1057:   VecGetArray(yy,&y);

1059:   for (i=0; i<m; i++) {
1060:     idx    = a->j + a->i[i] ;
1061:     v      = a->a + a->i[i] ;
1062:     n      = a->i[i+1] - a->i[i];
1063:     alpha1 = x[7*i];
1064:     alpha2 = x[7*i+1];
1065:     alpha3 = x[7*i+2];
1066:     alpha4 = x[7*i+3];
1067:     alpha5 = x[7*i+4];
1068:     alpha6 = x[7*i+5];
1069:     alpha7 = x[7*i+6];
1070:     while (n-->0) {
1071:       y[7*(*idx)]   += alpha1*(*v);
1072:       y[7*(*idx)+1] += alpha2*(*v);
1073:       y[7*(*idx)+2] += alpha3*(*v);
1074:       y[7*(*idx)+3] += alpha4*(*v);
1075:       y[7*(*idx)+4] += alpha5*(*v);
1076:       y[7*(*idx)+5] += alpha6*(*v);
1077:       y[7*(*idx)+6] += alpha7*(*v);
1078:       idx++; v++;
1079:     }
1080:   }
1081:   PetscLogFlops(14*a->nz);
1082:   VecRestoreArray(xx,&x);
1083:   VecRestoreArray(yy,&y);
1084:   return(0);
1085: }

1089: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1090: {
1091:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1092:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1093:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1095:   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
1096:   PetscInt       n,i,jrow,j;

1099:   if (yy != zz) {VecCopy(yy,zz);}
1100:   VecGetArray(xx,&x);
1101:   VecGetArray(zz,&y);
1102:   idx  = a->j;
1103:   v    = a->a;
1104:   ii   = a->i;

1106:   for (i=0; i<m; i++) {
1107:     jrow = ii[i];
1108:     n    = ii[i+1] - jrow;
1109:     sum1  = 0.0;
1110:     sum2  = 0.0;
1111:     sum3  = 0.0;
1112:     sum4  = 0.0;
1113:     sum5  = 0.0;
1114:     sum6  = 0.0;
1115:     sum7  = 0.0;
1116:     for (j=0; j<n; j++) {
1117:       sum1 += v[jrow]*x[7*idx[jrow]];
1118:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1119:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1120:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1121:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1122:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1123:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1124:       jrow++;
1125:      }
1126:     y[7*i]   += sum1;
1127:     y[7*i+1] += sum2;
1128:     y[7*i+2] += sum3;
1129:     y[7*i+3] += sum4;
1130:     y[7*i+4] += sum5;
1131:     y[7*i+5] += sum6;
1132:     y[7*i+6] += sum7;
1133:   }

1135:   PetscLogFlops(14*a->nz);
1136:   VecRestoreArray(xx,&x);
1137:   VecRestoreArray(zz,&y);
1138:   return(0);
1139: }

1143: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1144: {
1145:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1146:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1147:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1149:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

1152:   if (yy != zz) {VecCopy(yy,zz);}
1153:   VecGetArray(xx,&x);
1154:   VecGetArray(zz,&y);
1155:   for (i=0; i<m; i++) {
1156:     idx    = a->j + a->i[i] ;
1157:     v      = a->a + a->i[i] ;
1158:     n      = a->i[i+1] - a->i[i];
1159:     alpha1 = x[7*i];
1160:     alpha2 = x[7*i+1];
1161:     alpha3 = x[7*i+2];
1162:     alpha4 = x[7*i+3];
1163:     alpha5 = x[7*i+4];
1164:     alpha6 = x[7*i+5];
1165:     alpha7 = x[7*i+6];
1166:     while (n-->0) {
1167:       y[7*(*idx)]   += alpha1*(*v);
1168:       y[7*(*idx)+1] += alpha2*(*v);
1169:       y[7*(*idx)+2] += alpha3*(*v);
1170:       y[7*(*idx)+3] += alpha4*(*v);
1171:       y[7*(*idx)+4] += alpha5*(*v);
1172:       y[7*(*idx)+5] += alpha6*(*v);
1173:       y[7*(*idx)+6] += alpha7*(*v);
1174:       idx++; v++;
1175:     }
1176:   }
1177:   PetscLogFlops(14*a->nz);
1178:   VecRestoreArray(xx,&x);
1179:   VecRestoreArray(zz,&y);
1180:   return(0);
1181: }

1185: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1186: {
1187:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1188:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1189:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1191:   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
1192:   PetscInt       n,i,jrow,j;

1195:   VecGetArray(xx,&x);
1196:   VecGetArray(yy,&y);
1197:   idx  = a->j;
1198:   v    = a->a;
1199:   ii   = a->i;

1201:   for (i=0; i<m; i++) {
1202:     jrow = ii[i];
1203:     n    = ii[i+1] - jrow;
1204:     sum1  = 0.0;
1205:     sum2  = 0.0;
1206:     sum3  = 0.0;
1207:     sum4  = 0.0;
1208:     sum5  = 0.0;
1209:     sum6  = 0.0;
1210:     sum7  = 0.0;
1211:     sum8  = 0.0;
1212:     nonzerorow += (n>0);
1213:     for (j=0; j<n; j++) {
1214:       sum1 += v[jrow]*x[8*idx[jrow]];
1215:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1216:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1217:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1218:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1219:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1220:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1221:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1222:       jrow++;
1223:      }
1224:     y[8*i]   = sum1;
1225:     y[8*i+1] = sum2;
1226:     y[8*i+2] = sum3;
1227:     y[8*i+3] = sum4;
1228:     y[8*i+4] = sum5;
1229:     y[8*i+5] = sum6;
1230:     y[8*i+6] = sum7;
1231:     y[8*i+7] = sum8;
1232:   }

1234:   PetscLogFlops(16*a->nz - 8*nonzerorow);
1235:   VecRestoreArray(xx,&x);
1236:   VecRestoreArray(yy,&y);
1237:   return(0);
1238: }

1242: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1243: {
1244:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1245:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1246:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1248:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

1251:   VecSet(yy,zero);
1252:   VecGetArray(xx,&x);
1253:   VecGetArray(yy,&y);

1255:   for (i=0; i<m; i++) {
1256:     idx    = a->j + a->i[i] ;
1257:     v      = a->a + a->i[i] ;
1258:     n      = a->i[i+1] - a->i[i];
1259:     alpha1 = x[8*i];
1260:     alpha2 = x[8*i+1];
1261:     alpha3 = x[8*i+2];
1262:     alpha4 = x[8*i+3];
1263:     alpha5 = x[8*i+4];
1264:     alpha6 = x[8*i+5];
1265:     alpha7 = x[8*i+6];
1266:     alpha8 = x[8*i+7];
1267:     while (n-->0) {
1268:       y[8*(*idx)]   += alpha1*(*v);
1269:       y[8*(*idx)+1] += alpha2*(*v);
1270:       y[8*(*idx)+2] += alpha3*(*v);
1271:       y[8*(*idx)+3] += alpha4*(*v);
1272:       y[8*(*idx)+4] += alpha5*(*v);
1273:       y[8*(*idx)+5] += alpha6*(*v);
1274:       y[8*(*idx)+6] += alpha7*(*v);
1275:       y[8*(*idx)+7] += alpha8*(*v);
1276:       idx++; v++;
1277:     }
1278:   }
1279:   PetscLogFlops(16*a->nz);
1280:   VecRestoreArray(xx,&x);
1281:   VecRestoreArray(yy,&y);
1282:   return(0);
1283: }

1287: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1288: {
1289:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1290:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1291:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1293:   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
1294:   PetscInt       n,i,jrow,j;

1297:   if (yy != zz) {VecCopy(yy,zz);}
1298:   VecGetArray(xx,&x);
1299:   VecGetArray(zz,&y);
1300:   idx  = a->j;
1301:   v    = a->a;
1302:   ii   = a->i;

1304:   for (i=0; i<m; i++) {
1305:     jrow = ii[i];
1306:     n    = ii[i+1] - jrow;
1307:     sum1  = 0.0;
1308:     sum2  = 0.0;
1309:     sum3  = 0.0;
1310:     sum4  = 0.0;
1311:     sum5  = 0.0;
1312:     sum6  = 0.0;
1313:     sum7  = 0.0;
1314:     sum8  = 0.0;
1315:     for (j=0; j<n; j++) {
1316:       sum1 += v[jrow]*x[8*idx[jrow]];
1317:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1318:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1319:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1320:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1321:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1322:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1323:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1324:       jrow++;
1325:      }
1326:     y[8*i]   += sum1;
1327:     y[8*i+1] += sum2;
1328:     y[8*i+2] += sum3;
1329:     y[8*i+3] += sum4;
1330:     y[8*i+4] += sum5;
1331:     y[8*i+5] += sum6;
1332:     y[8*i+6] += sum7;
1333:     y[8*i+7] += sum8;
1334:   }

1336:   PetscLogFlops(16*a->nz);
1337:   VecRestoreArray(xx,&x);
1338:   VecRestoreArray(zz,&y);
1339:   return(0);
1340: }

1344: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1345: {
1346:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1347:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1348:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1350:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

1353:   if (yy != zz) {VecCopy(yy,zz);}
1354:   VecGetArray(xx,&x);
1355:   VecGetArray(zz,&y);
1356:   for (i=0; i<m; i++) {
1357:     idx    = a->j + a->i[i] ;
1358:     v      = a->a + a->i[i] ;
1359:     n      = a->i[i+1] - a->i[i];
1360:     alpha1 = x[8*i];
1361:     alpha2 = x[8*i+1];
1362:     alpha3 = x[8*i+2];
1363:     alpha4 = x[8*i+3];
1364:     alpha5 = x[8*i+4];
1365:     alpha6 = x[8*i+5];
1366:     alpha7 = x[8*i+6];
1367:     alpha8 = x[8*i+7];
1368:     while (n-->0) {
1369:       y[8*(*idx)]   += alpha1*(*v);
1370:       y[8*(*idx)+1] += alpha2*(*v);
1371:       y[8*(*idx)+2] += alpha3*(*v);
1372:       y[8*(*idx)+3] += alpha4*(*v);
1373:       y[8*(*idx)+4] += alpha5*(*v);
1374:       y[8*(*idx)+5] += alpha6*(*v);
1375:       y[8*(*idx)+6] += alpha7*(*v);
1376:       y[8*(*idx)+7] += alpha8*(*v);
1377:       idx++; v++;
1378:     }
1379:   }
1380:   PetscLogFlops(16*a->nz);
1381:   VecRestoreArray(xx,&x);
1382:   VecRestoreArray(zz,&y);
1383:   return(0);
1384: }

1386: /* ------------------------------------------------------------------------------*/
1389: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1390: {
1391:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1392:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1393:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1395:   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
1396:   PetscInt       n,i,jrow,j;

1399:   VecGetArray(xx,&x);
1400:   VecGetArray(yy,&y);
1401:   idx  = a->j;
1402:   v    = a->a;
1403:   ii   = a->i;

1405:   for (i=0; i<m; i++) {
1406:     jrow = ii[i];
1407:     n    = ii[i+1] - jrow;
1408:     sum1  = 0.0;
1409:     sum2  = 0.0;
1410:     sum3  = 0.0;
1411:     sum4  = 0.0;
1412:     sum5  = 0.0;
1413:     sum6  = 0.0;
1414:     sum7  = 0.0;
1415:     sum8  = 0.0;
1416:     sum9  = 0.0;
1417:     nonzerorow += (n>0);
1418:     for (j=0; j<n; j++) {
1419:       sum1 += v[jrow]*x[9*idx[jrow]];
1420:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1421:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1422:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1423:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1424:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1425:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1426:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1427:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1428:       jrow++;
1429:      }
1430:     y[9*i]   = sum1;
1431:     y[9*i+1] = sum2;
1432:     y[9*i+2] = sum3;
1433:     y[9*i+3] = sum4;
1434:     y[9*i+4] = sum5;
1435:     y[9*i+5] = sum6;
1436:     y[9*i+6] = sum7;
1437:     y[9*i+7] = sum8;
1438:     y[9*i+8] = sum9;
1439:   }

1441:   PetscLogFlops(18*a->nz - 9*nonzerorow);
1442:   VecRestoreArray(xx,&x);
1443:   VecRestoreArray(yy,&y);
1444:   return(0);
1445: }

1447: /* ------------------------------------------------------------------------------*/

1451: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1452: {
1453:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1454:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1455:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1457:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

1460:   VecSet(yy,zero);
1461:   VecGetArray(xx,&x);
1462:   VecGetArray(yy,&y);

1464:   for (i=0; i<m; i++) {
1465:     idx    = a->j + a->i[i] ;
1466:     v      = a->a + a->i[i] ;
1467:     n      = a->i[i+1] - a->i[i];
1468:     alpha1 = x[9*i];
1469:     alpha2 = x[9*i+1];
1470:     alpha3 = x[9*i+2];
1471:     alpha4 = x[9*i+3];
1472:     alpha5 = x[9*i+4];
1473:     alpha6 = x[9*i+5];
1474:     alpha7 = x[9*i+6];
1475:     alpha8 = x[9*i+7];
1476:     alpha9 = x[9*i+8];
1477:     while (n-->0) {
1478:       y[9*(*idx)]   += alpha1*(*v);
1479:       y[9*(*idx)+1] += alpha2*(*v);
1480:       y[9*(*idx)+2] += alpha3*(*v);
1481:       y[9*(*idx)+3] += alpha4*(*v);
1482:       y[9*(*idx)+4] += alpha5*(*v);
1483:       y[9*(*idx)+5] += alpha6*(*v);
1484:       y[9*(*idx)+6] += alpha7*(*v);
1485:       y[9*(*idx)+7] += alpha8*(*v);
1486:       y[9*(*idx)+8] += alpha9*(*v);
1487:       idx++; v++;
1488:     }
1489:   }
1490:   PetscLogFlops(18*a->nz);
1491:   VecRestoreArray(xx,&x);
1492:   VecRestoreArray(yy,&y);
1493:   return(0);
1494: }

1498: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1499: {
1500:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1501:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1502:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1504:   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
1505:   PetscInt       n,i,jrow,j;

1508:   if (yy != zz) {VecCopy(yy,zz);}
1509:   VecGetArray(xx,&x);
1510:   VecGetArray(zz,&y);
1511:   idx  = a->j;
1512:   v    = a->a;
1513:   ii   = a->i;

1515:   for (i=0; i<m; i++) {
1516:     jrow = ii[i];
1517:     n    = ii[i+1] - jrow;
1518:     sum1  = 0.0;
1519:     sum2  = 0.0;
1520:     sum3  = 0.0;
1521:     sum4  = 0.0;
1522:     sum5  = 0.0;
1523:     sum6  = 0.0;
1524:     sum7  = 0.0;
1525:     sum8  = 0.0;
1526:     sum9  = 0.0;
1527:     for (j=0; j<n; j++) {
1528:       sum1 += v[jrow]*x[9*idx[jrow]];
1529:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1530:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1531:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1532:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1533:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1534:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1535:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1536:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1537:       jrow++;
1538:      }
1539:     y[9*i]   += sum1;
1540:     y[9*i+1] += sum2;
1541:     y[9*i+2] += sum3;
1542:     y[9*i+3] += sum4;
1543:     y[9*i+4] += sum5;
1544:     y[9*i+5] += sum6;
1545:     y[9*i+6] += sum7;
1546:     y[9*i+7] += sum8;
1547:     y[9*i+8] += sum9;
1548:   }

1550:   PetscLogFlops(18*a->nz);
1551:   VecRestoreArray(xx,&x);
1552:   VecRestoreArray(zz,&y);
1553:   return(0);
1554: }

1558: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1559: {
1560:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1561:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1562:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1564:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

1567:   if (yy != zz) {VecCopy(yy,zz);}
1568:   VecGetArray(xx,&x);
1569:   VecGetArray(zz,&y);
1570:   for (i=0; i<m; i++) {
1571:     idx    = a->j + a->i[i] ;
1572:     v      = a->a + a->i[i] ;
1573:     n      = a->i[i+1] - a->i[i];
1574:     alpha1 = x[9*i];
1575:     alpha2 = x[9*i+1];
1576:     alpha3 = x[9*i+2];
1577:     alpha4 = x[9*i+3];
1578:     alpha5 = x[9*i+4];
1579:     alpha6 = x[9*i+5];
1580:     alpha7 = x[9*i+6];
1581:     alpha8 = x[9*i+7];
1582:     alpha9 = x[9*i+8];
1583:     while (n-->0) {
1584:       y[9*(*idx)]   += alpha1*(*v);
1585:       y[9*(*idx)+1] += alpha2*(*v);
1586:       y[9*(*idx)+2] += alpha3*(*v);
1587:       y[9*(*idx)+3] += alpha4*(*v);
1588:       y[9*(*idx)+4] += alpha5*(*v);
1589:       y[9*(*idx)+5] += alpha6*(*v);
1590:       y[9*(*idx)+6] += alpha7*(*v);
1591:       y[9*(*idx)+7] += alpha8*(*v);
1592:       y[9*(*idx)+8] += alpha9*(*v);
1593:       idx++; v++;
1594:     }
1595:   }
1596:   PetscLogFlops(18*a->nz);
1597:   VecRestoreArray(xx,&x);
1598:   VecRestoreArray(zz,&y);
1599:   return(0);
1600: }
1603: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1604: {
1605:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1606:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1607:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1609:   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
1610:   PetscInt       n,i,jrow,j;

1613:   VecGetArray(xx,&x);
1614:   VecGetArray(yy,&y);
1615:   idx  = a->j;
1616:   v    = a->a;
1617:   ii   = a->i;

1619:   for (i=0; i<m; i++) {
1620:     jrow = ii[i];
1621:     n    = ii[i+1] - jrow;
1622:     sum1  = 0.0;
1623:     sum2  = 0.0;
1624:     sum3  = 0.0;
1625:     sum4  = 0.0;
1626:     sum5  = 0.0;
1627:     sum6  = 0.0;
1628:     sum7  = 0.0;
1629:     sum8  = 0.0;
1630:     sum9  = 0.0;
1631:     sum10 = 0.0;
1632:     nonzerorow += (n>0);
1633:     for (j=0; j<n; j++) {
1634:       sum1  += v[jrow]*x[10*idx[jrow]];
1635:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1636:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1637:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1638:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1639:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1640:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1641:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1642:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1643:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1644:       jrow++;
1645:      }
1646:     y[10*i]   = sum1;
1647:     y[10*i+1] = sum2;
1648:     y[10*i+2] = sum3;
1649:     y[10*i+3] = sum4;
1650:     y[10*i+4] = sum5;
1651:     y[10*i+5] = sum6;
1652:     y[10*i+6] = sum7;
1653:     y[10*i+7] = sum8;
1654:     y[10*i+8] = sum9;
1655:     y[10*i+9] = sum10;
1656:   }

1658:   PetscLogFlops(20*a->nz - 10*nonzerorow);
1659:   VecRestoreArray(xx,&x);
1660:   VecRestoreArray(yy,&y);
1661:   return(0);
1662: }

1666: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1667: {
1668:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1669:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1670:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1672:   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
1673:   PetscInt       n,i,jrow,j;

1676:   if (yy != zz) {VecCopy(yy,zz);}
1677:   VecGetArray(xx,&x);
1678:   VecGetArray(zz,&y);
1679:   idx  = a->j;
1680:   v    = a->a;
1681:   ii   = a->i;

1683:   for (i=0; i<m; i++) {
1684:     jrow = ii[i];
1685:     n    = ii[i+1] - jrow;
1686:     sum1  = 0.0;
1687:     sum2  = 0.0;
1688:     sum3  = 0.0;
1689:     sum4  = 0.0;
1690:     sum5  = 0.0;
1691:     sum6  = 0.0;
1692:     sum7  = 0.0;
1693:     sum8  = 0.0;
1694:     sum9  = 0.0;
1695:     sum10 = 0.0;
1696:     for (j=0; j<n; j++) {
1697:       sum1  += v[jrow]*x[10*idx[jrow]];
1698:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1699:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1700:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1701:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1702:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1703:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1704:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1705:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1706:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1707:       jrow++;
1708:      }
1709:     y[10*i]   += sum1;
1710:     y[10*i+1] += sum2;
1711:     y[10*i+2] += sum3;
1712:     y[10*i+3] += sum4;
1713:     y[10*i+4] += sum5;
1714:     y[10*i+5] += sum6;
1715:     y[10*i+6] += sum7;
1716:     y[10*i+7] += sum8;
1717:     y[10*i+8] += sum9;
1718:     y[10*i+9] += sum10;
1719:   }

1721:   PetscLogFlops(20*a->nz);
1722:   VecRestoreArray(xx,&x);
1723:   VecRestoreArray(yy,&y);
1724:   return(0);
1725: }

1729: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1730: {
1731:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1732:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1733:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0;
1735:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

1738:   VecSet(yy,zero);
1739:   VecGetArray(xx,&x);
1740:   VecGetArray(yy,&y);

1742:   for (i=0; i<m; i++) {
1743:     idx    = a->j + a->i[i] ;
1744:     v      = a->a + a->i[i] ;
1745:     n      = a->i[i+1] - a->i[i];
1746:     alpha1 = x[10*i];
1747:     alpha2 = x[10*i+1];
1748:     alpha3 = x[10*i+2];
1749:     alpha4 = x[10*i+3];
1750:     alpha5 = x[10*i+4];
1751:     alpha6 = x[10*i+5];
1752:     alpha7 = x[10*i+6];
1753:     alpha8 = x[10*i+7];
1754:     alpha9 = x[10*i+8];
1755:     alpha10 = x[10*i+9];
1756:     while (n-->0) {
1757:       y[10*(*idx)]   += alpha1*(*v);
1758:       y[10*(*idx)+1] += alpha2*(*v);
1759:       y[10*(*idx)+2] += alpha3*(*v);
1760:       y[10*(*idx)+3] += alpha4*(*v);
1761:       y[10*(*idx)+4] += alpha5*(*v);
1762:       y[10*(*idx)+5] += alpha6*(*v);
1763:       y[10*(*idx)+6] += alpha7*(*v);
1764:       y[10*(*idx)+7] += alpha8*(*v);
1765:       y[10*(*idx)+8] += alpha9*(*v);
1766:       y[10*(*idx)+9] += alpha10*(*v);
1767:       idx++; v++;
1768:     }
1769:   }
1770:   PetscLogFlops(20*a->nz);
1771:   VecRestoreArray(xx,&x);
1772:   VecRestoreArray(yy,&y);
1773:   return(0);
1774: }

1778: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1779: {
1780:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1781:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1782:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1784:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

1787:   if (yy != zz) {VecCopy(yy,zz);}
1788:   VecGetArray(xx,&x);
1789:   VecGetArray(zz,&y);
1790:   for (i=0; i<m; i++) {
1791:     idx    = a->j + a->i[i] ;
1792:     v      = a->a + a->i[i] ;
1793:     n      = a->i[i+1] - a->i[i];
1794:     alpha1 = x[10*i];
1795:     alpha2 = x[10*i+1];
1796:     alpha3 = x[10*i+2];
1797:     alpha4 = x[10*i+3];
1798:     alpha5 = x[10*i+4];
1799:     alpha6 = x[10*i+5];
1800:     alpha7 = x[10*i+6];
1801:     alpha8 = x[10*i+7];
1802:     alpha9 = x[10*i+8];
1803:     alpha10 = x[10*i+9];
1804:     while (n-->0) {
1805:       y[10*(*idx)]   += alpha1*(*v);
1806:       y[10*(*idx)+1] += alpha2*(*v);
1807:       y[10*(*idx)+2] += alpha3*(*v);
1808:       y[10*(*idx)+3] += alpha4*(*v);
1809:       y[10*(*idx)+4] += alpha5*(*v);
1810:       y[10*(*idx)+5] += alpha6*(*v);
1811:       y[10*(*idx)+6] += alpha7*(*v);
1812:       y[10*(*idx)+7] += alpha8*(*v);
1813:       y[10*(*idx)+8] += alpha9*(*v);
1814:       y[10*(*idx)+9] += alpha10*(*v);
1815:       idx++; v++;
1816:     }
1817:   }
1818:   PetscLogFlops(20*a->nz);
1819:   VecRestoreArray(xx,&x);
1820:   VecRestoreArray(zz,&y);
1821:   return(0);
1822: }


1825: /*--------------------------------------------------------------------------------------------*/
1828: PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1829: {
1830:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1831:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1832:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1834:   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
1835:   PetscInt       n,i,jrow,j;

1838:   VecGetArray(xx,&x);
1839:   VecGetArray(yy,&y);
1840:   idx  = a->j;
1841:   v    = a->a;
1842:   ii   = a->i;

1844:   for (i=0; i<m; i++) {
1845:     jrow = ii[i];
1846:     n    = ii[i+1] - jrow;
1847:     sum1  = 0.0;
1848:     sum2  = 0.0;
1849:     sum3  = 0.0;
1850:     sum4  = 0.0;
1851:     sum5  = 0.0;
1852:     sum6  = 0.0;
1853:     sum7  = 0.0;
1854:     sum8  = 0.0;
1855:     sum9  = 0.0;
1856:     sum10 = 0.0;
1857:     sum11 = 0.0;
1858:     nonzerorow += (n>0);
1859:     for (j=0; j<n; j++) {
1860:       sum1  += v[jrow]*x[11*idx[jrow]];
1861:       sum2  += v[jrow]*x[11*idx[jrow]+1];
1862:       sum3  += v[jrow]*x[11*idx[jrow]+2];
1863:       sum4  += v[jrow]*x[11*idx[jrow]+3];
1864:       sum5  += v[jrow]*x[11*idx[jrow]+4];
1865:       sum6  += v[jrow]*x[11*idx[jrow]+5];
1866:       sum7  += v[jrow]*x[11*idx[jrow]+6];
1867:       sum8  += v[jrow]*x[11*idx[jrow]+7];
1868:       sum9  += v[jrow]*x[11*idx[jrow]+8];
1869:       sum10 += v[jrow]*x[11*idx[jrow]+9];
1870:       sum11 += v[jrow]*x[11*idx[jrow]+10];
1871:       jrow++;
1872:      }
1873:     y[11*i]   = sum1;
1874:     y[11*i+1] = sum2;
1875:     y[11*i+2] = sum3;
1876:     y[11*i+3] = sum4;
1877:     y[11*i+4] = sum5;
1878:     y[11*i+5] = sum6;
1879:     y[11*i+6] = sum7;
1880:     y[11*i+7] = sum8;
1881:     y[11*i+8] = sum9;
1882:     y[11*i+9] = sum10;
1883:     y[11*i+10] = sum11;
1884:   }

1886:   PetscLogFlops(22*a->nz - 11*nonzerorow);
1887:   VecRestoreArray(xx,&x);
1888:   VecRestoreArray(yy,&y);
1889:   return(0);
1890: }

1894: PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1895: {
1896:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1897:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1898:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1900:   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
1901:   PetscInt       n,i,jrow,j;

1904:   if (yy != zz) {VecCopy(yy,zz);}
1905:   VecGetArray(xx,&x);
1906:   VecGetArray(zz,&y);
1907:   idx  = a->j;
1908:   v    = a->a;
1909:   ii   = a->i;

1911:   for (i=0; i<m; i++) {
1912:     jrow = ii[i];
1913:     n    = ii[i+1] - jrow;
1914:     sum1  = 0.0;
1915:     sum2  = 0.0;
1916:     sum3  = 0.0;
1917:     sum4  = 0.0;
1918:     sum5  = 0.0;
1919:     sum6  = 0.0;
1920:     sum7  = 0.0;
1921:     sum8  = 0.0;
1922:     sum9  = 0.0;
1923:     sum10 = 0.0;
1924:     sum11 = 0.0;
1925:     for (j=0; j<n; j++) {
1926:       sum1  += v[jrow]*x[11*idx[jrow]];
1927:       sum2  += v[jrow]*x[11*idx[jrow]+1];
1928:       sum3  += v[jrow]*x[11*idx[jrow]+2];
1929:       sum4  += v[jrow]*x[11*idx[jrow]+3];
1930:       sum5  += v[jrow]*x[11*idx[jrow]+4];
1931:       sum6  += v[jrow]*x[11*idx[jrow]+5];
1932:       sum7  += v[jrow]*x[11*idx[jrow]+6];
1933:       sum8  += v[jrow]*x[11*idx[jrow]+7];
1934:       sum9  += v[jrow]*x[11*idx[jrow]+8];
1935:       sum10 += v[jrow]*x[11*idx[jrow]+9];
1936:       sum11 += v[jrow]*x[11*idx[jrow]+10];
1937:       jrow++;
1938:      }
1939:     y[11*i]   += sum1;
1940:     y[11*i+1] += sum2;
1941:     y[11*i+2] += sum3;
1942:     y[11*i+3] += sum4;
1943:     y[11*i+4] += sum5;
1944:     y[11*i+5] += sum6;
1945:     y[11*i+6] += sum7;
1946:     y[11*i+7] += sum8;
1947:     y[11*i+8] += sum9;
1948:     y[11*i+9] += sum10;
1949:     y[11*i+10] += sum11;
1950:   }

1952:   PetscLogFlops(22*a->nz);
1953:   VecRestoreArray(xx,&x);
1954:   VecRestoreArray(yy,&y);
1955:   return(0);
1956: }

1960: PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1961: {
1962:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1963:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1964:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11,zero = 0.0;
1966:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

1969:   VecSet(yy,zero);
1970:   VecGetArray(xx,&x);
1971:   VecGetArray(yy,&y);

1973:   for (i=0; i<m; i++) {
1974:     idx    = a->j + a->i[i] ;
1975:     v      = a->a + a->i[i] ;
1976:     n      = a->i[i+1] - a->i[i];
1977:     alpha1 = x[11*i];
1978:     alpha2 = x[11*i+1];
1979:     alpha3 = x[11*i+2];
1980:     alpha4 = x[11*i+3];
1981:     alpha5 = x[11*i+4];
1982:     alpha6 = x[11*i+5];
1983:     alpha7 = x[11*i+6];
1984:     alpha8 = x[11*i+7];
1985:     alpha9 = x[11*i+8];
1986:     alpha10 = x[11*i+9];
1987:     alpha11 = x[11*i+10];
1988:     while (n-->0) {
1989:       y[11*(*idx)]   += alpha1*(*v);
1990:       y[11*(*idx)+1] += alpha2*(*v);
1991:       y[11*(*idx)+2] += alpha3*(*v);
1992:       y[11*(*idx)+3] += alpha4*(*v);
1993:       y[11*(*idx)+4] += alpha5*(*v);
1994:       y[11*(*idx)+5] += alpha6*(*v);
1995:       y[11*(*idx)+6] += alpha7*(*v);
1996:       y[11*(*idx)+7] += alpha8*(*v);
1997:       y[11*(*idx)+8] += alpha9*(*v);
1998:       y[11*(*idx)+9] += alpha10*(*v);
1999:       y[11*(*idx)+10] += alpha11*(*v);
2000:       idx++; v++;
2001:     }
2002:   }
2003:   PetscLogFlops(22*a->nz);
2004:   VecRestoreArray(xx,&x);
2005:   VecRestoreArray(yy,&y);
2006:   return(0);
2007: }

2011: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2012: {
2013:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2014:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2015:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2017:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

2020:   if (yy != zz) {VecCopy(yy,zz);}
2021:   VecGetArray(xx,&x);
2022:   VecGetArray(zz,&y);
2023:   for (i=0; i<m; i++) {
2024:     idx    = a->j + a->i[i] ;
2025:     v      = a->a + a->i[i] ;
2026:     n      = a->i[i+1] - a->i[i];
2027:     alpha1 = x[11*i];
2028:     alpha2 = x[11*i+1];
2029:     alpha3 = x[11*i+2];
2030:     alpha4 = x[11*i+3];
2031:     alpha5 = x[11*i+4];
2032:     alpha6 = x[11*i+5];
2033:     alpha7 = x[11*i+6];
2034:     alpha8 = x[11*i+7];
2035:     alpha9 = x[11*i+8];
2036:     alpha10 = x[11*i+9];
2037:     alpha11 = x[11*i+10];
2038:     while (n-->0) {
2039:       y[11*(*idx)]   += alpha1*(*v);
2040:       y[11*(*idx)+1] += alpha2*(*v);
2041:       y[11*(*idx)+2] += alpha3*(*v);
2042:       y[11*(*idx)+3] += alpha4*(*v);
2043:       y[11*(*idx)+4] += alpha5*(*v);
2044:       y[11*(*idx)+5] += alpha6*(*v);
2045:       y[11*(*idx)+6] += alpha7*(*v);
2046:       y[11*(*idx)+7] += alpha8*(*v);
2047:       y[11*(*idx)+8] += alpha9*(*v);
2048:       y[11*(*idx)+9] += alpha10*(*v);
2049:       y[11*(*idx)+10] += alpha11*(*v);
2050:       idx++; v++;
2051:     }
2052:   }
2053:   PetscLogFlops(22*a->nz);
2054:   VecRestoreArray(xx,&x);
2055:   VecRestoreArray(zz,&y);
2056:   return(0);
2057: }


2060: /*--------------------------------------------------------------------------------------------*/
2063: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2064: {
2065:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2066:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2067:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2068:   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2070:   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
2071:   PetscInt       n,i,jrow,j;

2074:   VecGetArray(xx,&x);
2075:   VecGetArray(yy,&y);
2076:   idx  = a->j;
2077:   v    = a->a;
2078:   ii   = a->i;

2080:   for (i=0; i<m; i++) {
2081:     jrow = ii[i];
2082:     n    = ii[i+1] - jrow;
2083:     sum1  = 0.0;
2084:     sum2  = 0.0;
2085:     sum3  = 0.0;
2086:     sum4  = 0.0;
2087:     sum5  = 0.0;
2088:     sum6  = 0.0;
2089:     sum7  = 0.0;
2090:     sum8  = 0.0;
2091:     sum9  = 0.0;
2092:     sum10 = 0.0;
2093:     sum11 = 0.0;
2094:     sum12 = 0.0;
2095:     sum13 = 0.0;
2096:     sum14 = 0.0;
2097:     sum15 = 0.0;
2098:     sum16 = 0.0;
2099:     nonzerorow += (n>0);
2100:     for (j=0; j<n; j++) {
2101:       sum1  += v[jrow]*x[16*idx[jrow]];
2102:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2103:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2104:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2105:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2106:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2107:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2108:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2109:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2110:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2111:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2112:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2113:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2114:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2115:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2116:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2117:       jrow++;
2118:      }
2119:     y[16*i]    = sum1;
2120:     y[16*i+1]  = sum2;
2121:     y[16*i+2]  = sum3;
2122:     y[16*i+3]  = sum4;
2123:     y[16*i+4]  = sum5;
2124:     y[16*i+5]  = sum6;
2125:     y[16*i+6]  = sum7;
2126:     y[16*i+7]  = sum8;
2127:     y[16*i+8]  = sum9;
2128:     y[16*i+9]  = sum10;
2129:     y[16*i+10] = sum11;
2130:     y[16*i+11] = sum12;
2131:     y[16*i+12] = sum13;
2132:     y[16*i+13] = sum14;
2133:     y[16*i+14] = sum15;
2134:     y[16*i+15] = sum16;
2135:   }

2137:   PetscLogFlops(32*a->nz - 16*nonzerorow);
2138:   VecRestoreArray(xx,&x);
2139:   VecRestoreArray(yy,&y);
2140:   return(0);
2141: }

2145: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2146: {
2147:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2148:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2149:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
2150:   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2152:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

2155:   VecSet(yy,zero);
2156:   VecGetArray(xx,&x);
2157:   VecGetArray(yy,&y);

2159:   for (i=0; i<m; i++) {
2160:     idx    = a->j + a->i[i] ;
2161:     v      = a->a + a->i[i] ;
2162:     n      = a->i[i+1] - a->i[i];
2163:     alpha1  = x[16*i];
2164:     alpha2  = x[16*i+1];
2165:     alpha3  = x[16*i+2];
2166:     alpha4  = x[16*i+3];
2167:     alpha5  = x[16*i+4];
2168:     alpha6  = x[16*i+5];
2169:     alpha7  = x[16*i+6];
2170:     alpha8  = x[16*i+7];
2171:     alpha9  = x[16*i+8];
2172:     alpha10 = x[16*i+9];
2173:     alpha11 = x[16*i+10];
2174:     alpha12 = x[16*i+11];
2175:     alpha13 = x[16*i+12];
2176:     alpha14 = x[16*i+13];
2177:     alpha15 = x[16*i+14];
2178:     alpha16 = x[16*i+15];
2179:     while (n-->0) {
2180:       y[16*(*idx)]    += alpha1*(*v);
2181:       y[16*(*idx)+1]  += alpha2*(*v);
2182:       y[16*(*idx)+2]  += alpha3*(*v);
2183:       y[16*(*idx)+3]  += alpha4*(*v);
2184:       y[16*(*idx)+4]  += alpha5*(*v);
2185:       y[16*(*idx)+5]  += alpha6*(*v);
2186:       y[16*(*idx)+6]  += alpha7*(*v);
2187:       y[16*(*idx)+7]  += alpha8*(*v);
2188:       y[16*(*idx)+8]  += alpha9*(*v);
2189:       y[16*(*idx)+9]  += alpha10*(*v);
2190:       y[16*(*idx)+10] += alpha11*(*v);
2191:       y[16*(*idx)+11] += alpha12*(*v);
2192:       y[16*(*idx)+12] += alpha13*(*v);
2193:       y[16*(*idx)+13] += alpha14*(*v);
2194:       y[16*(*idx)+14] += alpha15*(*v);
2195:       y[16*(*idx)+15] += alpha16*(*v);
2196:       idx++; v++;
2197:     }
2198:   }
2199:   PetscLogFlops(32*a->nz);
2200:   VecRestoreArray(xx,&x);
2201:   VecRestoreArray(yy,&y);
2202:   return(0);
2203: }

2207: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2208: {
2209:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2210:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2211:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2212:   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2214:   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
2215:   PetscInt       n,i,jrow,j;

2218:   if (yy != zz) {VecCopy(yy,zz);}
2219:   VecGetArray(xx,&x);
2220:   VecGetArray(zz,&y);
2221:   idx  = a->j;
2222:   v    = a->a;
2223:   ii   = a->i;

2225:   for (i=0; i<m; i++) {
2226:     jrow = ii[i];
2227:     n    = ii[i+1] - jrow;
2228:     sum1  = 0.0;
2229:     sum2  = 0.0;
2230:     sum3  = 0.0;
2231:     sum4  = 0.0;
2232:     sum5  = 0.0;
2233:     sum6  = 0.0;
2234:     sum7  = 0.0;
2235:     sum8  = 0.0;
2236:     sum9  = 0.0;
2237:     sum10 = 0.0;
2238:     sum11 = 0.0;
2239:     sum12 = 0.0;
2240:     sum13 = 0.0;
2241:     sum14 = 0.0;
2242:     sum15 = 0.0;
2243:     sum16 = 0.0;
2244:     for (j=0; j<n; j++) {
2245:       sum1  += v[jrow]*x[16*idx[jrow]];
2246:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2247:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2248:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2249:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2250:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2251:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2252:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2253:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2254:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2255:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2256:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2257:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2258:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2259:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2260:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2261:       jrow++;
2262:      }
2263:     y[16*i]    += sum1;
2264:     y[16*i+1]  += sum2;
2265:     y[16*i+2]  += sum3;
2266:     y[16*i+3]  += sum4;
2267:     y[16*i+4]  += sum5;
2268:     y[16*i+5]  += sum6;
2269:     y[16*i+6]  += sum7;
2270:     y[16*i+7]  += sum8;
2271:     y[16*i+8]  += sum9;
2272:     y[16*i+9]  += sum10;
2273:     y[16*i+10] += sum11;
2274:     y[16*i+11] += sum12;
2275:     y[16*i+12] += sum13;
2276:     y[16*i+13] += sum14;
2277:     y[16*i+14] += sum15;
2278:     y[16*i+15] += sum16;
2279:   }

2281:   PetscLogFlops(32*a->nz);
2282:   VecRestoreArray(xx,&x);
2283:   VecRestoreArray(zz,&y);
2284:   return(0);
2285: }

2289: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2290: {
2291:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2292:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2293:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2294:   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2296:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

2299:   if (yy != zz) {VecCopy(yy,zz);}
2300:   VecGetArray(xx,&x);
2301:   VecGetArray(zz,&y);
2302:   for (i=0; i<m; i++) {
2303:     idx    = a->j + a->i[i] ;
2304:     v      = a->a + a->i[i] ;
2305:     n      = a->i[i+1] - a->i[i];
2306:     alpha1 = x[16*i];
2307:     alpha2 = x[16*i+1];
2308:     alpha3 = x[16*i+2];
2309:     alpha4 = x[16*i+3];
2310:     alpha5 = x[16*i+4];
2311:     alpha6 = x[16*i+5];
2312:     alpha7 = x[16*i+6];
2313:     alpha8 = x[16*i+7];
2314:     alpha9  = x[16*i+8];
2315:     alpha10 = x[16*i+9];
2316:     alpha11 = x[16*i+10];
2317:     alpha12 = x[16*i+11];
2318:     alpha13 = x[16*i+12];
2319:     alpha14 = x[16*i+13];
2320:     alpha15 = x[16*i+14];
2321:     alpha16 = x[16*i+15];
2322:     while (n-->0) {
2323:       y[16*(*idx)]   += alpha1*(*v);
2324:       y[16*(*idx)+1] += alpha2*(*v);
2325:       y[16*(*idx)+2] += alpha3*(*v);
2326:       y[16*(*idx)+3] += alpha4*(*v);
2327:       y[16*(*idx)+4] += alpha5*(*v);
2328:       y[16*(*idx)+5] += alpha6*(*v);
2329:       y[16*(*idx)+6] += alpha7*(*v);
2330:       y[16*(*idx)+7] += alpha8*(*v);
2331:       y[16*(*idx)+8]  += alpha9*(*v);
2332:       y[16*(*idx)+9]  += alpha10*(*v);
2333:       y[16*(*idx)+10] += alpha11*(*v);
2334:       y[16*(*idx)+11] += alpha12*(*v);
2335:       y[16*(*idx)+12] += alpha13*(*v);
2336:       y[16*(*idx)+13] += alpha14*(*v);
2337:       y[16*(*idx)+14] += alpha15*(*v);
2338:       y[16*(*idx)+15] += alpha16*(*v);
2339:       idx++; v++;
2340:     }
2341:   }
2342:   PetscLogFlops(32*a->nz);
2343:   VecRestoreArray(xx,&x);
2344:   VecRestoreArray(zz,&y);
2345:   return(0);
2346: }

2348: /*--------------------------------------------------------------------------------------------*/
2351: PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2352: {
2353:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2354:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2355:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2356:   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2358:   PetscInt       m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
2359:   PetscInt       n,i,jrow,j;

2362:   VecGetArray(xx,&x);
2363:   VecGetArray(yy,&y);
2364:   idx  = a->j;
2365:   v    = a->a;
2366:   ii   = a->i;

2368:   for (i=0; i<m; i++) {
2369:     jrow = ii[i];
2370:     n    = ii[i+1] - jrow;
2371:     sum1  = 0.0;
2372:     sum2  = 0.0;
2373:     sum3  = 0.0;
2374:     sum4  = 0.0;
2375:     sum5  = 0.0;
2376:     sum6  = 0.0;
2377:     sum7  = 0.0;
2378:     sum8  = 0.0;
2379:     sum9  = 0.0;
2380:     sum10 = 0.0;
2381:     sum11 = 0.0;
2382:     sum12 = 0.0;
2383:     sum13 = 0.0;
2384:     sum14 = 0.0;
2385:     sum15 = 0.0;
2386:     sum16 = 0.0;
2387:     sum17 = 0.0;
2388:     sum18 = 0.0;
2389:     nonzerorow += (n>0);
2390:     for (j=0; j<n; j++) {
2391:       sum1  += v[jrow]*x[18*idx[jrow]];
2392:       sum2  += v[jrow]*x[18*idx[jrow]+1];
2393:       sum3  += v[jrow]*x[18*idx[jrow]+2];
2394:       sum4  += v[jrow]*x[18*idx[jrow]+3];
2395:       sum5  += v[jrow]*x[18*idx[jrow]+4];
2396:       sum6  += v[jrow]*x[18*idx[jrow]+5];
2397:       sum7  += v[jrow]*x[18*idx[jrow]+6];
2398:       sum8  += v[jrow]*x[18*idx[jrow]+7];
2399:       sum9  += v[jrow]*x[18*idx[jrow]+8];
2400:       sum10 += v[jrow]*x[18*idx[jrow]+9];
2401:       sum11 += v[jrow]*x[18*idx[jrow]+10];
2402:       sum12 += v[jrow]*x[18*idx[jrow]+11];
2403:       sum13 += v[jrow]*x[18*idx[jrow]+12];
2404:       sum14 += v[jrow]*x[18*idx[jrow]+13];
2405:       sum15 += v[jrow]*x[18*idx[jrow]+14];
2406:       sum16 += v[jrow]*x[18*idx[jrow]+15];
2407:       sum17 += v[jrow]*x[18*idx[jrow]+16];
2408:       sum18 += v[jrow]*x[18*idx[jrow]+17];
2409:       jrow++;
2410:      }
2411:     y[18*i]    = sum1;
2412:     y[18*i+1]  = sum2;
2413:     y[18*i+2]  = sum3;
2414:     y[18*i+3]  = sum4;
2415:     y[18*i+4]  = sum5;
2416:     y[18*i+5]  = sum6;
2417:     y[18*i+6]  = sum7;
2418:     y[18*i+7]  = sum8;
2419:     y[18*i+8]  = sum9;
2420:     y[18*i+9]  = sum10;
2421:     y[18*i+10] = sum11;
2422:     y[18*i+11] = sum12;
2423:     y[18*i+12] = sum13;
2424:     y[18*i+13] = sum14;
2425:     y[18*i+14] = sum15;
2426:     y[18*i+15] = sum16;
2427:     y[18*i+16] = sum17;
2428:     y[18*i+17] = sum18;
2429:   }

2431:   PetscLogFlops(36*a->nz - 18*nonzerorow);
2432:   VecRestoreArray(xx,&x);
2433:   VecRestoreArray(yy,&y);
2434:   return(0);
2435: }

2439: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2440: {
2441:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2442:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2443:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
2444:   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2446:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

2449:   VecSet(yy,zero);
2450:   VecGetArray(xx,&x);
2451:   VecGetArray(yy,&y);

2453:   for (i=0; i<m; i++) {
2454:     idx    = a->j + a->i[i] ;
2455:     v      = a->a + a->i[i] ;
2456:     n      = a->i[i+1] - a->i[i];
2457:     alpha1  = x[18*i];
2458:     alpha2  = x[18*i+1];
2459:     alpha3  = x[18*i+2];
2460:     alpha4  = x[18*i+3];
2461:     alpha5  = x[18*i+4];
2462:     alpha6  = x[18*i+5];
2463:     alpha7  = x[18*i+6];
2464:     alpha8  = x[18*i+7];
2465:     alpha9  = x[18*i+8];
2466:     alpha10 = x[18*i+9];
2467:     alpha11 = x[18*i+10];
2468:     alpha12 = x[18*i+11];
2469:     alpha13 = x[18*i+12];
2470:     alpha14 = x[18*i+13];
2471:     alpha15 = x[18*i+14];
2472:     alpha16 = x[18*i+15];
2473:     alpha17 = x[18*i+16];
2474:     alpha18 = x[18*i+17];
2475:     while (n-->0) {
2476:       y[18*(*idx)]    += alpha1*(*v);
2477:       y[18*(*idx)+1]  += alpha2*(*v);
2478:       y[18*(*idx)+2]  += alpha3*(*v);
2479:       y[18*(*idx)+3]  += alpha4*(*v);
2480:       y[18*(*idx)+4]  += alpha5*(*v);
2481:       y[18*(*idx)+5]  += alpha6*(*v);
2482:       y[18*(*idx)+6]  += alpha7*(*v);
2483:       y[18*(*idx)+7]  += alpha8*(*v);
2484:       y[18*(*idx)+8]  += alpha9*(*v);
2485:       y[18*(*idx)+9]  += alpha10*(*v);
2486:       y[18*(*idx)+10] += alpha11*(*v);
2487:       y[18*(*idx)+11] += alpha12*(*v);
2488:       y[18*(*idx)+12] += alpha13*(*v);
2489:       y[18*(*idx)+13] += alpha14*(*v);
2490:       y[18*(*idx)+14] += alpha15*(*v);
2491:       y[18*(*idx)+15] += alpha16*(*v);
2492:       y[18*(*idx)+16] += alpha17*(*v);
2493:       y[18*(*idx)+17] += alpha18*(*v);
2494:       idx++; v++;
2495:     }
2496:   }
2497:   PetscLogFlops(36*a->nz);
2498:   VecRestoreArray(xx,&x);
2499:   VecRestoreArray(yy,&y);
2500:   return(0);
2501: }

2505: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2506: {
2507:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2508:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2509:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2510:   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2512:   PetscInt       m = b->AIJ->rmap->n,*idx,*ii;
2513:   PetscInt       n,i,jrow,j;

2516:   if (yy != zz) {VecCopy(yy,zz);}
2517:   VecGetArray(xx,&x);
2518:   VecGetArray(zz,&y);
2519:   idx  = a->j;
2520:   v    = a->a;
2521:   ii   = a->i;

2523:   for (i=0; i<m; i++) {
2524:     jrow = ii[i];
2525:     n    = ii[i+1] - jrow;
2526:     sum1  = 0.0;
2527:     sum2  = 0.0;
2528:     sum3  = 0.0;
2529:     sum4  = 0.0;
2530:     sum5  = 0.0;
2531:     sum6  = 0.0;
2532:     sum7  = 0.0;
2533:     sum8  = 0.0;
2534:     sum9  = 0.0;
2535:     sum10 = 0.0;
2536:     sum11 = 0.0;
2537:     sum12 = 0.0;
2538:     sum13 = 0.0;
2539:     sum14 = 0.0;
2540:     sum15 = 0.0;
2541:     sum16 = 0.0;
2542:     sum17 = 0.0;
2543:     sum18 = 0.0;
2544:     for (j=0; j<n; j++) {
2545:       sum1  += v[jrow]*x[18*idx[jrow]];
2546:       sum2  += v[jrow]*x[18*idx[jrow]+1];
2547:       sum3  += v[jrow]*x[18*idx[jrow]+2];
2548:       sum4  += v[jrow]*x[18*idx[jrow]+3];
2549:       sum5  += v[jrow]*x[18*idx[jrow]+4];
2550:       sum6  += v[jrow]*x[18*idx[jrow]+5];
2551:       sum7  += v[jrow]*x[18*idx[jrow]+6];
2552:       sum8  += v[jrow]*x[18*idx[jrow]+7];
2553:       sum9  += v[jrow]*x[18*idx[jrow]+8];
2554:       sum10 += v[jrow]*x[18*idx[jrow]+9];
2555:       sum11 += v[jrow]*x[18*idx[jrow]+10];
2556:       sum12 += v[jrow]*x[18*idx[jrow]+11];
2557:       sum13 += v[jrow]*x[18*idx[jrow]+12];
2558:       sum14 += v[jrow]*x[18*idx[jrow]+13];
2559:       sum15 += v[jrow]*x[18*idx[jrow]+14];
2560:       sum16 += v[jrow]*x[18*idx[jrow]+15];
2561:       sum17 += v[jrow]*x[18*idx[jrow]+16];
2562:       sum18 += v[jrow]*x[18*idx[jrow]+17];
2563:       jrow++;
2564:      }
2565:     y[18*i]    += sum1;
2566:     y[18*i+1]  += sum2;
2567:     y[18*i+2]  += sum3;
2568:     y[18*i+3]  += sum4;
2569:     y[18*i+4]  += sum5;
2570:     y[18*i+5]  += sum6;
2571:     y[18*i+6]  += sum7;
2572:     y[18*i+7]  += sum8;
2573:     y[18*i+8]  += sum9;
2574:     y[18*i+9]  += sum10;
2575:     y[18*i+10] += sum11;
2576:     y[18*i+11] += sum12;
2577:     y[18*i+12] += sum13;
2578:     y[18*i+13] += sum14;
2579:     y[18*i+14] += sum15;
2580:     y[18*i+15] += sum16;
2581:     y[18*i+16] += sum17;
2582:     y[18*i+17] += sum18;
2583:   }

2585:   PetscLogFlops(36*a->nz);
2586:   VecRestoreArray(xx,&x);
2587:   VecRestoreArray(zz,&y);
2588:   return(0);
2589: }

2593: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2594: {
2595:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2596:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2597:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2598:   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2600:   PetscInt       m = b->AIJ->rmap->n,n,i,*idx;

2603:   if (yy != zz) {VecCopy(yy,zz);}
2604:   VecGetArray(xx,&x);
2605:   VecGetArray(zz,&y);
2606:   for (i=0; i<m; i++) {
2607:     idx    = a->j + a->i[i] ;
2608:     v      = a->a + a->i[i] ;
2609:     n      = a->i[i+1] - a->i[i];
2610:     alpha1 = x[18*i];
2611:     alpha2 = x[18*i+1];
2612:     alpha3 = x[18*i+2];
2613:     alpha4 = x[18*i+3];
2614:     alpha5 = x[18*i+4];
2615:     alpha6 = x[18*i+5];
2616:     alpha7 = x[18*i+6];
2617:     alpha8 = x[18*i+7];
2618:     alpha9  = x[18*i+8];
2619:     alpha10 = x[18*i+9];
2620:     alpha11 = x[18*i+10];
2621:     alpha12 = x[18*i+11];
2622:     alpha13 = x[18*i+12];
2623:     alpha14 = x[18*i+13];
2624:     alpha15 = x[18*i+14];
2625:     alpha16 = x[18*i+15];
2626:     alpha17 = x[18*i+16];
2627:     alpha18 = x[18*i+17];
2628:     while (n-->0) {
2629:       y[18*(*idx)]   += alpha1*(*v);
2630:       y[18*(*idx)+1] += alpha2*(*v);
2631:       y[18*(*idx)+2] += alpha3*(*v);
2632:       y[18*(*idx)+3] += alpha4*(*v);
2633:       y[18*(*idx)+4] += alpha5*(*v);
2634:       y[18*(*idx)+5] += alpha6*(*v);
2635:       y[18*(*idx)+6] += alpha7*(*v);
2636:       y[18*(*idx)+7] += alpha8*(*v);
2637:       y[18*(*idx)+8]  += alpha9*(*v);
2638:       y[18*(*idx)+9]  += alpha10*(*v);
2639:       y[18*(*idx)+10] += alpha11*(*v);
2640:       y[18*(*idx)+11] += alpha12*(*v);
2641:       y[18*(*idx)+12] += alpha13*(*v);
2642:       y[18*(*idx)+13] += alpha14*(*v);
2643:       y[18*(*idx)+14] += alpha15*(*v);
2644:       y[18*(*idx)+15] += alpha16*(*v);
2645:       y[18*(*idx)+16] += alpha17*(*v);
2646:       y[18*(*idx)+17] += alpha18*(*v);
2647:       idx++; v++;
2648:     }
2649:   }
2650:   PetscLogFlops(36*a->nz);
2651:   VecRestoreArray(xx,&x);
2652:   VecRestoreArray(zz,&y);
2653:   return(0);
2654: }

2656: /*===================================================================================*/
2659: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2660: {
2661:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2665:   /* start the scatter */
2666:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2667:   (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2668:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2669:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2670:   return(0);
2671: }

2675: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2676: {
2677:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2681:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2682:   (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2683:   VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2684:   VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2685:   return(0);
2686: }

2690: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2691: {
2692:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2696:   /* start the scatter */
2697:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2698:   (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2699:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2700:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2701:   return(0);
2702: }

2706: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2707: {
2708:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2712:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2713:   VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2714:   (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2715:   VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2716:   return(0);
2717: }

2719: /* ----------------------------------------------------------------*/
2722: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2723: {
2724:   /* This routine requires testing -- but it's getting better. */
2725:   PetscErrorCode     ierr;
2726:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2727:   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
2728:   Mat                P=pp->AIJ;
2729:   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2730:   PetscInt           *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
2731:   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2732:   PetscInt           an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof,cn;
2733:   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
2734:   MatScalar          *ca;

2737:   /* Start timer */
2738:   PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);

2740:   /* Get ij structure of P^T */
2741:   MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

2743:   cn = pn*ppdof;
2744:   /* Allocate ci array, arrays for fill computation and */
2745:   /* free space for accumulating nonzero column info */
2746:   PetscMalloc((cn+1)*sizeof(PetscInt),&ci);
2747:   ci[0] = 0;

2749:   /* Work arrays for rows of P^T*A */
2750:   PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);
2751:   PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));
2752:   ptasparserow = ptadenserow  + an;
2753:   denserow     = ptasparserow + an;
2754:   sparserow    = denserow     + cn;

2756:   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2757:   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2758:   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2759:   PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
2760:   current_space = free_space;

2762:   /* Determine symbolic info for each row of C: */
2763:   for (i=0;i<pn;i++) {
2764:     ptnzi  = pti[i+1] - pti[i];
2765:     ptJ    = ptj + pti[i];
2766:     for (dof=0;dof<ppdof;dof++) {
2767:       ptanzi = 0;
2768:       /* Determine symbolic row of PtA: */
2769:       for (j=0;j<ptnzi;j++) {
2770:         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2771:         arow = ptJ[j]*ppdof + dof;
2772:         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2773:         anzj = ai[arow+1] - ai[arow];
2774:         ajj  = aj + ai[arow];
2775:         for (k=0;k<anzj;k++) {
2776:           if (!ptadenserow[ajj[k]]) {
2777:             ptadenserow[ajj[k]]    = -1;
2778:             ptasparserow[ptanzi++] = ajj[k];
2779:           }
2780:         }
2781:       }
2782:       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2783:       ptaj = ptasparserow;
2784:       cnzi   = 0;
2785:       for (j=0;j<ptanzi;j++) {
2786:         /* Get offset within block of P */
2787:         pshift = *ptaj%ppdof;
2788:         /* Get block row of P */
2789:         prow = (*ptaj++)/ppdof; /* integer division */
2790:         /* P has same number of nonzeros per row as the compressed form */
2791:         pnzj = pi[prow+1] - pi[prow];
2792:         pjj  = pj + pi[prow];
2793:         for (k=0;k<pnzj;k++) {
2794:           /* Locations in C are shifted by the offset within the block */
2795:           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2796:           if (!denserow[pjj[k]*ppdof+pshift]) {
2797:             denserow[pjj[k]*ppdof+pshift] = -1;
2798:             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
2799:           }
2800:         }
2801:       }

2803:       /* sort sparserow */
2804:       PetscSortInt(cnzi,sparserow);
2805: 
2806:       /* If free space is not available, make more free space */
2807:       /* Double the amount of total space in the list */
2808:       if (current_space->local_remaining<cnzi) {
2809:         PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);
2810:       }

2812:       /* Copy data into free space, and zero out denserows */
2813:       PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
2814:       current_space->array           += cnzi;
2815:       current_space->local_used      += cnzi;
2816:       current_space->local_remaining -= cnzi;

2818:       for (j=0;j<ptanzi;j++) {
2819:         ptadenserow[ptasparserow[j]] = 0;
2820:       }
2821:       for (j=0;j<cnzi;j++) {
2822:         denserow[sparserow[j]] = 0;
2823:       }
2824:       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2825:       /*        For now, we will recompute what is needed. */
2826:       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2827:     }
2828:   }
2829:   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2830:   /* Allocate space for cj, initialize cj, and */
2831:   /* destroy list of free space and other temporary array(s) */
2832:   PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);
2833:   PetscFreeSpaceContiguous(&free_space,cj);
2834:   PetscFree(ptadenserow);
2835: 
2836:   /* Allocate space for ca */
2837:   PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);
2838:   PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));
2839: 
2840:   /* put together the new matrix */
2841:   MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);

2843:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2844:   /* Since these are PETSc arrays, change flags to free them as necessary. */
2845:   c          = (Mat_SeqAIJ *)((*C)->data);
2846:   c->free_a  = PETSC_TRUE;
2847:   c->free_ij = PETSC_TRUE;
2848:   c->nonew   = 0;

2850:   /* Clean up. */
2851:   MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

2853:   PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);
2854:   return(0);
2855: }

2859: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2860: {
2861:   /* This routine requires testing -- first draft only */
2863:   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
2864:   Mat            P=pp->AIJ;
2865:   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
2866:   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
2867:   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
2868:   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
2869:   PetscInt       *ci=c->i,*cj=c->j,*cjj;
2870:   PetscInt       am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
2871:   PetscInt       i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
2872:   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

2875:   /* Allocate temporary array for storage of one row of A*P */
2876:   PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);
2877:   PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));

2879:   apj      = (PetscInt *)(apa + cn);
2880:   apjdense = apj + cn;

2882:   /* Clear old values in C */
2883:   PetscMemzero(ca,ci[cm]*sizeof(MatScalar));

2885:   for (i=0;i<am;i++) {
2886:     /* Form sparse row of A*P */
2887:     anzi  = ai[i+1] - ai[i];
2888:     apnzj = 0;
2889:     for (j=0;j<anzi;j++) {
2890:       /* Get offset within block of P */
2891:       pshift = *aj%ppdof;
2892:       /* Get block row of P */
2893:       prow   = *aj++/ppdof; /* integer division */
2894:       pnzj = pi[prow+1] - pi[prow];
2895:       pjj  = pj + pi[prow];
2896:       paj  = pa + pi[prow];
2897:       for (k=0;k<pnzj;k++) {
2898:         poffset = pjj[k]*ppdof+pshift;
2899:         if (!apjdense[poffset]) {
2900:           apjdense[poffset] = -1;
2901:           apj[apnzj++]      = poffset;
2902:         }
2903:         apa[poffset] += (*aa)*paj[k];
2904:       }
2905:       PetscLogFlops(2*pnzj);
2906:       aa++;
2907:     }

2909:     /* Sort the j index array for quick sparse axpy. */
2910:     /* Note: a array does not need sorting as it is in dense storage locations. */
2911:     PetscSortInt(apnzj,apj);

2913:     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2914:     prow    = i/ppdof; /* integer division */
2915:     pshift  = i%ppdof;
2916:     poffset = pi[prow];
2917:     pnzi = pi[prow+1] - poffset;
2918:     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2919:     pJ   = pj+poffset;
2920:     pA   = pa+poffset;
2921:     for (j=0;j<pnzi;j++) {
2922:       crow   = (*pJ)*ppdof+pshift;
2923:       cjj    = cj + ci[crow];
2924:       caj    = ca + ci[crow];
2925:       pJ++;
2926:       /* Perform sparse axpy operation.  Note cjj includes apj. */
2927:       for (k=0,nextap=0;nextap<apnzj;k++) {
2928:         if (cjj[k]==apj[nextap]) {
2929:           caj[k] += (*pA)*apa[apj[nextap++]];
2930:         }
2931:       }
2932:       PetscLogFlops(2*apnzj);
2933:       pA++;
2934:     }

2936:     /* Zero the current row info for A*P */
2937:     for (j=0;j<apnzj;j++) {
2938:       apa[apj[j]]      = 0.;
2939:       apjdense[apj[j]] = 0;
2940:     }
2941:   }

2943:   /* Assemble the final matrix and clean up */
2944:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2945:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2946:   PetscFree(apa);
2947:   return(0);
2948: }

2952: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2953: {
2954:   PetscErrorCode    ierr;

2957:   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
2958:   MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);
2959:   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);
2960:   return(0);
2961: }

2965: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
2966: {
2968:   SETERRQ(PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
2969:   return(0);
2970: }

2975: PetscErrorCode  MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2976: {
2977:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2978:   Mat               a = b->AIJ,B;
2979:   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
2980:   PetscErrorCode    ierr;
2981:   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
2982:   PetscInt          *cols;
2983:   PetscScalar       *vals;

2986:   MatGetSize(a,&m,&n);
2987:   PetscMalloc(dof*m*sizeof(PetscInt),&ilen);
2988:   for (i=0; i<m; i++) {
2989:     nmax = PetscMax(nmax,aij->ilen[i]);
2990:     for (j=0; j<dof; j++) {
2991:       ilen[dof*i+j] = aij->ilen[i];
2992:     }
2993:   }
2994:   MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);
2995:   PetscFree(ilen);
2996:   PetscMalloc(nmax*sizeof(PetscInt),&icols);
2997:   ii   = 0;
2998:   for (i=0; i<m; i++) {
2999:     MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3000:     for (j=0; j<dof; j++) {
3001:       for (k=0; k<ncols; k++) {
3002:         icols[k] = dof*cols[k]+j;
3003:       }
3004:       MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3005:       ii++;
3006:     }
3007:     MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3008:   }
3009:   PetscFree(icols);
3010:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3011:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

3013:   if (reuse == MAT_REUSE_MATRIX) {
3014:     MatHeaderReplace(A,B);
3015:   } else {
3016:     *newmat = B;
3017:   }
3018:   return(0);
3019: }

3022:  #include ../src/mat/impls/aij/mpi/mpiaij.h

3027: PetscErrorCode  MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3028: {
3029:   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
3030:   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3031:   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3032:   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
3033:   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
3034:   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3035:   PetscInt          dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
3036:   PetscInt          *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
3037:   PetscInt          rstart,cstart,*garray,ii,k;
3038:   PetscErrorCode    ierr;
3039:   PetscScalar       *vals,*ovals;

3042:   PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);
3043:   for (i=0; i<A->rmap->n/dof; i++) {
3044:     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3045:     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3046:     for (j=0; j<dof; j++) {
3047:       dnz[dof*i+j] = AIJ->ilen[i];
3048:       onz[dof*i+j] = OAIJ->ilen[i];
3049:     }
3050:   }
3051:   MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);
3052:   PetscFree2(dnz,onz);

3054:   PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);
3055:   rstart = dof*maij->A->rmap->rstart;
3056:   cstart = dof*maij->A->cmap->rstart;
3057:   garray = mpiaij->garray;

3059:   ii = rstart;
3060:   for (i=0; i<A->rmap->n/dof; i++) {
3061:     MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3062:     MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3063:     for (j=0; j<dof; j++) {
3064:       for (k=0; k<ncols; k++) {
3065:         icols[k] = cstart + dof*cols[k]+j;
3066:       }
3067:       for (k=0; k<oncols; k++) {
3068:         oicols[k] = dof*garray[ocols[k]]+j;
3069:       }
3070:       MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3071:       MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
3072:       ii++;
3073:     }
3074:     MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3075:     MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3076:   }
3077:   PetscFree2(icols,oicols);

3079:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3080:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

3082:   if (reuse == MAT_REUSE_MATRIX) {
3083:     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3084:     ((PetscObject)A)->refct = 1;
3085:     MatHeaderReplace(A,B);
3086:     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3087:   } else {
3088:     *newmat = B;
3089:   }
3090:   return(0);
3091: }


3095: /* ---------------------------------------------------------------------------------- */
3096: /*MC
3097:   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 
3098:   operations for multicomponent problems.  It interpolates each component the same
3099:   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3100:   and MATMPIAIJ for distributed matrices.

3102:   Operations provided:
3103: + MatMult
3104: . MatMultTranspose
3105: . MatMultAdd
3106: . MatMultTransposeAdd
3107: - MatView

3109:   Level: advanced

3111: M*/
3114: PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3115: {
3117:   PetscMPIInt    size;
3118:   PetscInt       n;
3119:   Mat_MPIMAIJ    *b;
3120:   Mat            B;

3123:   PetscObjectReference((PetscObject)A);

3125:   if (dof == 1) {
3126:     *maij = A;
3127:   } else {
3128:     MatCreate(((PetscObject)A)->comm,&B);
3129:     MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);
3130:     B->assembled    = PETSC_TRUE;

3132:     MPI_Comm_size(((PetscObject)A)->comm,&size);
3133:     if (size == 1) {
3134:       MatSetType(B,MATSEQMAIJ);
3135:       B->ops->destroy = MatDestroy_SeqMAIJ;
3136:       B->ops->view    = MatView_SeqMAIJ;
3137:       b      = (Mat_MPIMAIJ*)B->data;
3138:       b->dof = dof;
3139:       b->AIJ = A;
3140:       if (dof == 2) {
3141:         B->ops->mult             = MatMult_SeqMAIJ_2;
3142:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3143:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3144:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3145:       } else if (dof == 3) {
3146:         B->ops->mult             = MatMult_SeqMAIJ_3;
3147:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3148:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3149:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3150:       } else if (dof == 4) {
3151:         B->ops->mult             = MatMult_SeqMAIJ_4;
3152:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3153:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3154:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3155:       } else if (dof == 5) {
3156:         B->ops->mult             = MatMult_SeqMAIJ_5;
3157:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3158:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3159:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3160:       } else if (dof == 6) {
3161:         B->ops->mult             = MatMult_SeqMAIJ_6;
3162:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3163:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3164:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3165:       } else if (dof == 7) {
3166:         B->ops->mult             = MatMult_SeqMAIJ_7;
3167:         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3168:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3169:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3170:       } else if (dof == 8) {
3171:         B->ops->mult             = MatMult_SeqMAIJ_8;
3172:         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3173:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3174:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3175:       } else if (dof == 9) {
3176:         B->ops->mult             = MatMult_SeqMAIJ_9;
3177:         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3178:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3179:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3180:       } else if (dof == 10) {
3181:         B->ops->mult             = MatMult_SeqMAIJ_10;
3182:         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3183:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3184:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3185:       } else if (dof == 11) {
3186:         B->ops->mult             = MatMult_SeqMAIJ_11;
3187:         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3188:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3189:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3190:       } else if (dof == 16) {
3191:         B->ops->mult             = MatMult_SeqMAIJ_16;
3192:         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3193:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3194:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3195:       } else if (dof == 18) {
3196:         B->ops->mult             = MatMult_SeqMAIJ_18;
3197:         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3198:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3199:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3200:       } else {
3201:         SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
3202:       }
3203:       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
3204:       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
3205:       PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);
3206:     } else {
3207:       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
3208:       IS         from,to;
3209:       Vec        gvec;
3210:       PetscInt   *garray,i;

3212:       MatSetType(B,MATMPIMAIJ);
3213:       B->ops->destroy = MatDestroy_MPIMAIJ;
3214:       B->ops->view    = MatView_MPIMAIJ;
3215:       b      = (Mat_MPIMAIJ*)B->data;
3216:       b->dof = dof;
3217:       b->A   = A;
3218:       MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
3219:       MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);

3221:       VecGetSize(mpiaij->lvec,&n);
3222:       VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);
3223:       VecSetBlockSize(b->w,dof);

3225:       /* create two temporary Index sets for build scatter gather */
3226:       PetscMalloc((n+1)*sizeof(PetscInt),&garray);
3227:       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
3228:       ISCreateBlock(((PetscObject)A)->comm,dof,n,garray,&from);
3229:       PetscFree(garray);
3230:       ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);

3232:       /* create temporary global vector to generate scatter context */
3233:       VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);
3234:       VecSetBlockSize(gvec,dof);

3236:       /* generate the scatter context */
3237:       VecScatterCreate(gvec,from,b->w,to,&b->ctx);

3239:       ISDestroy(from);
3240:       ISDestroy(to);
3241:       VecDestroy(gvec);

3243:       B->ops->mult             = MatMult_MPIMAIJ_dof;
3244:       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3245:       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3246:       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3247:       B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ;
3248:       B->ops->ptapnumeric_mpiaij  = MatPtAPNumeric_MPIAIJ_MPIMAIJ;
3249:       PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);
3250:     }
3251:     *maij = B;
3252:     MatView_Private(B);
3253:   }
3254:   return(0);
3255: }