Actual source code: maij.c

  1: #define PETSCMAT_DLL

  3: /*
  4:     Defines the basic matrix operations for the MAIJ  matrix storage format.
  5:   This format is used for restriction and interpolation operations for 
  6:   multicomponent problems. It interpolates each component the same way
  7:   independently.

  9:      We provide:
 10:          MatMult()
 11:          MatMultTranspose()
 12:          MatMultTransposeAdd()
 13:          MatMultAdd()
 14:           and
 15:          MatCreateMAIJ(Mat,dof,Mat*)

 17:      This single directory handles both the sequential and parallel codes
 18: */

 20:  #include src/mat/impls/maij/maij.h
 21:  #include src/mat/utils/freespace.h
 22:  #include private/vecimpl.h

 26: PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
 27: {
 29:   PetscTruth     ismpimaij,isseqmaij;

 32:   PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
 33:   PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
 34:   if (ismpimaij) {
 35:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 37:     *B = b->A;
 38:   } else if (isseqmaij) {
 39:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 41:     *B = b->AIJ;
 42:   } else {
 43:     *B = A;
 44:   }
 45:   return(0);
 46: }

 50: PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
 51: {
 53:   Mat            Aij;

 56:   MatMAIJGetAIJ(A,&Aij);
 57:   MatCreateMAIJ(Aij,dof,B);
 58:   return(0);
 59: }

 63: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
 64: {
 66:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;

 69:   if (b->AIJ) {
 70:     MatDestroy(b->AIJ);
 71:   }
 72:   PetscFree(b);
 73:   return(0);
 74: }

 78: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
 79: {
 81:   Mat            B;

 84:   MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
 85:   MatView(B,viewer);
 86:   MatDestroy(B);
 87:   return(0);
 88: }

 92: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
 93: {
 95:   Mat            B;

 98:   MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
 99:   MatView(B,viewer);
100:   MatDestroy(B);
101:   return(0);
102: }

106: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
107: {
109:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

112:   if (b->AIJ) {
113:     MatDestroy(b->AIJ);
114:   }
115:   if (b->OAIJ) {
116:     MatDestroy(b->OAIJ);
117:   }
118:   if (b->A) {
119:     MatDestroy(b->A);
120:   }
121:   if (b->ctx) {
122:     VecScatterDestroy(b->ctx);
123:   }
124:   if (b->w) {
125:     VecDestroy(b->w);
126:   }
127:   PetscFree(b);
128:   return(0);
129: }

131: /*MC
132:   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 
133:   multicomponent problems, interpolating or restricting each component the same way independently.
134:   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.

136:   Operations provided:
137: . MatMult
138: . MatMultTranspose
139: . MatMultAdd
140: . MatMultTransposeAdd

142:   Level: advanced

 144: .seealso: MatCreateMAIJ(), MatMAIJGetAIJ(), MatMAIJRedimension()
145: M*/

150: PetscErrorCode  MatCreate_MAIJ(Mat A)
151: {
153:   Mat_MPIMAIJ    *b;
154:   PetscMPIInt    size;

157:   PetscNew(Mat_MPIMAIJ,&b);
158:   A->data  = (void*)b;
159:   PetscMemzero(A->ops,sizeof(struct _MatOps));
160:   A->factor           = 0;
161:   A->mapping          = 0;

163:   b->AIJ  = 0;
164:   b->dof  = 0;
165:   b->OAIJ = 0;
166:   b->ctx  = 0;
167:   b->w    = 0;
168:   MPI_Comm_size(A->comm,&size);
169:   if (size == 1){
170:     PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
171:   } else {
172:     PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
173:   }
174:   return(0);
175: }

178: /* --------------------------------------------------------------------------------------*/
181: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
182: {
183:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
184:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
185:   PetscScalar    *x,*y,*v,sum1, sum2;
187:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
188:   PetscInt       n,i,jrow,j;

191:   VecGetArray(xx,&x);
192:   VecGetArray(yy,&y);
193:   idx  = a->j;
194:   v    = a->a;
195:   ii   = a->i;

197:   for (i=0; i<m; i++) {
198:     jrow = ii[i];
199:     n    = ii[i+1] - jrow;
200:     sum1  = 0.0;
201:     sum2  = 0.0;
202:     for (j=0; j<n; j++) {
203:       sum1 += v[jrow]*x[2*idx[jrow]];
204:       sum2 += v[jrow]*x[2*idx[jrow]+1];
205:       jrow++;
206:      }
207:     y[2*i]   = sum1;
208:     y[2*i+1] = sum2;
209:   }

211:   PetscLogFlops(4*a->nz - 2*m);
212:   VecRestoreArray(xx,&x);
213:   VecRestoreArray(yy,&y);
214:   return(0);
215: }

219: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
220: {
221:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
222:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
223:   PetscScalar    *x,*y,*v,alpha1,alpha2,zero = 0.0;
225:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

228:   VecSet(yy,zero);
229:   VecGetArray(xx,&x);
230:   VecGetArray(yy,&y);
231: 
232:   for (i=0; i<m; i++) {
233:     idx    = a->j + a->i[i] ;
234:     v      = a->a + a->i[i] ;
235:     n      = a->i[i+1] - a->i[i];
236:     alpha1 = x[2*i];
237:     alpha2 = x[2*i+1];
238:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
239:   }
240:   PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);
241:   VecRestoreArray(xx,&x);
242:   VecRestoreArray(yy,&y);
243:   return(0);
244: }

248: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
249: {
250:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
251:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
252:   PetscScalar    *x,*y,*v,sum1, sum2;
254:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
255:   PetscInt       n,i,jrow,j;

258:   if (yy != zz) {VecCopy(yy,zz);}
259:   VecGetArray(xx,&x);
260:   VecGetArray(zz,&y);
261:   idx  = a->j;
262:   v    = a->a;
263:   ii   = a->i;

265:   for (i=0; i<m; i++) {
266:     jrow = ii[i];
267:     n    = ii[i+1] - jrow;
268:     sum1  = 0.0;
269:     sum2  = 0.0;
270:     for (j=0; j<n; j++) {
271:       sum1 += v[jrow]*x[2*idx[jrow]];
272:       sum2 += v[jrow]*x[2*idx[jrow]+1];
273:       jrow++;
274:      }
275:     y[2*i]   += sum1;
276:     y[2*i+1] += sum2;
277:   }

279:   PetscLogFlops(4*a->nz - 2*m);
280:   VecRestoreArray(xx,&x);
281:   VecRestoreArray(zz,&y);
282:   return(0);
283: }
286: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
287: {
288:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
289:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
290:   PetscScalar    *x,*y,*v,alpha1,alpha2;
292:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

295:   if (yy != zz) {VecCopy(yy,zz);}
296:   VecGetArray(xx,&x);
297:   VecGetArray(zz,&y);
298: 
299:   for (i=0; i<m; i++) {
300:     idx   = a->j + a->i[i] ;
301:     v     = a->a + a->i[i] ;
302:     n     = a->i[i+1] - a->i[i];
303:     alpha1 = x[2*i];
304:     alpha2 = x[2*i+1];
305:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
306:   }
307:   PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);
308:   VecRestoreArray(xx,&x);
309:   VecRestoreArray(zz,&y);
310:   return(0);
311: }
312: /* --------------------------------------------------------------------------------------*/
315: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
316: {
317:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
318:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
319:   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
321:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
322:   PetscInt       n,i,jrow,j;

325:   VecGetArray(xx,&x);
326:   VecGetArray(yy,&y);
327:   idx  = a->j;
328:   v    = a->a;
329:   ii   = a->i;

331:   for (i=0; i<m; i++) {
332:     jrow = ii[i];
333:     n    = ii[i+1] - jrow;
334:     sum1  = 0.0;
335:     sum2  = 0.0;
336:     sum3  = 0.0;
337:     for (j=0; j<n; j++) {
338:       sum1 += v[jrow]*x[3*idx[jrow]];
339:       sum2 += v[jrow]*x[3*idx[jrow]+1];
340:       sum3 += v[jrow]*x[3*idx[jrow]+2];
341:       jrow++;
342:      }
343:     y[3*i]   = sum1;
344:     y[3*i+1] = sum2;
345:     y[3*i+2] = sum3;
346:   }

348:   PetscLogFlops(6*a->nz - 3*m);
349:   VecRestoreArray(xx,&x);
350:   VecRestoreArray(yy,&y);
351:   return(0);
352: }

356: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
357: {
358:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
359:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
360:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
362:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

365:   VecSet(yy,zero);
366:   VecGetArray(xx,&x);
367:   VecGetArray(yy,&y);
368: 
369:   for (i=0; i<m; i++) {
370:     idx    = a->j + a->i[i];
371:     v      = a->a + a->i[i];
372:     n      = a->i[i+1] - a->i[i];
373:     alpha1 = x[3*i];
374:     alpha2 = x[3*i+1];
375:     alpha3 = x[3*i+2];
376:     while (n-->0) {
377:       y[3*(*idx)]   += alpha1*(*v);
378:       y[3*(*idx)+1] += alpha2*(*v);
379:       y[3*(*idx)+2] += alpha3*(*v);
380:       idx++; v++;
381:     }
382:   }
383:   PetscLogFlops(6*a->nz - 3*b->AIJ->cmap.n);
384:   VecRestoreArray(xx,&x);
385:   VecRestoreArray(yy,&y);
386:   return(0);
387: }

391: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
392: {
393:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
394:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
395:   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
397:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
398:   PetscInt       n,i,jrow,j;

401:   if (yy != zz) {VecCopy(yy,zz);}
402:   VecGetArray(xx,&x);
403:   VecGetArray(zz,&y);
404:   idx  = a->j;
405:   v    = a->a;
406:   ii   = a->i;

408:   for (i=0; i<m; i++) {
409:     jrow = ii[i];
410:     n    = ii[i+1] - jrow;
411:     sum1  = 0.0;
412:     sum2  = 0.0;
413:     sum3  = 0.0;
414:     for (j=0; j<n; j++) {
415:       sum1 += v[jrow]*x[3*idx[jrow]];
416:       sum2 += v[jrow]*x[3*idx[jrow]+1];
417:       sum3 += v[jrow]*x[3*idx[jrow]+2];
418:       jrow++;
419:      }
420:     y[3*i]   += sum1;
421:     y[3*i+1] += sum2;
422:     y[3*i+2] += sum3;
423:   }

425:   PetscLogFlops(6*a->nz);
426:   VecRestoreArray(xx,&x);
427:   VecRestoreArray(zz,&y);
428:   return(0);
429: }
432: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
433: {
434:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
435:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
436:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3;
438:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

441:   if (yy != zz) {VecCopy(yy,zz);}
442:   VecGetArray(xx,&x);
443:   VecGetArray(zz,&y);
444:   for (i=0; i<m; i++) {
445:     idx    = a->j + a->i[i] ;
446:     v      = a->a + a->i[i] ;
447:     n      = a->i[i+1] - a->i[i];
448:     alpha1 = x[3*i];
449:     alpha2 = x[3*i+1];
450:     alpha3 = x[3*i+2];
451:     while (n-->0) {
452:       y[3*(*idx)]   += alpha1*(*v);
453:       y[3*(*idx)+1] += alpha2*(*v);
454:       y[3*(*idx)+2] += alpha3*(*v);
455:       idx++; v++;
456:     }
457:   }
458:   PetscLogFlops(6*a->nz);
459:   VecRestoreArray(xx,&x);
460:   VecRestoreArray(zz,&y);
461:   return(0);
462: }

464: /* ------------------------------------------------------------------------------*/
467: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
468: {
469:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
470:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
471:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
473:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
474:   PetscInt       n,i,jrow,j;

477:   VecGetArray(xx,&x);
478:   VecGetArray(yy,&y);
479:   idx  = a->j;
480:   v    = a->a;
481:   ii   = a->i;

483:   for (i=0; i<m; i++) {
484:     jrow = ii[i];
485:     n    = ii[i+1] - jrow;
486:     sum1  = 0.0;
487:     sum2  = 0.0;
488:     sum3  = 0.0;
489:     sum4  = 0.0;
490:     for (j=0; j<n; j++) {
491:       sum1 += v[jrow]*x[4*idx[jrow]];
492:       sum2 += v[jrow]*x[4*idx[jrow]+1];
493:       sum3 += v[jrow]*x[4*idx[jrow]+2];
494:       sum4 += v[jrow]*x[4*idx[jrow]+3];
495:       jrow++;
496:      }
497:     y[4*i]   = sum1;
498:     y[4*i+1] = sum2;
499:     y[4*i+2] = sum3;
500:     y[4*i+3] = sum4;
501:   }

503:   PetscLogFlops(8*a->nz - 4*m);
504:   VecRestoreArray(xx,&x);
505:   VecRestoreArray(yy,&y);
506:   return(0);
507: }

511: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
512: {
513:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
514:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
515:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
517:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

520:   VecSet(yy,zero);
521:   VecGetArray(xx,&x);
522:   VecGetArray(yy,&y);
523:   for (i=0; i<m; i++) {
524:     idx    = a->j + a->i[i] ;
525:     v      = a->a + a->i[i] ;
526:     n      = a->i[i+1] - a->i[i];
527:     alpha1 = x[4*i];
528:     alpha2 = x[4*i+1];
529:     alpha3 = x[4*i+2];
530:     alpha4 = x[4*i+3];
531:     while (n-->0) {
532:       y[4*(*idx)]   += alpha1*(*v);
533:       y[4*(*idx)+1] += alpha2*(*v);
534:       y[4*(*idx)+2] += alpha3*(*v);
535:       y[4*(*idx)+3] += alpha4*(*v);
536:       idx++; v++;
537:     }
538:   }
539:   PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);
540:   VecRestoreArray(xx,&x);
541:   VecRestoreArray(yy,&y);
542:   return(0);
543: }

547: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
548: {
549:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
550:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
551:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
553:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
554:   PetscInt       n,i,jrow,j;

557:   if (yy != zz) {VecCopy(yy,zz);}
558:   VecGetArray(xx,&x);
559:   VecGetArray(zz,&y);
560:   idx  = a->j;
561:   v    = a->a;
562:   ii   = a->i;

564:   for (i=0; i<m; i++) {
565:     jrow = ii[i];
566:     n    = ii[i+1] - jrow;
567:     sum1  = 0.0;
568:     sum2  = 0.0;
569:     sum3  = 0.0;
570:     sum4  = 0.0;
571:     for (j=0; j<n; j++) {
572:       sum1 += v[jrow]*x[4*idx[jrow]];
573:       sum2 += v[jrow]*x[4*idx[jrow]+1];
574:       sum3 += v[jrow]*x[4*idx[jrow]+2];
575:       sum4 += v[jrow]*x[4*idx[jrow]+3];
576:       jrow++;
577:      }
578:     y[4*i]   += sum1;
579:     y[4*i+1] += sum2;
580:     y[4*i+2] += sum3;
581:     y[4*i+3] += sum4;
582:   }

584:   PetscLogFlops(8*a->nz - 4*m);
585:   VecRestoreArray(xx,&x);
586:   VecRestoreArray(zz,&y);
587:   return(0);
588: }
591: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
592: {
593:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
594:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
595:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
597:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

600:   if (yy != zz) {VecCopy(yy,zz);}
601:   VecGetArray(xx,&x);
602:   VecGetArray(zz,&y);
603: 
604:   for (i=0; i<m; i++) {
605:     idx    = a->j + a->i[i] ;
606:     v      = a->a + a->i[i] ;
607:     n      = a->i[i+1] - a->i[i];
608:     alpha1 = x[4*i];
609:     alpha2 = x[4*i+1];
610:     alpha3 = x[4*i+2];
611:     alpha4 = x[4*i+3];
612:     while (n-->0) {
613:       y[4*(*idx)]   += alpha1*(*v);
614:       y[4*(*idx)+1] += alpha2*(*v);
615:       y[4*(*idx)+2] += alpha3*(*v);
616:       y[4*(*idx)+3] += alpha4*(*v);
617:       idx++; v++;
618:     }
619:   }
620:   PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);
621:   VecRestoreArray(xx,&x);
622:   VecRestoreArray(zz,&y);
623:   return(0);
624: }
625: /* ------------------------------------------------------------------------------*/

629: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
630: {
631:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
632:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
633:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
635:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
636:   PetscInt       n,i,jrow,j;

639:   VecGetArray(xx,&x);
640:   VecGetArray(yy,&y);
641:   idx  = a->j;
642:   v    = a->a;
643:   ii   = a->i;

645:   for (i=0; i<m; i++) {
646:     jrow = ii[i];
647:     n    = ii[i+1] - jrow;
648:     sum1  = 0.0;
649:     sum2  = 0.0;
650:     sum3  = 0.0;
651:     sum4  = 0.0;
652:     sum5  = 0.0;
653:     for (j=0; j<n; j++) {
654:       sum1 += v[jrow]*x[5*idx[jrow]];
655:       sum2 += v[jrow]*x[5*idx[jrow]+1];
656:       sum3 += v[jrow]*x[5*idx[jrow]+2];
657:       sum4 += v[jrow]*x[5*idx[jrow]+3];
658:       sum5 += v[jrow]*x[5*idx[jrow]+4];
659:       jrow++;
660:      }
661:     y[5*i]   = sum1;
662:     y[5*i+1] = sum2;
663:     y[5*i+2] = sum3;
664:     y[5*i+3] = sum4;
665:     y[5*i+4] = sum5;
666:   }

668:   PetscLogFlops(10*a->nz - 5*m);
669:   VecRestoreArray(xx,&x);
670:   VecRestoreArray(yy,&y);
671:   return(0);
672: }

676: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
677: {
678:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
679:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
680:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
682:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

685:   VecSet(yy,zero);
686:   VecGetArray(xx,&x);
687:   VecGetArray(yy,&y);
688: 
689:   for (i=0; i<m; i++) {
690:     idx    = a->j + a->i[i] ;
691:     v      = a->a + a->i[i] ;
692:     n      = a->i[i+1] - a->i[i];
693:     alpha1 = x[5*i];
694:     alpha2 = x[5*i+1];
695:     alpha3 = x[5*i+2];
696:     alpha4 = x[5*i+3];
697:     alpha5 = x[5*i+4];
698:     while (n-->0) {
699:       y[5*(*idx)]   += alpha1*(*v);
700:       y[5*(*idx)+1] += alpha2*(*v);
701:       y[5*(*idx)+2] += alpha3*(*v);
702:       y[5*(*idx)+3] += alpha4*(*v);
703:       y[5*(*idx)+4] += alpha5*(*v);
704:       idx++; v++;
705:     }
706:   }
707:   PetscLogFlops(10*a->nz - 5*b->AIJ->cmap.n);
708:   VecRestoreArray(xx,&x);
709:   VecRestoreArray(yy,&y);
710:   return(0);
711: }

715: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
716: {
717:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
718:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
719:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
721:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
722:   PetscInt       n,i,jrow,j;

725:   if (yy != zz) {VecCopy(yy,zz);}
726:   VecGetArray(xx,&x);
727:   VecGetArray(zz,&y);
728:   idx  = a->j;
729:   v    = a->a;
730:   ii   = a->i;

732:   for (i=0; i<m; i++) {
733:     jrow = ii[i];
734:     n    = ii[i+1] - jrow;
735:     sum1  = 0.0;
736:     sum2  = 0.0;
737:     sum3  = 0.0;
738:     sum4  = 0.0;
739:     sum5  = 0.0;
740:     for (j=0; j<n; j++) {
741:       sum1 += v[jrow]*x[5*idx[jrow]];
742:       sum2 += v[jrow]*x[5*idx[jrow]+1];
743:       sum3 += v[jrow]*x[5*idx[jrow]+2];
744:       sum4 += v[jrow]*x[5*idx[jrow]+3];
745:       sum5 += v[jrow]*x[5*idx[jrow]+4];
746:       jrow++;
747:      }
748:     y[5*i]   += sum1;
749:     y[5*i+1] += sum2;
750:     y[5*i+2] += sum3;
751:     y[5*i+3] += sum4;
752:     y[5*i+4] += sum5;
753:   }

755:   PetscLogFlops(10*a->nz);
756:   VecRestoreArray(xx,&x);
757:   VecRestoreArray(zz,&y);
758:   return(0);
759: }

763: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
764: {
765:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
766:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
767:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
769:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

772:   if (yy != zz) {VecCopy(yy,zz);}
773:   VecGetArray(xx,&x);
774:   VecGetArray(zz,&y);
775: 
776:   for (i=0; i<m; i++) {
777:     idx    = a->j + a->i[i] ;
778:     v      = a->a + a->i[i] ;
779:     n      = a->i[i+1] - a->i[i];
780:     alpha1 = x[5*i];
781:     alpha2 = x[5*i+1];
782:     alpha3 = x[5*i+2];
783:     alpha4 = x[5*i+3];
784:     alpha5 = x[5*i+4];
785:     while (n-->0) {
786:       y[5*(*idx)]   += alpha1*(*v);
787:       y[5*(*idx)+1] += alpha2*(*v);
788:       y[5*(*idx)+2] += alpha3*(*v);
789:       y[5*(*idx)+3] += alpha4*(*v);
790:       y[5*(*idx)+4] += alpha5*(*v);
791:       idx++; v++;
792:     }
793:   }
794:   PetscLogFlops(10*a->nz);
795:   VecRestoreArray(xx,&x);
796:   VecRestoreArray(zz,&y);
797:   return(0);
798: }

800: /* ------------------------------------------------------------------------------*/
803: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
804: {
805:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
806:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
807:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
809:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
810:   PetscInt       n,i,jrow,j;

813:   VecGetArray(xx,&x);
814:   VecGetArray(yy,&y);
815:   idx  = a->j;
816:   v    = a->a;
817:   ii   = a->i;

819:   for (i=0; i<m; i++) {
820:     jrow = ii[i];
821:     n    = ii[i+1] - jrow;
822:     sum1  = 0.0;
823:     sum2  = 0.0;
824:     sum3  = 0.0;
825:     sum4  = 0.0;
826:     sum5  = 0.0;
827:     sum6  = 0.0;
828:     for (j=0; j<n; j++) {
829:       sum1 += v[jrow]*x[6*idx[jrow]];
830:       sum2 += v[jrow]*x[6*idx[jrow]+1];
831:       sum3 += v[jrow]*x[6*idx[jrow]+2];
832:       sum4 += v[jrow]*x[6*idx[jrow]+3];
833:       sum5 += v[jrow]*x[6*idx[jrow]+4];
834:       sum6 += v[jrow]*x[6*idx[jrow]+5];
835:       jrow++;
836:      }
837:     y[6*i]   = sum1;
838:     y[6*i+1] = sum2;
839:     y[6*i+2] = sum3;
840:     y[6*i+3] = sum4;
841:     y[6*i+4] = sum5;
842:     y[6*i+5] = sum6;
843:   }

845:   PetscLogFlops(12*a->nz - 6*m);
846:   VecRestoreArray(xx,&x);
847:   VecRestoreArray(yy,&y);
848:   return(0);
849: }

853: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
854: {
855:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
856:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
857:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
859:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

862:   VecSet(yy,zero);
863:   VecGetArray(xx,&x);
864:   VecGetArray(yy,&y);

866:   for (i=0; i<m; i++) {
867:     idx    = a->j + a->i[i] ;
868:     v      = a->a + a->i[i] ;
869:     n      = a->i[i+1] - a->i[i];
870:     alpha1 = x[6*i];
871:     alpha2 = x[6*i+1];
872:     alpha3 = x[6*i+2];
873:     alpha4 = x[6*i+3];
874:     alpha5 = x[6*i+4];
875:     alpha6 = x[6*i+5];
876:     while (n-->0) {
877:       y[6*(*idx)]   += alpha1*(*v);
878:       y[6*(*idx)+1] += alpha2*(*v);
879:       y[6*(*idx)+2] += alpha3*(*v);
880:       y[6*(*idx)+3] += alpha4*(*v);
881:       y[6*(*idx)+4] += alpha5*(*v);
882:       y[6*(*idx)+5] += alpha6*(*v);
883:       idx++; v++;
884:     }
885:   }
886:   PetscLogFlops(12*a->nz - 6*b->AIJ->cmap.n);
887:   VecRestoreArray(xx,&x);
888:   VecRestoreArray(yy,&y);
889:   return(0);
890: }

894: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
895: {
896:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
897:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
898:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
900:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
901:   PetscInt       n,i,jrow,j;

904:   if (yy != zz) {VecCopy(yy,zz);}
905:   VecGetArray(xx,&x);
906:   VecGetArray(zz,&y);
907:   idx  = a->j;
908:   v    = a->a;
909:   ii   = a->i;

911:   for (i=0; i<m; i++) {
912:     jrow = ii[i];
913:     n    = ii[i+1] - jrow;
914:     sum1  = 0.0;
915:     sum2  = 0.0;
916:     sum3  = 0.0;
917:     sum4  = 0.0;
918:     sum5  = 0.0;
919:     sum6  = 0.0;
920:     for (j=0; j<n; j++) {
921:       sum1 += v[jrow]*x[6*idx[jrow]];
922:       sum2 += v[jrow]*x[6*idx[jrow]+1];
923:       sum3 += v[jrow]*x[6*idx[jrow]+2];
924:       sum4 += v[jrow]*x[6*idx[jrow]+3];
925:       sum5 += v[jrow]*x[6*idx[jrow]+4];
926:       sum6 += v[jrow]*x[6*idx[jrow]+5];
927:       jrow++;
928:      }
929:     y[6*i]   += sum1;
930:     y[6*i+1] += sum2;
931:     y[6*i+2] += sum3;
932:     y[6*i+3] += sum4;
933:     y[6*i+4] += sum5;
934:     y[6*i+5] += sum6;
935:   }

937:   PetscLogFlops(12*a->nz);
938:   VecRestoreArray(xx,&x);
939:   VecRestoreArray(zz,&y);
940:   return(0);
941: }

945: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
946: {
947:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
948:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
949:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
951:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

954:   if (yy != zz) {VecCopy(yy,zz);}
955:   VecGetArray(xx,&x);
956:   VecGetArray(zz,&y);
957: 
958:   for (i=0; i<m; i++) {
959:     idx    = a->j + a->i[i] ;
960:     v      = a->a + a->i[i] ;
961:     n      = a->i[i+1] - a->i[i];
962:     alpha1 = x[6*i];
963:     alpha2 = x[6*i+1];
964:     alpha3 = x[6*i+2];
965:     alpha4 = x[6*i+3];
966:     alpha5 = x[6*i+4];
967:     alpha6 = x[6*i+5];
968:     while (n-->0) {
969:       y[6*(*idx)]   += alpha1*(*v);
970:       y[6*(*idx)+1] += alpha2*(*v);
971:       y[6*(*idx)+2] += alpha3*(*v);
972:       y[6*(*idx)+3] += alpha4*(*v);
973:       y[6*(*idx)+4] += alpha5*(*v);
974:       y[6*(*idx)+5] += alpha6*(*v);
975:       idx++; v++;
976:     }
977:   }
978:   PetscLogFlops(12*a->nz);
979:   VecRestoreArray(xx,&x);
980:   VecRestoreArray(zz,&y);
981:   return(0);
982: }

984: /* ------------------------------------------------------------------------------*/
987: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
988: {
989:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
990:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
991:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
993:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
994:   PetscInt       n,i,jrow,j;

997:   VecGetArray(xx,&x);
998:   VecGetArray(yy,&y);
999:   idx  = a->j;
1000:   v    = a->a;
1001:   ii   = a->i;

1003:   for (i=0; i<m; i++) {
1004:     jrow = ii[i];
1005:     n    = ii[i+1] - jrow;
1006:     sum1  = 0.0;
1007:     sum2  = 0.0;
1008:     sum3  = 0.0;
1009:     sum4  = 0.0;
1010:     sum5  = 0.0;
1011:     sum6  = 0.0;
1012:     sum7  = 0.0;
1013:     for (j=0; j<n; j++) {
1014:       sum1 += v[jrow]*x[7*idx[jrow]];
1015:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1016:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1017:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1018:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1019:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1020:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1021:       jrow++;
1022:      }
1023:     y[7*i]   = sum1;
1024:     y[7*i+1] = sum2;
1025:     y[7*i+2] = sum3;
1026:     y[7*i+3] = sum4;
1027:     y[7*i+4] = sum5;
1028:     y[7*i+5] = sum6;
1029:     y[7*i+6] = sum7;
1030:   }

1032:   PetscLogFlops(14*a->nz - 7*m);
1033:   VecRestoreArray(xx,&x);
1034:   VecRestoreArray(yy,&y);
1035:   return(0);
1036: }

1040: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1041: {
1042:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1043:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1044:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0;
1046:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1049:   VecSet(yy,zero);
1050:   VecGetArray(xx,&x);
1051:   VecGetArray(yy,&y);

1053:   for (i=0; i<m; i++) {
1054:     idx    = a->j + a->i[i] ;
1055:     v      = a->a + a->i[i] ;
1056:     n      = a->i[i+1] - a->i[i];
1057:     alpha1 = x[7*i];
1058:     alpha2 = x[7*i+1];
1059:     alpha3 = x[7*i+2];
1060:     alpha4 = x[7*i+3];
1061:     alpha5 = x[7*i+4];
1062:     alpha6 = x[7*i+5];
1063:     alpha7 = x[7*i+6];
1064:     while (n-->0) {
1065:       y[7*(*idx)]   += alpha1*(*v);
1066:       y[7*(*idx)+1] += alpha2*(*v);
1067:       y[7*(*idx)+2] += alpha3*(*v);
1068:       y[7*(*idx)+3] += alpha4*(*v);
1069:       y[7*(*idx)+4] += alpha5*(*v);
1070:       y[7*(*idx)+5] += alpha6*(*v);
1071:       y[7*(*idx)+6] += alpha7*(*v);
1072:       idx++; v++;
1073:     }
1074:   }
1075:   PetscLogFlops(14*a->nz - 7*b->AIJ->cmap.n);
1076:   VecRestoreArray(xx,&x);
1077:   VecRestoreArray(yy,&y);
1078:   return(0);
1079: }

1083: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1084: {
1085:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1086:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1087:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1089:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1090:   PetscInt       n,i,jrow,j;

1093:   if (yy != zz) {VecCopy(yy,zz);}
1094:   VecGetArray(xx,&x);
1095:   VecGetArray(zz,&y);
1096:   idx  = a->j;
1097:   v    = a->a;
1098:   ii   = a->i;

1100:   for (i=0; i<m; i++) {
1101:     jrow = ii[i];
1102:     n    = ii[i+1] - jrow;
1103:     sum1  = 0.0;
1104:     sum2  = 0.0;
1105:     sum3  = 0.0;
1106:     sum4  = 0.0;
1107:     sum5  = 0.0;
1108:     sum6  = 0.0;
1109:     sum7  = 0.0;
1110:     for (j=0; j<n; j++) {
1111:       sum1 += v[jrow]*x[7*idx[jrow]];
1112:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1113:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1114:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1115:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1116:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1117:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1118:       jrow++;
1119:      }
1120:     y[7*i]   += sum1;
1121:     y[7*i+1] += sum2;
1122:     y[7*i+2] += sum3;
1123:     y[7*i+3] += sum4;
1124:     y[7*i+4] += sum5;
1125:     y[7*i+5] += sum6;
1126:     y[7*i+6] += sum7;
1127:   }

1129:   PetscLogFlops(14*a->nz);
1130:   VecRestoreArray(xx,&x);
1131:   VecRestoreArray(zz,&y);
1132:   return(0);
1133: }

1137: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1138: {
1139:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1140:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1141:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1143:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1146:   if (yy != zz) {VecCopy(yy,zz);}
1147:   VecGetArray(xx,&x);
1148:   VecGetArray(zz,&y);
1149:   for (i=0; i<m; i++) {
1150:     idx    = a->j + a->i[i] ;
1151:     v      = a->a + a->i[i] ;
1152:     n      = a->i[i+1] - a->i[i];
1153:     alpha1 = x[7*i];
1154:     alpha2 = x[7*i+1];
1155:     alpha3 = x[7*i+2];
1156:     alpha4 = x[7*i+3];
1157:     alpha5 = x[7*i+4];
1158:     alpha6 = x[7*i+5];
1159:     alpha7 = x[7*i+6];
1160:     while (n-->0) {
1161:       y[7*(*idx)]   += alpha1*(*v);
1162:       y[7*(*idx)+1] += alpha2*(*v);
1163:       y[7*(*idx)+2] += alpha3*(*v);
1164:       y[7*(*idx)+3] += alpha4*(*v);
1165:       y[7*(*idx)+4] += alpha5*(*v);
1166:       y[7*(*idx)+5] += alpha6*(*v);
1167:       y[7*(*idx)+6] += alpha7*(*v);
1168:       idx++; v++;
1169:     }
1170:   }
1171:   PetscLogFlops(14*a->nz);
1172:   VecRestoreArray(xx,&x);
1173:   VecRestoreArray(zz,&y);
1174:   return(0);
1175: }

1179: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1180: {
1181:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1182:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1183:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1185:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1186:   PetscInt       n,i,jrow,j;

1189:   VecGetArray(xx,&x);
1190:   VecGetArray(yy,&y);
1191:   idx  = a->j;
1192:   v    = a->a;
1193:   ii   = a->i;

1195:   for (i=0; i<m; i++) {
1196:     jrow = ii[i];
1197:     n    = ii[i+1] - jrow;
1198:     sum1  = 0.0;
1199:     sum2  = 0.0;
1200:     sum3  = 0.0;
1201:     sum4  = 0.0;
1202:     sum5  = 0.0;
1203:     sum6  = 0.0;
1204:     sum7  = 0.0;
1205:     sum8  = 0.0;
1206:     for (j=0; j<n; j++) {
1207:       sum1 += v[jrow]*x[8*idx[jrow]];
1208:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1209:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1210:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1211:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1212:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1213:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1214:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1215:       jrow++;
1216:      }
1217:     y[8*i]   = sum1;
1218:     y[8*i+1] = sum2;
1219:     y[8*i+2] = sum3;
1220:     y[8*i+3] = sum4;
1221:     y[8*i+4] = sum5;
1222:     y[8*i+5] = sum6;
1223:     y[8*i+6] = sum7;
1224:     y[8*i+7] = sum8;
1225:   }

1227:   PetscLogFlops(16*a->nz - 8*m);
1228:   VecRestoreArray(xx,&x);
1229:   VecRestoreArray(yy,&y);
1230:   return(0);
1231: }

1235: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1236: {
1237:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1238:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1239:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1241:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1244:   VecSet(yy,zero);
1245:   VecGetArray(xx,&x);
1246:   VecGetArray(yy,&y);

1248:   for (i=0; i<m; i++) {
1249:     idx    = a->j + a->i[i] ;
1250:     v      = a->a + a->i[i] ;
1251:     n      = a->i[i+1] - a->i[i];
1252:     alpha1 = x[8*i];
1253:     alpha2 = x[8*i+1];
1254:     alpha3 = x[8*i+2];
1255:     alpha4 = x[8*i+3];
1256:     alpha5 = x[8*i+4];
1257:     alpha6 = x[8*i+5];
1258:     alpha7 = x[8*i+6];
1259:     alpha8 = x[8*i+7];
1260:     while (n-->0) {
1261:       y[8*(*idx)]   += alpha1*(*v);
1262:       y[8*(*idx)+1] += alpha2*(*v);
1263:       y[8*(*idx)+2] += alpha3*(*v);
1264:       y[8*(*idx)+3] += alpha4*(*v);
1265:       y[8*(*idx)+4] += alpha5*(*v);
1266:       y[8*(*idx)+5] += alpha6*(*v);
1267:       y[8*(*idx)+6] += alpha7*(*v);
1268:       y[8*(*idx)+7] += alpha8*(*v);
1269:       idx++; v++;
1270:     }
1271:   }
1272:   PetscLogFlops(16*a->nz - 8*b->AIJ->cmap.n);
1273:   VecRestoreArray(xx,&x);
1274:   VecRestoreArray(yy,&y);
1275:   return(0);
1276: }

1280: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1281: {
1282:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1283:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1284:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1286:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1287:   PetscInt       n,i,jrow,j;

1290:   if (yy != zz) {VecCopy(yy,zz);}
1291:   VecGetArray(xx,&x);
1292:   VecGetArray(zz,&y);
1293:   idx  = a->j;
1294:   v    = a->a;
1295:   ii   = a->i;

1297:   for (i=0; i<m; i++) {
1298:     jrow = ii[i];
1299:     n    = ii[i+1] - jrow;
1300:     sum1  = 0.0;
1301:     sum2  = 0.0;
1302:     sum3  = 0.0;
1303:     sum4  = 0.0;
1304:     sum5  = 0.0;
1305:     sum6  = 0.0;
1306:     sum7  = 0.0;
1307:     sum8  = 0.0;
1308:     for (j=0; j<n; j++) {
1309:       sum1 += v[jrow]*x[8*idx[jrow]];
1310:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1311:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1312:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1313:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1314:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1315:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1316:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1317:       jrow++;
1318:      }
1319:     y[8*i]   += sum1;
1320:     y[8*i+1] += sum2;
1321:     y[8*i+2] += sum3;
1322:     y[8*i+3] += sum4;
1323:     y[8*i+4] += sum5;
1324:     y[8*i+5] += sum6;
1325:     y[8*i+6] += sum7;
1326:     y[8*i+7] += sum8;
1327:   }

1329:   PetscLogFlops(16*a->nz);
1330:   VecRestoreArray(xx,&x);
1331:   VecRestoreArray(zz,&y);
1332:   return(0);
1333: }

1337: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1338: {
1339:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1340:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1341:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1343:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1346:   if (yy != zz) {VecCopy(yy,zz);}
1347:   VecGetArray(xx,&x);
1348:   VecGetArray(zz,&y);
1349:   for (i=0; i<m; i++) {
1350:     idx    = a->j + a->i[i] ;
1351:     v      = a->a + a->i[i] ;
1352:     n      = a->i[i+1] - a->i[i];
1353:     alpha1 = x[8*i];
1354:     alpha2 = x[8*i+1];
1355:     alpha3 = x[8*i+2];
1356:     alpha4 = x[8*i+3];
1357:     alpha5 = x[8*i+4];
1358:     alpha6 = x[8*i+5];
1359:     alpha7 = x[8*i+6];
1360:     alpha8 = x[8*i+7];
1361:     while (n-->0) {
1362:       y[8*(*idx)]   += alpha1*(*v);
1363:       y[8*(*idx)+1] += alpha2*(*v);
1364:       y[8*(*idx)+2] += alpha3*(*v);
1365:       y[8*(*idx)+3] += alpha4*(*v);
1366:       y[8*(*idx)+4] += alpha5*(*v);
1367:       y[8*(*idx)+5] += alpha6*(*v);
1368:       y[8*(*idx)+6] += alpha7*(*v);
1369:       y[8*(*idx)+7] += alpha8*(*v);
1370:       idx++; v++;
1371:     }
1372:   }
1373:   PetscLogFlops(16*a->nz);
1374:   VecRestoreArray(xx,&x);
1375:   VecRestoreArray(zz,&y);
1376:   return(0);
1377: }

1379: /* ------------------------------------------------------------------------------*/
1382: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1383: {
1384:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1385:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1386:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1388:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1389:   PetscInt       n,i,jrow,j;

1392:   VecGetArray(xx,&x);
1393:   VecGetArray(yy,&y);
1394:   idx  = a->j;
1395:   v    = a->a;
1396:   ii   = a->i;

1398:   for (i=0; i<m; i++) {
1399:     jrow = ii[i];
1400:     n    = ii[i+1] - jrow;
1401:     sum1  = 0.0;
1402:     sum2  = 0.0;
1403:     sum3  = 0.0;
1404:     sum4  = 0.0;
1405:     sum5  = 0.0;
1406:     sum6  = 0.0;
1407:     sum7  = 0.0;
1408:     sum8  = 0.0;
1409:     sum9  = 0.0;
1410:     for (j=0; j<n; j++) {
1411:       sum1 += v[jrow]*x[9*idx[jrow]];
1412:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1413:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1414:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1415:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1416:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1417:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1418:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1419:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1420:       jrow++;
1421:      }
1422:     y[9*i]   = sum1;
1423:     y[9*i+1] = sum2;
1424:     y[9*i+2] = sum3;
1425:     y[9*i+3] = sum4;
1426:     y[9*i+4] = sum5;
1427:     y[9*i+5] = sum6;
1428:     y[9*i+6] = sum7;
1429:     y[9*i+7] = sum8;
1430:     y[9*i+8] = sum9;
1431:   }

1433:   PetscLogFlops(18*a->nz - 9*m);
1434:   VecRestoreArray(xx,&x);
1435:   VecRestoreArray(yy,&y);
1436:   return(0);
1437: }

1439: /* ------------------------------------------------------------------------------*/

1443: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1444: {
1445:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1446:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1447:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1449:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1452:   VecSet(yy,zero);
1453:   VecGetArray(xx,&x);
1454:   VecGetArray(yy,&y);

1456:   for (i=0; i<m; i++) {
1457:     idx    = a->j + a->i[i] ;
1458:     v      = a->a + a->i[i] ;
1459:     n      = a->i[i+1] - a->i[i];
1460:     alpha1 = x[9*i];
1461:     alpha2 = x[9*i+1];
1462:     alpha3 = x[9*i+2];
1463:     alpha4 = x[9*i+3];
1464:     alpha5 = x[9*i+4];
1465:     alpha6 = x[9*i+5];
1466:     alpha7 = x[9*i+6];
1467:     alpha8 = x[9*i+7];
1468:     alpha9 = x[9*i+8];
1469:     while (n-->0) {
1470:       y[9*(*idx)]   += alpha1*(*v);
1471:       y[9*(*idx)+1] += alpha2*(*v);
1472:       y[9*(*idx)+2] += alpha3*(*v);
1473:       y[9*(*idx)+3] += alpha4*(*v);
1474:       y[9*(*idx)+4] += alpha5*(*v);
1475:       y[9*(*idx)+5] += alpha6*(*v);
1476:       y[9*(*idx)+6] += alpha7*(*v);
1477:       y[9*(*idx)+7] += alpha8*(*v);
1478:       y[9*(*idx)+8] += alpha9*(*v);
1479:       idx++; v++;
1480:     }
1481:   }
1482:   PetscLogFlops(18*a->nz - 9*b->AIJ->cmap.n);
1483:   VecRestoreArray(xx,&x);
1484:   VecRestoreArray(yy,&y);
1485:   return(0);
1486: }

1490: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1491: {
1492:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1493:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1494:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1496:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1497:   PetscInt       n,i,jrow,j;

1500:   if (yy != zz) {VecCopy(yy,zz);}
1501:   VecGetArray(xx,&x);
1502:   VecGetArray(zz,&y);
1503:   idx  = a->j;
1504:   v    = a->a;
1505:   ii   = a->i;

1507:   for (i=0; i<m; i++) {
1508:     jrow = ii[i];
1509:     n    = ii[i+1] - jrow;
1510:     sum1  = 0.0;
1511:     sum2  = 0.0;
1512:     sum3  = 0.0;
1513:     sum4  = 0.0;
1514:     sum5  = 0.0;
1515:     sum6  = 0.0;
1516:     sum7  = 0.0;
1517:     sum8  = 0.0;
1518:     sum9  = 0.0;
1519:     for (j=0; j<n; j++) {
1520:       sum1 += v[jrow]*x[9*idx[jrow]];
1521:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1522:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1523:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1524:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1525:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1526:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1527:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1528:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1529:       jrow++;
1530:      }
1531:     y[9*i]   += sum1;
1532:     y[9*i+1] += sum2;
1533:     y[9*i+2] += sum3;
1534:     y[9*i+3] += sum4;
1535:     y[9*i+4] += sum5;
1536:     y[9*i+5] += sum6;
1537:     y[9*i+6] += sum7;
1538:     y[9*i+7] += sum8;
1539:     y[9*i+8] += sum9;
1540:   }

1542:   PetscLogFlops(18*a->nz);
1543:   VecRestoreArray(xx,&x);
1544:   VecRestoreArray(zz,&y);
1545:   return(0);
1546: }

1550: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1551: {
1552:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1553:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1554:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1556:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1559:   if (yy != zz) {VecCopy(yy,zz);}
1560:   VecGetArray(xx,&x);
1561:   VecGetArray(zz,&y);
1562:   for (i=0; i<m; i++) {
1563:     idx    = a->j + a->i[i] ;
1564:     v      = a->a + a->i[i] ;
1565:     n      = a->i[i+1] - a->i[i];
1566:     alpha1 = x[9*i];
1567:     alpha2 = x[9*i+1];
1568:     alpha3 = x[9*i+2];
1569:     alpha4 = x[9*i+3];
1570:     alpha5 = x[9*i+4];
1571:     alpha6 = x[9*i+5];
1572:     alpha7 = x[9*i+6];
1573:     alpha8 = x[9*i+7];
1574:     alpha9 = x[9*i+8];
1575:     while (n-->0) {
1576:       y[9*(*idx)]   += alpha1*(*v);
1577:       y[9*(*idx)+1] += alpha2*(*v);
1578:       y[9*(*idx)+2] += alpha3*(*v);
1579:       y[9*(*idx)+3] += alpha4*(*v);
1580:       y[9*(*idx)+4] += alpha5*(*v);
1581:       y[9*(*idx)+5] += alpha6*(*v);
1582:       y[9*(*idx)+6] += alpha7*(*v);
1583:       y[9*(*idx)+7] += alpha8*(*v);
1584:       y[9*(*idx)+8] += alpha9*(*v);
1585:       idx++; v++;
1586:     }
1587:   }
1588:   PetscLogFlops(18*a->nz);
1589:   VecRestoreArray(xx,&x);
1590:   VecRestoreArray(zz,&y);
1591:   return(0);
1592: }
1593: /*--------------------------------------------------------------------------------------------*/
1596: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1597: {
1598:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1599:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1600:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1602:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1603:   PetscInt       n,i,jrow,j;

1606:   VecGetArray(xx,&x);
1607:   VecGetArray(yy,&y);
1608:   idx  = a->j;
1609:   v    = a->a;
1610:   ii   = a->i;

1612:   for (i=0; i<m; i++) {
1613:     jrow = ii[i];
1614:     n    = ii[i+1] - jrow;
1615:     sum1  = 0.0;
1616:     sum2  = 0.0;
1617:     sum3  = 0.0;
1618:     sum4  = 0.0;
1619:     sum5  = 0.0;
1620:     sum6  = 0.0;
1621:     sum7  = 0.0;
1622:     sum8  = 0.0;
1623:     sum9  = 0.0;
1624:     sum10 = 0.0;
1625:     for (j=0; j<n; j++) {
1626:       sum1  += v[jrow]*x[10*idx[jrow]];
1627:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1628:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1629:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1630:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1631:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1632:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1633:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1634:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1635:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1636:       jrow++;
1637:      }
1638:     y[10*i]   = sum1;
1639:     y[10*i+1] = sum2;
1640:     y[10*i+2] = sum3;
1641:     y[10*i+3] = sum4;
1642:     y[10*i+4] = sum5;
1643:     y[10*i+5] = sum6;
1644:     y[10*i+6] = sum7;
1645:     y[10*i+7] = sum8;
1646:     y[10*i+8] = sum9;
1647:     y[10*i+9] = sum10;
1648:   }

1650:   PetscLogFlops(20*a->nz - 10*m);
1651:   VecRestoreArray(xx,&x);
1652:   VecRestoreArray(yy,&y);
1653:   return(0);
1654: }

1658: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1659: {
1660:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1661:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1662:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1664:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1665:   PetscInt       n,i,jrow,j;

1668:   if (yy != zz) {VecCopy(yy,zz);}
1669:   VecGetArray(xx,&x);
1670:   VecGetArray(zz,&y);
1671:   idx  = a->j;
1672:   v    = a->a;
1673:   ii   = a->i;

1675:   for (i=0; i<m; i++) {
1676:     jrow = ii[i];
1677:     n    = ii[i+1] - jrow;
1678:     sum1  = 0.0;
1679:     sum2  = 0.0;
1680:     sum3  = 0.0;
1681:     sum4  = 0.0;
1682:     sum5  = 0.0;
1683:     sum6  = 0.0;
1684:     sum7  = 0.0;
1685:     sum8  = 0.0;
1686:     sum9  = 0.0;
1687:     sum10 = 0.0;
1688:     for (j=0; j<n; j++) {
1689:       sum1  += v[jrow]*x[10*idx[jrow]];
1690:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1691:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1692:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1693:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1694:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1695:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1696:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1697:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1698:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1699:       jrow++;
1700:      }
1701:     y[10*i]   += sum1;
1702:     y[10*i+1] += sum2;
1703:     y[10*i+2] += sum3;
1704:     y[10*i+3] += sum4;
1705:     y[10*i+4] += sum5;
1706:     y[10*i+5] += sum6;
1707:     y[10*i+6] += sum7;
1708:     y[10*i+7] += sum8;
1709:     y[10*i+8] += sum9;
1710:     y[10*i+9] += sum10;
1711:   }

1713:   PetscLogFlops(20*a->nz - 10*m);
1714:   VecRestoreArray(xx,&x);
1715:   VecRestoreArray(yy,&y);
1716:   return(0);
1717: }

1721: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1722: {
1723:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1724:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1725:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0;
1727:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1730:   VecSet(yy,zero);
1731:   VecGetArray(xx,&x);
1732:   VecGetArray(yy,&y);

1734:   for (i=0; i<m; i++) {
1735:     idx    = a->j + a->i[i] ;
1736:     v      = a->a + a->i[i] ;
1737:     n      = a->i[i+1] - a->i[i];
1738:     alpha1 = x[10*i];
1739:     alpha2 = x[10*i+1];
1740:     alpha3 = x[10*i+2];
1741:     alpha4 = x[10*i+3];
1742:     alpha5 = x[10*i+4];
1743:     alpha6 = x[10*i+5];
1744:     alpha7 = x[10*i+6];
1745:     alpha8 = x[10*i+7];
1746:     alpha9 = x[10*i+8];
1747:     alpha10 = x[10*i+9];
1748:     while (n-->0) {
1749:       y[10*(*idx)]   += alpha1*(*v);
1750:       y[10*(*idx)+1] += alpha2*(*v);
1751:       y[10*(*idx)+2] += alpha3*(*v);
1752:       y[10*(*idx)+3] += alpha4*(*v);
1753:       y[10*(*idx)+4] += alpha5*(*v);
1754:       y[10*(*idx)+5] += alpha6*(*v);
1755:       y[10*(*idx)+6] += alpha7*(*v);
1756:       y[10*(*idx)+7] += alpha8*(*v);
1757:       y[10*(*idx)+8] += alpha9*(*v);
1758:       y[10*(*idx)+9] += alpha10*(*v);
1759:       idx++; v++;
1760:     }
1761:   }
1762:   PetscLogFlops(20*a->nz - 10*b->AIJ->cmap.n);
1763:   VecRestoreArray(xx,&x);
1764:   VecRestoreArray(yy,&y);
1765:   return(0);
1766: }

1770: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1771: {
1772:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1773:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1774:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1776:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1779:   if (yy != zz) {VecCopy(yy,zz);}
1780:   VecGetArray(xx,&x);
1781:   VecGetArray(zz,&y);
1782:   for (i=0; i<m; i++) {
1783:     idx    = a->j + a->i[i] ;
1784:     v      = a->a + a->i[i] ;
1785:     n      = a->i[i+1] - a->i[i];
1786:     alpha1 = x[10*i];
1787:     alpha2 = x[10*i+1];
1788:     alpha3 = x[10*i+2];
1789:     alpha4 = x[10*i+3];
1790:     alpha5 = x[10*i+4];
1791:     alpha6 = x[10*i+5];
1792:     alpha7 = x[10*i+6];
1793:     alpha8 = x[10*i+7];
1794:     alpha9 = x[10*i+8];
1795:     alpha10 = x[10*i+9];
1796:     while (n-->0) {
1797:       y[10*(*idx)]   += alpha1*(*v);
1798:       y[10*(*idx)+1] += alpha2*(*v);
1799:       y[10*(*idx)+2] += alpha3*(*v);
1800:       y[10*(*idx)+3] += alpha4*(*v);
1801:       y[10*(*idx)+4] += alpha5*(*v);
1802:       y[10*(*idx)+5] += alpha6*(*v);
1803:       y[10*(*idx)+6] += alpha7*(*v);
1804:       y[10*(*idx)+7] += alpha8*(*v);
1805:       y[10*(*idx)+8] += alpha9*(*v);
1806:       y[10*(*idx)+9] += alpha10*(*v);
1807:       idx++; v++;
1808:     }
1809:   }
1810:   PetscLogFlops(20*a->nz);
1811:   VecRestoreArray(xx,&x);
1812:   VecRestoreArray(zz,&y);
1813:   return(0);
1814: }


1817: /*--------------------------------------------------------------------------------------------*/
1820: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1821: {
1822:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1823:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1824:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1825:   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1827:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1828:   PetscInt       n,i,jrow,j;

1831:   VecGetArray(xx,&x);
1832:   VecGetArray(yy,&y);
1833:   idx  = a->j;
1834:   v    = a->a;
1835:   ii   = a->i;

1837:   for (i=0; i<m; i++) {
1838:     jrow = ii[i];
1839:     n    = ii[i+1] - jrow;
1840:     sum1  = 0.0;
1841:     sum2  = 0.0;
1842:     sum3  = 0.0;
1843:     sum4  = 0.0;
1844:     sum5  = 0.0;
1845:     sum6  = 0.0;
1846:     sum7  = 0.0;
1847:     sum8  = 0.0;
1848:     sum9  = 0.0;
1849:     sum10 = 0.0;
1850:     sum11 = 0.0;
1851:     sum12 = 0.0;
1852:     sum13 = 0.0;
1853:     sum14 = 0.0;
1854:     sum15 = 0.0;
1855:     sum16 = 0.0;
1856:     for (j=0; j<n; j++) {
1857:       sum1  += v[jrow]*x[16*idx[jrow]];
1858:       sum2  += v[jrow]*x[16*idx[jrow]+1];
1859:       sum3  += v[jrow]*x[16*idx[jrow]+2];
1860:       sum4  += v[jrow]*x[16*idx[jrow]+3];
1861:       sum5  += v[jrow]*x[16*idx[jrow]+4];
1862:       sum6  += v[jrow]*x[16*idx[jrow]+5];
1863:       sum7  += v[jrow]*x[16*idx[jrow]+6];
1864:       sum8  += v[jrow]*x[16*idx[jrow]+7];
1865:       sum9  += v[jrow]*x[16*idx[jrow]+8];
1866:       sum10 += v[jrow]*x[16*idx[jrow]+9];
1867:       sum11 += v[jrow]*x[16*idx[jrow]+10];
1868:       sum12 += v[jrow]*x[16*idx[jrow]+11];
1869:       sum13 += v[jrow]*x[16*idx[jrow]+12];
1870:       sum14 += v[jrow]*x[16*idx[jrow]+13];
1871:       sum15 += v[jrow]*x[16*idx[jrow]+14];
1872:       sum16 += v[jrow]*x[16*idx[jrow]+15];
1873:       jrow++;
1874:      }
1875:     y[16*i]    = sum1;
1876:     y[16*i+1]  = sum2;
1877:     y[16*i+2]  = sum3;
1878:     y[16*i+3]  = sum4;
1879:     y[16*i+4]  = sum5;
1880:     y[16*i+5]  = sum6;
1881:     y[16*i+6]  = sum7;
1882:     y[16*i+7]  = sum8;
1883:     y[16*i+8]  = sum9;
1884:     y[16*i+9]  = sum10;
1885:     y[16*i+10] = sum11;
1886:     y[16*i+11] = sum12;
1887:     y[16*i+12] = sum13;
1888:     y[16*i+13] = sum14;
1889:     y[16*i+14] = sum15;
1890:     y[16*i+15] = sum16;
1891:   }

1893:   PetscLogFlops(32*a->nz - 16*m);
1894:   VecRestoreArray(xx,&x);
1895:   VecRestoreArray(yy,&y);
1896:   return(0);
1897: }

1901: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1902: {
1903:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1904:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1905:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1906:   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1908:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1911:   VecSet(yy,zero);
1912:   VecGetArray(xx,&x);
1913:   VecGetArray(yy,&y);

1915:   for (i=0; i<m; i++) {
1916:     idx    = a->j + a->i[i] ;
1917:     v      = a->a + a->i[i] ;
1918:     n      = a->i[i+1] - a->i[i];
1919:     alpha1  = x[16*i];
1920:     alpha2  = x[16*i+1];
1921:     alpha3  = x[16*i+2];
1922:     alpha4  = x[16*i+3];
1923:     alpha5  = x[16*i+4];
1924:     alpha6  = x[16*i+5];
1925:     alpha7  = x[16*i+6];
1926:     alpha8  = x[16*i+7];
1927:     alpha9  = x[16*i+8];
1928:     alpha10 = x[16*i+9];
1929:     alpha11 = x[16*i+10];
1930:     alpha12 = x[16*i+11];
1931:     alpha13 = x[16*i+12];
1932:     alpha14 = x[16*i+13];
1933:     alpha15 = x[16*i+14];
1934:     alpha16 = x[16*i+15];
1935:     while (n-->0) {
1936:       y[16*(*idx)]    += alpha1*(*v);
1937:       y[16*(*idx)+1]  += alpha2*(*v);
1938:       y[16*(*idx)+2]  += alpha3*(*v);
1939:       y[16*(*idx)+3]  += alpha4*(*v);
1940:       y[16*(*idx)+4]  += alpha5*(*v);
1941:       y[16*(*idx)+5]  += alpha6*(*v);
1942:       y[16*(*idx)+6]  += alpha7*(*v);
1943:       y[16*(*idx)+7]  += alpha8*(*v);
1944:       y[16*(*idx)+8]  += alpha9*(*v);
1945:       y[16*(*idx)+9]  += alpha10*(*v);
1946:       y[16*(*idx)+10] += alpha11*(*v);
1947:       y[16*(*idx)+11] += alpha12*(*v);
1948:       y[16*(*idx)+12] += alpha13*(*v);
1949:       y[16*(*idx)+13] += alpha14*(*v);
1950:       y[16*(*idx)+14] += alpha15*(*v);
1951:       y[16*(*idx)+15] += alpha16*(*v);
1952:       idx++; v++;
1953:     }
1954:   }
1955:   PetscLogFlops(32*a->nz - 16*b->AIJ->cmap.n);
1956:   VecRestoreArray(xx,&x);
1957:   VecRestoreArray(yy,&y);
1958:   return(0);
1959: }

1963: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1964: {
1965:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1966:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1967:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1968:   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1970:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1971:   PetscInt       n,i,jrow,j;

1974:   if (yy != zz) {VecCopy(yy,zz);}
1975:   VecGetArray(xx,&x);
1976:   VecGetArray(zz,&y);
1977:   idx  = a->j;
1978:   v    = a->a;
1979:   ii   = a->i;

1981:   for (i=0; i<m; i++) {
1982:     jrow = ii[i];
1983:     n    = ii[i+1] - jrow;
1984:     sum1  = 0.0;
1985:     sum2  = 0.0;
1986:     sum3  = 0.0;
1987:     sum4  = 0.0;
1988:     sum5  = 0.0;
1989:     sum6  = 0.0;
1990:     sum7  = 0.0;
1991:     sum8  = 0.0;
1992:     sum9  = 0.0;
1993:     sum10 = 0.0;
1994:     sum11 = 0.0;
1995:     sum12 = 0.0;
1996:     sum13 = 0.0;
1997:     sum14 = 0.0;
1998:     sum15 = 0.0;
1999:     sum16 = 0.0;
2000:     for (j=0; j<n; j++) {
2001:       sum1  += v[jrow]*x[16*idx[jrow]];
2002:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2003:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2004:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2005:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2006:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2007:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2008:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2009:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2010:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2011:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2012:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2013:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2014:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2015:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2016:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2017:       jrow++;
2018:      }
2019:     y[16*i]    += sum1;
2020:     y[16*i+1]  += sum2;
2021:     y[16*i+2]  += sum3;
2022:     y[16*i+3]  += sum4;
2023:     y[16*i+4]  += sum5;
2024:     y[16*i+5]  += sum6;
2025:     y[16*i+6]  += sum7;
2026:     y[16*i+7]  += sum8;
2027:     y[16*i+8]  += sum9;
2028:     y[16*i+9]  += sum10;
2029:     y[16*i+10] += sum11;
2030:     y[16*i+11] += sum12;
2031:     y[16*i+12] += sum13;
2032:     y[16*i+13] += sum14;
2033:     y[16*i+14] += sum15;
2034:     y[16*i+15] += sum16;
2035:   }

2037:   PetscLogFlops(32*a->nz);
2038:   VecRestoreArray(xx,&x);
2039:   VecRestoreArray(zz,&y);
2040:   return(0);
2041: }

2045: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2046: {
2047:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2048:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2049:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2050:   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2052:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

2055:   if (yy != zz) {VecCopy(yy,zz);}
2056:   VecGetArray(xx,&x);
2057:   VecGetArray(zz,&y);
2058:   for (i=0; i<m; i++) {
2059:     idx    = a->j + a->i[i] ;
2060:     v      = a->a + a->i[i] ;
2061:     n      = a->i[i+1] - a->i[i];
2062:     alpha1 = x[16*i];
2063:     alpha2 = x[16*i+1];
2064:     alpha3 = x[16*i+2];
2065:     alpha4 = x[16*i+3];
2066:     alpha5 = x[16*i+4];
2067:     alpha6 = x[16*i+5];
2068:     alpha7 = x[16*i+6];
2069:     alpha8 = x[16*i+7];
2070:     alpha9  = x[16*i+8];
2071:     alpha10 = x[16*i+9];
2072:     alpha11 = x[16*i+10];
2073:     alpha12 = x[16*i+11];
2074:     alpha13 = x[16*i+12];
2075:     alpha14 = x[16*i+13];
2076:     alpha15 = x[16*i+14];
2077:     alpha16 = x[16*i+15];
2078:     while (n-->0) {
2079:       y[16*(*idx)]   += alpha1*(*v);
2080:       y[16*(*idx)+1] += alpha2*(*v);
2081:       y[16*(*idx)+2] += alpha3*(*v);
2082:       y[16*(*idx)+3] += alpha4*(*v);
2083:       y[16*(*idx)+4] += alpha5*(*v);
2084:       y[16*(*idx)+5] += alpha6*(*v);
2085:       y[16*(*idx)+6] += alpha7*(*v);
2086:       y[16*(*idx)+7] += alpha8*(*v);
2087:       y[16*(*idx)+8]  += alpha9*(*v);
2088:       y[16*(*idx)+9]  += alpha10*(*v);
2089:       y[16*(*idx)+10] += alpha11*(*v);
2090:       y[16*(*idx)+11] += alpha12*(*v);
2091:       y[16*(*idx)+12] += alpha13*(*v);
2092:       y[16*(*idx)+13] += alpha14*(*v);
2093:       y[16*(*idx)+14] += alpha15*(*v);
2094:       y[16*(*idx)+15] += alpha16*(*v);
2095:       idx++; v++;
2096:     }
2097:   }
2098:   PetscLogFlops(32*a->nz);
2099:   VecRestoreArray(xx,&x);
2100:   VecRestoreArray(zz,&y);
2101:   return(0);
2102: }

2104: /*===================================================================================*/
2107: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2108: {
2109:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2113:   /* start the scatter */
2114:   VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
2115:   (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2116:   VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
2117:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2118:   return(0);
2119: }

2123: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2124: {
2125:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2129:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2130:   (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2131:   VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
2132:   VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
2133:   return(0);
2134: }

2138: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2139: {
2140:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2144:   /* start the scatter */
2145:   VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
2146:   (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2147:   VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
2148:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2149:   return(0);
2150: }

2154: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2155: {
2156:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2160:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2161:   VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
2162:   (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2163:   VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
2164:   return(0);
2165: }

2169: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2170: {
2171:   /* This routine requires testing -- but it's getting better. */
2172:   PetscErrorCode     ierr;
2173:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2174:   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
2175:   Mat                P=pp->AIJ;
2176:   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2177:   PetscInt           *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
2178:   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2179:   PetscInt           an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N,ppdof=pp->dof,cn;
2180:   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
2181:   MatScalar          *ca;

2184:   /* Start timer */

2187:   /* Get ij structure of P^T */
2188:   MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

2190:   cn = pn*ppdof;
2191:   /* Allocate ci array, arrays for fill computation and */
2192:   /* free space for accumulating nonzero column info */
2193:   PetscMalloc((cn+1)*sizeof(PetscInt),&ci);
2194:   ci[0] = 0;

2196:   /* Work arrays for rows of P^T*A */
2197:   PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);
2198:   PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));
2199:   ptasparserow = ptadenserow  + an;
2200:   denserow     = ptasparserow + an;
2201:   sparserow    = denserow     + cn;

2203:   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2204:   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2205:   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2206:   PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
2207:   current_space = free_space;

2209:   /* Determine symbolic info for each row of C: */
2210:   for (i=0;i<pn;i++) {
2211:     ptnzi  = pti[i+1] - pti[i];
2212:     ptJ    = ptj + pti[i];
2213:     for (dof=0;dof<ppdof;dof++) {
2214:       ptanzi = 0;
2215:       /* Determine symbolic row of PtA: */
2216:       for (j=0;j<ptnzi;j++) {
2217:         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2218:         arow = ptJ[j]*ppdof + dof;
2219:         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2220:         anzj = ai[arow+1] - ai[arow];
2221:         ajj  = aj + ai[arow];
2222:         for (k=0;k<anzj;k++) {
2223:           if (!ptadenserow[ajj[k]]) {
2224:             ptadenserow[ajj[k]]    = -1;
2225:             ptasparserow[ptanzi++] = ajj[k];
2226:           }
2227:         }
2228:       }
2229:       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2230:       ptaj = ptasparserow;
2231:       cnzi   = 0;
2232:       for (j=0;j<ptanzi;j++) {
2233:         /* Get offset within block of P */
2234:         pshift = *ptaj%ppdof;
2235:         /* Get block row of P */
2236:         prow = (*ptaj++)/ppdof; /* integer division */
2237:         /* P has same number of nonzeros per row as the compressed form */
2238:         pnzj = pi[prow+1] - pi[prow];
2239:         pjj  = pj + pi[prow];
2240:         for (k=0;k<pnzj;k++) {
2241:           /* Locations in C are shifted by the offset within the block */
2242:           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2243:           if (!denserow[pjj[k]*ppdof+pshift]) {
2244:             denserow[pjj[k]*ppdof+pshift] = -1;
2245:             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
2246:           }
2247:         }
2248:       }

2250:       /* sort sparserow */
2251:       PetscSortInt(cnzi,sparserow);
2252: 
2253:       /* If free space is not available, make more free space */
2254:       /* Double the amount of total space in the list */
2255:       if (current_space->local_remaining<cnzi) {
2256:         PetscFreeSpaceGet(current_space->total_array_size,&current_space);
2257:       }

2259:       /* Copy data into free space, and zero out denserows */
2260:       PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
2261:       current_space->array           += cnzi;
2262:       current_space->local_used      += cnzi;
2263:       current_space->local_remaining -= cnzi;

2265:       for (j=0;j<ptanzi;j++) {
2266:         ptadenserow[ptasparserow[j]] = 0;
2267:       }
2268:       for (j=0;j<cnzi;j++) {
2269:         denserow[sparserow[j]] = 0;
2270:       }
2271:       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2272:       /*        For now, we will recompute what is needed. */
2273:       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2274:     }
2275:   }
2276:   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2277:   /* Allocate space for cj, initialize cj, and */
2278:   /* destroy list of free space and other temporary array(s) */
2279:   PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);
2280:   PetscFreeSpaceContiguous(&free_space,cj);
2281:   PetscFree(ptadenserow);
2282: 
2283:   /* Allocate space for ca */
2284:   PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);
2285:   PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));
2286: 
2287:   /* put together the new matrix */
2288:   MatCreateSeqAIJWithArrays(A->comm,cn,cn,ci,cj,ca,C);

2290:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2291:   /* Since these are PETSc arrays, change flags to free them as necessary. */
2292:   c          = (Mat_SeqAIJ *)((*C)->data);
2293:   c->free_a  = PETSC_TRUE;
2294:   c->free_ij = PETSC_TRUE;
2295:   c->nonew   = 0;

2297:   /* Clean up. */
2298:   MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

2301:   return(0);
2302: }

2306: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2307: {
2308:   /* This routine requires testing -- first draft only */
2310:   PetscInt       flops=0;
2311:   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
2312:   Mat            P=pp->AIJ;
2313:   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
2314:   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
2315:   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
2316:   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
2317:   PetscInt       *ci=c->i,*cj=c->j,*cjj;
2318:   PetscInt       am=A->rmap.N,cn=C->cmap.N,cm=C->rmap.N,ppdof=pp->dof;
2319:   PetscInt       i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
2320:   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

2323:   /* Allocate temporary array for storage of one row of A*P */
2324:   PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);
2325:   PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));

2327:   apj      = (PetscInt *)(apa + cn);
2328:   apjdense = apj + cn;

2330:   /* Clear old values in C */
2331:   PetscMemzero(ca,ci[cm]*sizeof(MatScalar));

2333:   for (i=0;i<am;i++) {
2334:     /* Form sparse row of A*P */
2335:     anzi  = ai[i+1] - ai[i];
2336:     apnzj = 0;
2337:     for (j=0;j<anzi;j++) {
2338:       /* Get offset within block of P */
2339:       pshift = *aj%ppdof;
2340:       /* Get block row of P */
2341:       prow   = *aj++/ppdof; /* integer division */
2342:       pnzj = pi[prow+1] - pi[prow];
2343:       pjj  = pj + pi[prow];
2344:       paj  = pa + pi[prow];
2345:       for (k=0;k<pnzj;k++) {
2346:         poffset = pjj[k]*ppdof+pshift;
2347:         if (!apjdense[poffset]) {
2348:           apjdense[poffset] = -1;
2349:           apj[apnzj++]      = poffset;
2350:         }
2351:         apa[poffset] += (*aa)*paj[k];
2352:       }
2353:       flops += 2*pnzj;
2354:       aa++;
2355:     }

2357:     /* Sort the j index array for quick sparse axpy. */
2358:     /* Note: a array does not need sorting as it is in dense storage locations. */
2359:     PetscSortInt(apnzj,apj);

2361:     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2362:     prow    = i/ppdof; /* integer division */
2363:     pshift  = i%ppdof;
2364:     poffset = pi[prow];
2365:     pnzi = pi[prow+1] - poffset;
2366:     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2367:     pJ   = pj+poffset;
2368:     pA   = pa+poffset;
2369:     for (j=0;j<pnzi;j++) {
2370:       crow   = (*pJ)*ppdof+pshift;
2371:       cjj    = cj + ci[crow];
2372:       caj    = ca + ci[crow];
2373:       pJ++;
2374:       /* Perform sparse axpy operation.  Note cjj includes apj. */
2375:       for (k=0,nextap=0;nextap<apnzj;k++) {
2376:         if (cjj[k]==apj[nextap]) {
2377:           caj[k] += (*pA)*apa[apj[nextap++]];
2378:         }
2379:       }
2380:       flops += 2*apnzj;
2381:       pA++;
2382:     }

2384:     /* Zero the current row info for A*P */
2385:     for (j=0;j<apnzj;j++) {
2386:       apa[apj[j]]      = 0.;
2387:       apjdense[apj[j]] = 0;
2388:     }
2389:   }

2391:   /* Assemble the final matrix and clean up */
2392:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2393:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2394:   PetscFree(apa);
2395:   PetscLogFlops(flops);

2397:   return(0);
2398: }

2403: PetscErrorCode  MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2404: {
2405:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2406:   Mat               a = b->AIJ,B;
2407:   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
2408:   PetscErrorCode    ierr;
2409:   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
2410:   PetscInt          *cols;
2411:   PetscScalar       *vals;

2414:   MatGetSize(a,&m,&n);
2415:   PetscMalloc(dof*m*sizeof(PetscInt),&ilen);
2416:   for (i=0; i<m; i++) {
2417:     nmax = PetscMax(nmax,aij->ilen[i]);
2418:     for (j=0; j<dof; j++) {
2419:       ilen[dof*i+j] = aij->ilen[i];
2420:     }
2421:   }
2422:   MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);
2423:   MatSetOption(B,MAT_COLUMNS_SORTED);
2424:   PetscFree(ilen);
2425:   PetscMalloc(nmax*sizeof(PetscInt),&icols);
2426:   ii   = 0;
2427:   for (i=0; i<m; i++) {
2428:     MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
2429:     for (j=0; j<dof; j++) {
2430:       for (k=0; k<ncols; k++) {
2431:         icols[k] = dof*cols[k]+j;
2432:       }
2433:       MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
2434:       ii++;
2435:     }
2436:     MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
2437:   }
2438:   PetscFree(icols);
2439:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2440:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

2442:   if (reuse == MAT_REUSE_MATRIX) {
2443:     MatHeaderReplace(A,B);
2444:   } else {
2445:     *newmat = B;
2446:   }
2447:   return(0);
2448: }

2451:  #include src/mat/impls/aij/mpi/mpiaij.h

2456: PetscErrorCode  MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2457: {
2458:   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
2459:   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
2460:   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
2461:   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
2462:   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
2463:   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
2464:   PetscInt          dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
2465:   PetscInt          *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
2466:   PetscInt          rstart,cstart,*garray,ii,k;
2467:   PetscErrorCode    ierr;
2468:   PetscScalar       *vals,*ovals;

2471:   PetscMalloc2(A->rmap.n,PetscInt,&dnz,A->rmap.n,PetscInt,&onz);
2472:   for (i=0; i<A->rmap.n/dof; i++) {
2473:     nmax  = PetscMax(nmax,AIJ->ilen[i]);
2474:     onmax = PetscMax(onmax,OAIJ->ilen[i]);
2475:     for (j=0; j<dof; j++) {
2476:       dnz[dof*i+j] = AIJ->ilen[i];
2477:       onz[dof*i+j] = OAIJ->ilen[i];
2478:     }
2479:   }
2480:   MatCreateMPIAIJ(A->comm,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N,0,dnz,0,onz,&B);
2481:   MatSetOption(B,MAT_COLUMNS_SORTED);
2482:   PetscFree2(dnz,onz);

2484:   PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);
2485:   rstart = dof*maij->A->rmap.rstart;
2486:   cstart = dof*maij->A->cmap.rstart;
2487:   garray = mpiaij->garray;

2489:   ii = rstart;
2490:   for (i=0; i<A->rmap.n/dof; i++) {
2491:     MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
2492:     MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
2493:     for (j=0; j<dof; j++) {
2494:       for (k=0; k<ncols; k++) {
2495:         icols[k] = cstart + dof*cols[k]+j;
2496:       }
2497:       for (k=0; k<oncols; k++) {
2498:         oicols[k] = dof*garray[ocols[k]]+j;
2499:       }
2500:       MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
2501:       MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
2502:       ii++;
2503:     }
2504:     MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
2505:     MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
2506:   }
2507:   PetscFree2(icols,oicols);

2509:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2510:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

2512:   if (reuse == MAT_REUSE_MATRIX) {
2513:     MatHeaderReplace(A,B);
2514:   } else {
2515:     *newmat = B;
2516:   }
2517:   return(0);
2518: }


2522: /* ---------------------------------------------------------------------------------- */
2523: /*MC
2524:   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 
2525:   operations for multicomponent problems.  It interpolates each component the same
2526:   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
2527:   and MATMPIAIJ for distributed matrices.

2529:   Operations provided:
2530: + MatMult
2531: . MatMultTranspose
2532: . MatMultAdd
2533: . MatMultTransposeAdd
2534: - MatView

2536:   Level: advanced

2538: M*/
2541: PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
2542: {
2544:   PetscMPIInt    size;
2545:   PetscInt       n;
2546:   Mat_MPIMAIJ    *b;
2547:   Mat            B;

2550:   PetscObjectReference((PetscObject)A);

2552:   if (dof == 1) {
2553:     *maij = A;
2554:   } else {
2555:     MatCreate(A->comm,&B);
2556:     MatSetSizes(B,dof*A->rmap.n,dof*A->cmap.n,dof*A->rmap.N,dof*A->cmap.N);
2557:     B->assembled    = PETSC_TRUE;

2559:     MPI_Comm_size(A->comm,&size);
2560:     if (size == 1) {
2561:       MatSetType(B,MATSEQMAIJ);
2562:       B->ops->destroy = MatDestroy_SeqMAIJ;
2563:       B->ops->view    = MatView_SeqMAIJ;
2564:       b      = (Mat_MPIMAIJ*)B->data;
2565:       b->dof = dof;
2566:       b->AIJ = A;
2567:       if (dof == 2) {
2568:         B->ops->mult             = MatMult_SeqMAIJ_2;
2569:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
2570:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
2571:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
2572:       } else if (dof == 3) {
2573:         B->ops->mult             = MatMult_SeqMAIJ_3;
2574:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
2575:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
2576:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
2577:       } else if (dof == 4) {
2578:         B->ops->mult             = MatMult_SeqMAIJ_4;
2579:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
2580:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
2581:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
2582:       } else if (dof == 5) {
2583:         B->ops->mult             = MatMult_SeqMAIJ_5;
2584:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
2585:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
2586:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
2587:       } else if (dof == 6) {
2588:         B->ops->mult             = MatMult_SeqMAIJ_6;
2589:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
2590:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
2591:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
2592:       } else if (dof == 7) {
2593:         B->ops->mult             = MatMult_SeqMAIJ_7;
2594:         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
2595:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
2596:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
2597:       } else if (dof == 8) {
2598:         B->ops->mult             = MatMult_SeqMAIJ_8;
2599:         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
2600:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
2601:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
2602:       } else if (dof == 9) {
2603:         B->ops->mult             = MatMult_SeqMAIJ_9;
2604:         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
2605:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
2606:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
2607:       } else if (dof == 10) {
2608:         B->ops->mult             = MatMult_SeqMAIJ_10;
2609:         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
2610:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
2611:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
2612:       } else if (dof == 16) {
2613:         B->ops->mult             = MatMult_SeqMAIJ_16;
2614:         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
2615:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
2616:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
2617:       } else {
2618:         SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
2619:       }
2620:       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
2621:       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2622:       PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);
2623:     } else {
2624:       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
2625:       IS         from,to;
2626:       Vec        gvec;
2627:       PetscInt   *garray,i;

2629:       MatSetType(B,MATMPIMAIJ);
2630:       B->ops->destroy = MatDestroy_MPIMAIJ;
2631:       B->ops->view    = MatView_MPIMAIJ;
2632:       b      = (Mat_MPIMAIJ*)B->data;
2633:       b->dof = dof;
2634:       b->A   = A;
2635:       MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
2636:       MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);

2638:       VecGetSize(mpiaij->lvec,&n);
2639:       VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);
2640:       VecSetBlockSize(b->w,dof);

2642:       /* create two temporary Index sets for build scatter gather */
2643:       PetscMalloc((n+1)*sizeof(PetscInt),&garray);
2644:       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
2645:       ISCreateBlock(A->comm,dof,n,garray,&from);
2646:       PetscFree(garray);
2647:       ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);

2649:       /* create temporary global vector to generate scatter context */
2650:       VecCreateMPI(A->comm,dof*A->cmap.n,dof*A->cmap.N,&gvec);
2651:       VecSetBlockSize(gvec,dof);

2653:       /* generate the scatter context */
2654:       VecScatterCreate(gvec,from,b->w,to,&b->ctx);

2656:       ISDestroy(from);
2657:       ISDestroy(to);
2658:       VecDestroy(gvec);

2660:       B->ops->mult             = MatMult_MPIMAIJ_dof;
2661:       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
2662:       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
2663:       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
2664:       PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);
2665:     }
2666:     *maij = B;
2667:     MatView_Private(B);
2668:   }
2669:   return(0);
2670: }