Actual source code: maij.c
1: #define PETSCMAT_DLL
3: /*
4: Defines the basic matrix operations for the MAIJ matrix storage format.
5: This format is used for restriction and interpolation operations for
6: multicomponent problems. It interpolates each component the same way
7: independently.
9: We provide:
10: MatMult()
11: MatMultTranspose()
12: MatMultTransposeAdd()
13: MatMultAdd()
14: and
15: MatCreateMAIJ(Mat,dof,Mat*)
17: This single directory handles both the sequential and parallel codes
18: */
20: #include ../src/mat/impls/maij/maij.h
21: #include ../src/mat/utils/freespace.h
22: #include private/vecimpl.h
26: PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B)
27: {
29: PetscTruth ismpimaij,isseqmaij;
32: PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
33: PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
34: if (ismpimaij) {
35: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
37: *B = b->A;
38: } else if (isseqmaij) {
39: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
41: *B = b->AIJ;
42: } else {
43: *B = A;
44: }
45: return(0);
46: }
50: PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
51: {
53: Mat Aij;
56: MatMAIJGetAIJ(A,&Aij);
57: MatCreateMAIJ(Aij,dof,B);
58: return(0);
59: }
63: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
64: {
66: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
69: if (b->AIJ) {
70: MatDestroy(b->AIJ);
71: }
72: PetscFree(b);
73: return(0);
74: }
78: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
79: {
81: Mat B;
84: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
85: MatView(B,viewer);
86: MatDestroy(B);
87: return(0);
88: }
92: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
93: {
95: Mat B;
98: MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
99: MatView(B,viewer);
100: MatDestroy(B);
101: return(0);
102: }
106: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
107: {
109: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
112: if (b->AIJ) {
113: MatDestroy(b->AIJ);
114: }
115: if (b->OAIJ) {
116: MatDestroy(b->OAIJ);
117: }
118: if (b->A) {
119: MatDestroy(b->A);
120: }
121: if (b->ctx) {
122: VecScatterDestroy(b->ctx);
123: }
124: if (b->w) {
125: VecDestroy(b->w);
126: }
127: PetscFree(b);
128: PetscObjectChangeTypeName((PetscObject)A,0);
129: return(0);
130: }
132: /*MC
133: MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
134: multicomponent problems, interpolating or restricting each component the same way independently.
135: The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
137: Operations provided:
138: . MatMult
139: . MatMultTranspose
140: . MatMultAdd
141: . MatMultTransposeAdd
143: Level: advanced
145: .seealso: MatCreateSeqDense
146: M*/
151: PetscErrorCode MatCreate_MAIJ(Mat A)
152: {
154: Mat_MPIMAIJ *b;
155: PetscMPIInt size;
158: PetscNewLog(A,Mat_MPIMAIJ,&b);
159: A->data = (void*)b;
160: PetscMemzero(A->ops,sizeof(struct _MatOps));
161: A->mapping = 0;
163: b->AIJ = 0;
164: b->dof = 0;
165: b->OAIJ = 0;
166: b->ctx = 0;
167: b->w = 0;
168: MPI_Comm_size(((PetscObject)A)->comm,&size);
169: if (size == 1){
170: PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
171: } else {
172: PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
173: }
174: return(0);
175: }
178: /* --------------------------------------------------------------------------------------*/
181: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
182: {
183: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
184: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
185: PetscScalar *x,*y,*v,sum1, sum2;
187: PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
188: PetscInt n,i,jrow,j;
191: VecGetArray(xx,&x);
192: VecGetArray(yy,&y);
193: idx = a->j;
194: v = a->a;
195: ii = a->i;
197: for (i=0; i<m; i++) {
198: jrow = ii[i];
199: n = ii[i+1] - jrow;
200: sum1 = 0.0;
201: sum2 = 0.0;
202: nonzerorow += (n>0);
203: for (j=0; j<n; j++) {
204: sum1 += v[jrow]*x[2*idx[jrow]];
205: sum2 += v[jrow]*x[2*idx[jrow]+1];
206: jrow++;
207: }
208: y[2*i] = sum1;
209: y[2*i+1] = sum2;
210: }
212: PetscLogFlops(4*a->nz - 2*nonzerorow);
213: VecRestoreArray(xx,&x);
214: VecRestoreArray(yy,&y);
215: return(0);
216: }
220: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
221: {
222: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
223: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
224: PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0;
226: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
229: VecSet(yy,zero);
230: VecGetArray(xx,&x);
231: VecGetArray(yy,&y);
232:
233: for (i=0; i<m; i++) {
234: idx = a->j + a->i[i] ;
235: v = a->a + a->i[i] ;
236: n = a->i[i+1] - a->i[i];
237: alpha1 = x[2*i];
238: alpha2 = x[2*i+1];
239: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
240: }
241: PetscLogFlops(4*a->nz);
242: VecRestoreArray(xx,&x);
243: VecRestoreArray(yy,&y);
244: return(0);
245: }
249: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
250: {
251: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
252: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
253: PetscScalar *x,*y,*v,sum1, sum2;
255: PetscInt m = b->AIJ->rmap->n,*idx,*ii;
256: PetscInt n,i,jrow,j;
259: if (yy != zz) {VecCopy(yy,zz);}
260: VecGetArray(xx,&x);
261: VecGetArray(zz,&y);
262: idx = a->j;
263: v = a->a;
264: ii = a->i;
266: for (i=0; i<m; i++) {
267: jrow = ii[i];
268: n = ii[i+1] - jrow;
269: sum1 = 0.0;
270: sum2 = 0.0;
271: for (j=0; j<n; j++) {
272: sum1 += v[jrow]*x[2*idx[jrow]];
273: sum2 += v[jrow]*x[2*idx[jrow]+1];
274: jrow++;
275: }
276: y[2*i] += sum1;
277: y[2*i+1] += sum2;
278: }
280: PetscLogFlops(4*a->nz);
281: VecRestoreArray(xx,&x);
282: VecRestoreArray(zz,&y);
283: return(0);
284: }
287: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
288: {
289: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
290: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
291: PetscScalar *x,*y,*v,alpha1,alpha2;
293: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
296: if (yy != zz) {VecCopy(yy,zz);}
297: VecGetArray(xx,&x);
298: VecGetArray(zz,&y);
299:
300: for (i=0; i<m; i++) {
301: idx = a->j + a->i[i] ;
302: v = a->a + a->i[i] ;
303: n = a->i[i+1] - a->i[i];
304: alpha1 = x[2*i];
305: alpha2 = x[2*i+1];
306: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
307: }
308: PetscLogFlops(4*a->nz);
309: VecRestoreArray(xx,&x);
310: VecRestoreArray(zz,&y);
311: return(0);
312: }
313: /* --------------------------------------------------------------------------------------*/
316: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
317: {
318: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
319: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
320: PetscScalar *x,*y,*v,sum1, sum2, sum3;
322: PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
323: PetscInt n,i,jrow,j;
326: VecGetArray(xx,&x);
327: VecGetArray(yy,&y);
328: idx = a->j;
329: v = a->a;
330: ii = a->i;
332: for (i=0; i<m; i++) {
333: jrow = ii[i];
334: n = ii[i+1] - jrow;
335: sum1 = 0.0;
336: sum2 = 0.0;
337: sum3 = 0.0;
338: nonzerorow += (n>0);
339: for (j=0; j<n; j++) {
340: sum1 += v[jrow]*x[3*idx[jrow]];
341: sum2 += v[jrow]*x[3*idx[jrow]+1];
342: sum3 += v[jrow]*x[3*idx[jrow]+2];
343: jrow++;
344: }
345: y[3*i] = sum1;
346: y[3*i+1] = sum2;
347: y[3*i+2] = sum3;
348: }
350: PetscLogFlops(6*a->nz - 3*nonzerorow);
351: VecRestoreArray(xx,&x);
352: VecRestoreArray(yy,&y);
353: return(0);
354: }
358: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
359: {
360: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
361: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
362: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
364: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
367: VecSet(yy,zero);
368: VecGetArray(xx,&x);
369: VecGetArray(yy,&y);
370:
371: for (i=0; i<m; i++) {
372: idx = a->j + a->i[i];
373: v = a->a + a->i[i];
374: n = a->i[i+1] - a->i[i];
375: alpha1 = x[3*i];
376: alpha2 = x[3*i+1];
377: alpha3 = x[3*i+2];
378: while (n-->0) {
379: y[3*(*idx)] += alpha1*(*v);
380: y[3*(*idx)+1] += alpha2*(*v);
381: y[3*(*idx)+2] += alpha3*(*v);
382: idx++; v++;
383: }
384: }
385: PetscLogFlops(6*a->nz);
386: VecRestoreArray(xx,&x);
387: VecRestoreArray(yy,&y);
388: return(0);
389: }
393: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
394: {
395: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
396: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
397: PetscScalar *x,*y,*v,sum1, sum2, sum3;
399: PetscInt m = b->AIJ->rmap->n,*idx,*ii;
400: PetscInt n,i,jrow,j;
403: if (yy != zz) {VecCopy(yy,zz);}
404: VecGetArray(xx,&x);
405: VecGetArray(zz,&y);
406: idx = a->j;
407: v = a->a;
408: ii = a->i;
410: for (i=0; i<m; i++) {
411: jrow = ii[i];
412: n = ii[i+1] - jrow;
413: sum1 = 0.0;
414: sum2 = 0.0;
415: sum3 = 0.0;
416: for (j=0; j<n; j++) {
417: sum1 += v[jrow]*x[3*idx[jrow]];
418: sum2 += v[jrow]*x[3*idx[jrow]+1];
419: sum3 += v[jrow]*x[3*idx[jrow]+2];
420: jrow++;
421: }
422: y[3*i] += sum1;
423: y[3*i+1] += sum2;
424: y[3*i+2] += sum3;
425: }
427: PetscLogFlops(6*a->nz);
428: VecRestoreArray(xx,&x);
429: VecRestoreArray(zz,&y);
430: return(0);
431: }
434: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
435: {
436: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
437: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
438: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3;
440: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
443: if (yy != zz) {VecCopy(yy,zz);}
444: VecGetArray(xx,&x);
445: VecGetArray(zz,&y);
446: for (i=0; i<m; i++) {
447: idx = a->j + a->i[i] ;
448: v = a->a + a->i[i] ;
449: n = a->i[i+1] - a->i[i];
450: alpha1 = x[3*i];
451: alpha2 = x[3*i+1];
452: alpha3 = x[3*i+2];
453: while (n-->0) {
454: y[3*(*idx)] += alpha1*(*v);
455: y[3*(*idx)+1] += alpha2*(*v);
456: y[3*(*idx)+2] += alpha3*(*v);
457: idx++; v++;
458: }
459: }
460: PetscLogFlops(6*a->nz);
461: VecRestoreArray(xx,&x);
462: VecRestoreArray(zz,&y);
463: return(0);
464: }
466: /* ------------------------------------------------------------------------------*/
469: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
470: {
471: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
472: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
473: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4;
475: PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
476: PetscInt n,i,jrow,j;
479: VecGetArray(xx,&x);
480: VecGetArray(yy,&y);
481: idx = a->j;
482: v = a->a;
483: ii = a->i;
485: for (i=0; i<m; i++) {
486: jrow = ii[i];
487: n = ii[i+1] - jrow;
488: sum1 = 0.0;
489: sum2 = 0.0;
490: sum3 = 0.0;
491: sum4 = 0.0;
492: nonzerorow += (n>0);
493: for (j=0; j<n; j++) {
494: sum1 += v[jrow]*x[4*idx[jrow]];
495: sum2 += v[jrow]*x[4*idx[jrow]+1];
496: sum3 += v[jrow]*x[4*idx[jrow]+2];
497: sum4 += v[jrow]*x[4*idx[jrow]+3];
498: jrow++;
499: }
500: y[4*i] = sum1;
501: y[4*i+1] = sum2;
502: y[4*i+2] = sum3;
503: y[4*i+3] = sum4;
504: }
506: PetscLogFlops(8*a->nz - 4*nonzerorow);
507: VecRestoreArray(xx,&x);
508: VecRestoreArray(yy,&y);
509: return(0);
510: }
514: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
515: {
516: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
517: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
518: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
520: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
523: VecSet(yy,zero);
524: VecGetArray(xx,&x);
525: VecGetArray(yy,&y);
526: for (i=0; i<m; i++) {
527: idx = a->j + a->i[i] ;
528: v = a->a + a->i[i] ;
529: n = a->i[i+1] - a->i[i];
530: alpha1 = x[4*i];
531: alpha2 = x[4*i+1];
532: alpha3 = x[4*i+2];
533: alpha4 = x[4*i+3];
534: while (n-->0) {
535: y[4*(*idx)] += alpha1*(*v);
536: y[4*(*idx)+1] += alpha2*(*v);
537: y[4*(*idx)+2] += alpha3*(*v);
538: y[4*(*idx)+3] += alpha4*(*v);
539: idx++; v++;
540: }
541: }
542: PetscLogFlops(8*a->nz);
543: VecRestoreArray(xx,&x);
544: VecRestoreArray(yy,&y);
545: return(0);
546: }
550: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
551: {
552: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
553: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
554: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4;
556: PetscInt m = b->AIJ->rmap->n,*idx,*ii;
557: PetscInt n,i,jrow,j;
560: if (yy != zz) {VecCopy(yy,zz);}
561: VecGetArray(xx,&x);
562: VecGetArray(zz,&y);
563: idx = a->j;
564: v = a->a;
565: ii = a->i;
567: for (i=0; i<m; i++) {
568: jrow = ii[i];
569: n = ii[i+1] - jrow;
570: sum1 = 0.0;
571: sum2 = 0.0;
572: sum3 = 0.0;
573: sum4 = 0.0;
574: for (j=0; j<n; j++) {
575: sum1 += v[jrow]*x[4*idx[jrow]];
576: sum2 += v[jrow]*x[4*idx[jrow]+1];
577: sum3 += v[jrow]*x[4*idx[jrow]+2];
578: sum4 += v[jrow]*x[4*idx[jrow]+3];
579: jrow++;
580: }
581: y[4*i] += sum1;
582: y[4*i+1] += sum2;
583: y[4*i+2] += sum3;
584: y[4*i+3] += sum4;
585: }
587: PetscLogFlops(8*a->nz);
588: VecRestoreArray(xx,&x);
589: VecRestoreArray(zz,&y);
590: return(0);
591: }
594: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
595: {
596: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
597: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
598: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
600: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
603: if (yy != zz) {VecCopy(yy,zz);}
604: VecGetArray(xx,&x);
605: VecGetArray(zz,&y);
606:
607: for (i=0; i<m; i++) {
608: idx = a->j + a->i[i] ;
609: v = a->a + a->i[i] ;
610: n = a->i[i+1] - a->i[i];
611: alpha1 = x[4*i];
612: alpha2 = x[4*i+1];
613: alpha3 = x[4*i+2];
614: alpha4 = x[4*i+3];
615: while (n-->0) {
616: y[4*(*idx)] += alpha1*(*v);
617: y[4*(*idx)+1] += alpha2*(*v);
618: y[4*(*idx)+2] += alpha3*(*v);
619: y[4*(*idx)+3] += alpha4*(*v);
620: idx++; v++;
621: }
622: }
623: PetscLogFlops(8*a->nz);
624: VecRestoreArray(xx,&x);
625: VecRestoreArray(zz,&y);
626: return(0);
627: }
628: /* ------------------------------------------------------------------------------*/
632: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
633: {
634: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
635: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
636: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
638: PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
639: PetscInt n,i,jrow,j;
642: VecGetArray(xx,&x);
643: VecGetArray(yy,&y);
644: idx = a->j;
645: v = a->a;
646: ii = a->i;
648: for (i=0; i<m; i++) {
649: jrow = ii[i];
650: n = ii[i+1] - jrow;
651: sum1 = 0.0;
652: sum2 = 0.0;
653: sum3 = 0.0;
654: sum4 = 0.0;
655: sum5 = 0.0;
656: nonzerorow += (n>0);
657: for (j=0; j<n; j++) {
658: sum1 += v[jrow]*x[5*idx[jrow]];
659: sum2 += v[jrow]*x[5*idx[jrow]+1];
660: sum3 += v[jrow]*x[5*idx[jrow]+2];
661: sum4 += v[jrow]*x[5*idx[jrow]+3];
662: sum5 += v[jrow]*x[5*idx[jrow]+4];
663: jrow++;
664: }
665: y[5*i] = sum1;
666: y[5*i+1] = sum2;
667: y[5*i+2] = sum3;
668: y[5*i+3] = sum4;
669: y[5*i+4] = sum5;
670: }
672: PetscLogFlops(10*a->nz - 5*nonzerorow);
673: VecRestoreArray(xx,&x);
674: VecRestoreArray(yy,&y);
675: return(0);
676: }
680: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
681: {
682: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
683: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
684: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
686: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
689: VecSet(yy,zero);
690: VecGetArray(xx,&x);
691: VecGetArray(yy,&y);
692:
693: for (i=0; i<m; i++) {
694: idx = a->j + a->i[i] ;
695: v = a->a + a->i[i] ;
696: n = a->i[i+1] - a->i[i];
697: alpha1 = x[5*i];
698: alpha2 = x[5*i+1];
699: alpha3 = x[5*i+2];
700: alpha4 = x[5*i+3];
701: alpha5 = x[5*i+4];
702: while (n-->0) {
703: y[5*(*idx)] += alpha1*(*v);
704: y[5*(*idx)+1] += alpha2*(*v);
705: y[5*(*idx)+2] += alpha3*(*v);
706: y[5*(*idx)+3] += alpha4*(*v);
707: y[5*(*idx)+4] += alpha5*(*v);
708: idx++; v++;
709: }
710: }
711: PetscLogFlops(10*a->nz);
712: VecRestoreArray(xx,&x);
713: VecRestoreArray(yy,&y);
714: return(0);
715: }
719: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
720: {
721: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
722: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
723: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
725: PetscInt m = b->AIJ->rmap->n,*idx,*ii;
726: PetscInt n,i,jrow,j;
729: if (yy != zz) {VecCopy(yy,zz);}
730: VecGetArray(xx,&x);
731: VecGetArray(zz,&y);
732: idx = a->j;
733: v = a->a;
734: ii = a->i;
736: for (i=0; i<m; i++) {
737: jrow = ii[i];
738: n = ii[i+1] - jrow;
739: sum1 = 0.0;
740: sum2 = 0.0;
741: sum3 = 0.0;
742: sum4 = 0.0;
743: sum5 = 0.0;
744: for (j=0; j<n; j++) {
745: sum1 += v[jrow]*x[5*idx[jrow]];
746: sum2 += v[jrow]*x[5*idx[jrow]+1];
747: sum3 += v[jrow]*x[5*idx[jrow]+2];
748: sum4 += v[jrow]*x[5*idx[jrow]+3];
749: sum5 += v[jrow]*x[5*idx[jrow]+4];
750: jrow++;
751: }
752: y[5*i] += sum1;
753: y[5*i+1] += sum2;
754: y[5*i+2] += sum3;
755: y[5*i+3] += sum4;
756: y[5*i+4] += sum5;
757: }
759: PetscLogFlops(10*a->nz);
760: VecRestoreArray(xx,&x);
761: VecRestoreArray(zz,&y);
762: return(0);
763: }
767: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
768: {
769: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
770: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
771: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
773: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
776: if (yy != zz) {VecCopy(yy,zz);}
777: VecGetArray(xx,&x);
778: VecGetArray(zz,&y);
779:
780: for (i=0; i<m; i++) {
781: idx = a->j + a->i[i] ;
782: v = a->a + a->i[i] ;
783: n = a->i[i+1] - a->i[i];
784: alpha1 = x[5*i];
785: alpha2 = x[5*i+1];
786: alpha3 = x[5*i+2];
787: alpha4 = x[5*i+3];
788: alpha5 = x[5*i+4];
789: while (n-->0) {
790: y[5*(*idx)] += alpha1*(*v);
791: y[5*(*idx)+1] += alpha2*(*v);
792: y[5*(*idx)+2] += alpha3*(*v);
793: y[5*(*idx)+3] += alpha4*(*v);
794: y[5*(*idx)+4] += alpha5*(*v);
795: idx++; v++;
796: }
797: }
798: PetscLogFlops(10*a->nz);
799: VecRestoreArray(xx,&x);
800: VecRestoreArray(zz,&y);
801: return(0);
802: }
804: /* ------------------------------------------------------------------------------*/
807: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
808: {
809: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
810: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
811: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
813: PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
814: PetscInt n,i,jrow,j;
817: VecGetArray(xx,&x);
818: VecGetArray(yy,&y);
819: idx = a->j;
820: v = a->a;
821: ii = a->i;
823: for (i=0; i<m; i++) {
824: jrow = ii[i];
825: n = ii[i+1] - jrow;
826: sum1 = 0.0;
827: sum2 = 0.0;
828: sum3 = 0.0;
829: sum4 = 0.0;
830: sum5 = 0.0;
831: sum6 = 0.0;
832: nonzerorow += (n>0);
833: for (j=0; j<n; j++) {
834: sum1 += v[jrow]*x[6*idx[jrow]];
835: sum2 += v[jrow]*x[6*idx[jrow]+1];
836: sum3 += v[jrow]*x[6*idx[jrow]+2];
837: sum4 += v[jrow]*x[6*idx[jrow]+3];
838: sum5 += v[jrow]*x[6*idx[jrow]+4];
839: sum6 += v[jrow]*x[6*idx[jrow]+5];
840: jrow++;
841: }
842: y[6*i] = sum1;
843: y[6*i+1] = sum2;
844: y[6*i+2] = sum3;
845: y[6*i+3] = sum4;
846: y[6*i+4] = sum5;
847: y[6*i+5] = sum6;
848: }
850: PetscLogFlops(12*a->nz - 6*nonzerorow);
851: VecRestoreArray(xx,&x);
852: VecRestoreArray(yy,&y);
853: return(0);
854: }
858: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
859: {
860: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
861: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
862: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
864: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
867: VecSet(yy,zero);
868: VecGetArray(xx,&x);
869: VecGetArray(yy,&y);
871: for (i=0; i<m; i++) {
872: idx = a->j + a->i[i] ;
873: v = a->a + a->i[i] ;
874: n = a->i[i+1] - a->i[i];
875: alpha1 = x[6*i];
876: alpha2 = x[6*i+1];
877: alpha3 = x[6*i+2];
878: alpha4 = x[6*i+3];
879: alpha5 = x[6*i+4];
880: alpha6 = x[6*i+5];
881: while (n-->0) {
882: y[6*(*idx)] += alpha1*(*v);
883: y[6*(*idx)+1] += alpha2*(*v);
884: y[6*(*idx)+2] += alpha3*(*v);
885: y[6*(*idx)+3] += alpha4*(*v);
886: y[6*(*idx)+4] += alpha5*(*v);
887: y[6*(*idx)+5] += alpha6*(*v);
888: idx++; v++;
889: }
890: }
891: PetscLogFlops(12*a->nz);
892: VecRestoreArray(xx,&x);
893: VecRestoreArray(yy,&y);
894: return(0);
895: }
899: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
900: {
901: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
902: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
903: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
905: PetscInt m = b->AIJ->rmap->n,*idx,*ii;
906: PetscInt n,i,jrow,j;
909: if (yy != zz) {VecCopy(yy,zz);}
910: VecGetArray(xx,&x);
911: VecGetArray(zz,&y);
912: idx = a->j;
913: v = a->a;
914: ii = a->i;
916: for (i=0; i<m; i++) {
917: jrow = ii[i];
918: n = ii[i+1] - jrow;
919: sum1 = 0.0;
920: sum2 = 0.0;
921: sum3 = 0.0;
922: sum4 = 0.0;
923: sum5 = 0.0;
924: sum6 = 0.0;
925: for (j=0; j<n; j++) {
926: sum1 += v[jrow]*x[6*idx[jrow]];
927: sum2 += v[jrow]*x[6*idx[jrow]+1];
928: sum3 += v[jrow]*x[6*idx[jrow]+2];
929: sum4 += v[jrow]*x[6*idx[jrow]+3];
930: sum5 += v[jrow]*x[6*idx[jrow]+4];
931: sum6 += v[jrow]*x[6*idx[jrow]+5];
932: jrow++;
933: }
934: y[6*i] += sum1;
935: y[6*i+1] += sum2;
936: y[6*i+2] += sum3;
937: y[6*i+3] += sum4;
938: y[6*i+4] += sum5;
939: y[6*i+5] += sum6;
940: }
942: PetscLogFlops(12*a->nz);
943: VecRestoreArray(xx,&x);
944: VecRestoreArray(zz,&y);
945: return(0);
946: }
950: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
951: {
952: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
953: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
954: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
956: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
959: if (yy != zz) {VecCopy(yy,zz);}
960: VecGetArray(xx,&x);
961: VecGetArray(zz,&y);
962:
963: for (i=0; i<m; i++) {
964: idx = a->j + a->i[i] ;
965: v = a->a + a->i[i] ;
966: n = a->i[i+1] - a->i[i];
967: alpha1 = x[6*i];
968: alpha2 = x[6*i+1];
969: alpha3 = x[6*i+2];
970: alpha4 = x[6*i+3];
971: alpha5 = x[6*i+4];
972: alpha6 = x[6*i+5];
973: while (n-->0) {
974: y[6*(*idx)] += alpha1*(*v);
975: y[6*(*idx)+1] += alpha2*(*v);
976: y[6*(*idx)+2] += alpha3*(*v);
977: y[6*(*idx)+3] += alpha4*(*v);
978: y[6*(*idx)+4] += alpha5*(*v);
979: y[6*(*idx)+5] += alpha6*(*v);
980: idx++; v++;
981: }
982: }
983: PetscLogFlops(12*a->nz);
984: VecRestoreArray(xx,&x);
985: VecRestoreArray(zz,&y);
986: return(0);
987: }
989: /* ------------------------------------------------------------------------------*/
992: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
993: {
994: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
995: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
996: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
998: PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
999: PetscInt n,i,jrow,j;
1002: VecGetArray(xx,&x);
1003: VecGetArray(yy,&y);
1004: idx = a->j;
1005: v = a->a;
1006: ii = a->i;
1008: for (i=0; i<m; i++) {
1009: jrow = ii[i];
1010: n = ii[i+1] - jrow;
1011: sum1 = 0.0;
1012: sum2 = 0.0;
1013: sum3 = 0.0;
1014: sum4 = 0.0;
1015: sum5 = 0.0;
1016: sum6 = 0.0;
1017: sum7 = 0.0;
1018: nonzerorow += (n>0);
1019: for (j=0; j<n; j++) {
1020: sum1 += v[jrow]*x[7*idx[jrow]];
1021: sum2 += v[jrow]*x[7*idx[jrow]+1];
1022: sum3 += v[jrow]*x[7*idx[jrow]+2];
1023: sum4 += v[jrow]*x[7*idx[jrow]+3];
1024: sum5 += v[jrow]*x[7*idx[jrow]+4];
1025: sum6 += v[jrow]*x[7*idx[jrow]+5];
1026: sum7 += v[jrow]*x[7*idx[jrow]+6];
1027: jrow++;
1028: }
1029: y[7*i] = sum1;
1030: y[7*i+1] = sum2;
1031: y[7*i+2] = sum3;
1032: y[7*i+3] = sum4;
1033: y[7*i+4] = sum5;
1034: y[7*i+5] = sum6;
1035: y[7*i+6] = sum7;
1036: }
1038: PetscLogFlops(14*a->nz - 7*nonzerorow);
1039: VecRestoreArray(xx,&x);
1040: VecRestoreArray(yy,&y);
1041: return(0);
1042: }
1046: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1047: {
1048: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1049: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1050: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0;
1052: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
1055: VecSet(yy,zero);
1056: VecGetArray(xx,&x);
1057: VecGetArray(yy,&y);
1059: for (i=0; i<m; i++) {
1060: idx = a->j + a->i[i] ;
1061: v = a->a + a->i[i] ;
1062: n = a->i[i+1] - a->i[i];
1063: alpha1 = x[7*i];
1064: alpha2 = x[7*i+1];
1065: alpha3 = x[7*i+2];
1066: alpha4 = x[7*i+3];
1067: alpha5 = x[7*i+4];
1068: alpha6 = x[7*i+5];
1069: alpha7 = x[7*i+6];
1070: while (n-->0) {
1071: y[7*(*idx)] += alpha1*(*v);
1072: y[7*(*idx)+1] += alpha2*(*v);
1073: y[7*(*idx)+2] += alpha3*(*v);
1074: y[7*(*idx)+3] += alpha4*(*v);
1075: y[7*(*idx)+4] += alpha5*(*v);
1076: y[7*(*idx)+5] += alpha6*(*v);
1077: y[7*(*idx)+6] += alpha7*(*v);
1078: idx++; v++;
1079: }
1080: }
1081: PetscLogFlops(14*a->nz);
1082: VecRestoreArray(xx,&x);
1083: VecRestoreArray(yy,&y);
1084: return(0);
1085: }
1089: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1090: {
1091: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1092: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1093: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1095: PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1096: PetscInt n,i,jrow,j;
1099: if (yy != zz) {VecCopy(yy,zz);}
1100: VecGetArray(xx,&x);
1101: VecGetArray(zz,&y);
1102: idx = a->j;
1103: v = a->a;
1104: ii = a->i;
1106: for (i=0; i<m; i++) {
1107: jrow = ii[i];
1108: n = ii[i+1] - jrow;
1109: sum1 = 0.0;
1110: sum2 = 0.0;
1111: sum3 = 0.0;
1112: sum4 = 0.0;
1113: sum5 = 0.0;
1114: sum6 = 0.0;
1115: sum7 = 0.0;
1116: for (j=0; j<n; j++) {
1117: sum1 += v[jrow]*x[7*idx[jrow]];
1118: sum2 += v[jrow]*x[7*idx[jrow]+1];
1119: sum3 += v[jrow]*x[7*idx[jrow]+2];
1120: sum4 += v[jrow]*x[7*idx[jrow]+3];
1121: sum5 += v[jrow]*x[7*idx[jrow]+4];
1122: sum6 += v[jrow]*x[7*idx[jrow]+5];
1123: sum7 += v[jrow]*x[7*idx[jrow]+6];
1124: jrow++;
1125: }
1126: y[7*i] += sum1;
1127: y[7*i+1] += sum2;
1128: y[7*i+2] += sum3;
1129: y[7*i+3] += sum4;
1130: y[7*i+4] += sum5;
1131: y[7*i+5] += sum6;
1132: y[7*i+6] += sum7;
1133: }
1135: PetscLogFlops(14*a->nz);
1136: VecRestoreArray(xx,&x);
1137: VecRestoreArray(zz,&y);
1138: return(0);
1139: }
1143: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1144: {
1145: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1146: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1147: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1149: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
1152: if (yy != zz) {VecCopy(yy,zz);}
1153: VecGetArray(xx,&x);
1154: VecGetArray(zz,&y);
1155: for (i=0; i<m; i++) {
1156: idx = a->j + a->i[i] ;
1157: v = a->a + a->i[i] ;
1158: n = a->i[i+1] - a->i[i];
1159: alpha1 = x[7*i];
1160: alpha2 = x[7*i+1];
1161: alpha3 = x[7*i+2];
1162: alpha4 = x[7*i+3];
1163: alpha5 = x[7*i+4];
1164: alpha6 = x[7*i+5];
1165: alpha7 = x[7*i+6];
1166: while (n-->0) {
1167: y[7*(*idx)] += alpha1*(*v);
1168: y[7*(*idx)+1] += alpha2*(*v);
1169: y[7*(*idx)+2] += alpha3*(*v);
1170: y[7*(*idx)+3] += alpha4*(*v);
1171: y[7*(*idx)+4] += alpha5*(*v);
1172: y[7*(*idx)+5] += alpha6*(*v);
1173: y[7*(*idx)+6] += alpha7*(*v);
1174: idx++; v++;
1175: }
1176: }
1177: PetscLogFlops(14*a->nz);
1178: VecRestoreArray(xx,&x);
1179: VecRestoreArray(zz,&y);
1180: return(0);
1181: }
1185: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1186: {
1187: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1188: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1189: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1191: PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
1192: PetscInt n,i,jrow,j;
1195: VecGetArray(xx,&x);
1196: VecGetArray(yy,&y);
1197: idx = a->j;
1198: v = a->a;
1199: ii = a->i;
1201: for (i=0; i<m; i++) {
1202: jrow = ii[i];
1203: n = ii[i+1] - jrow;
1204: sum1 = 0.0;
1205: sum2 = 0.0;
1206: sum3 = 0.0;
1207: sum4 = 0.0;
1208: sum5 = 0.0;
1209: sum6 = 0.0;
1210: sum7 = 0.0;
1211: sum8 = 0.0;
1212: nonzerorow += (n>0);
1213: for (j=0; j<n; j++) {
1214: sum1 += v[jrow]*x[8*idx[jrow]];
1215: sum2 += v[jrow]*x[8*idx[jrow]+1];
1216: sum3 += v[jrow]*x[8*idx[jrow]+2];
1217: sum4 += v[jrow]*x[8*idx[jrow]+3];
1218: sum5 += v[jrow]*x[8*idx[jrow]+4];
1219: sum6 += v[jrow]*x[8*idx[jrow]+5];
1220: sum7 += v[jrow]*x[8*idx[jrow]+6];
1221: sum8 += v[jrow]*x[8*idx[jrow]+7];
1222: jrow++;
1223: }
1224: y[8*i] = sum1;
1225: y[8*i+1] = sum2;
1226: y[8*i+2] = sum3;
1227: y[8*i+3] = sum4;
1228: y[8*i+4] = sum5;
1229: y[8*i+5] = sum6;
1230: y[8*i+6] = sum7;
1231: y[8*i+7] = sum8;
1232: }
1234: PetscLogFlops(16*a->nz - 8*nonzerorow);
1235: VecRestoreArray(xx,&x);
1236: VecRestoreArray(yy,&y);
1237: return(0);
1238: }
1242: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1243: {
1244: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1245: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1246: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1248: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
1251: VecSet(yy,zero);
1252: VecGetArray(xx,&x);
1253: VecGetArray(yy,&y);
1255: for (i=0; i<m; i++) {
1256: idx = a->j + a->i[i] ;
1257: v = a->a + a->i[i] ;
1258: n = a->i[i+1] - a->i[i];
1259: alpha1 = x[8*i];
1260: alpha2 = x[8*i+1];
1261: alpha3 = x[8*i+2];
1262: alpha4 = x[8*i+3];
1263: alpha5 = x[8*i+4];
1264: alpha6 = x[8*i+5];
1265: alpha7 = x[8*i+6];
1266: alpha8 = x[8*i+7];
1267: while (n-->0) {
1268: y[8*(*idx)] += alpha1*(*v);
1269: y[8*(*idx)+1] += alpha2*(*v);
1270: y[8*(*idx)+2] += alpha3*(*v);
1271: y[8*(*idx)+3] += alpha4*(*v);
1272: y[8*(*idx)+4] += alpha5*(*v);
1273: y[8*(*idx)+5] += alpha6*(*v);
1274: y[8*(*idx)+6] += alpha7*(*v);
1275: y[8*(*idx)+7] += alpha8*(*v);
1276: idx++; v++;
1277: }
1278: }
1279: PetscLogFlops(16*a->nz);
1280: VecRestoreArray(xx,&x);
1281: VecRestoreArray(yy,&y);
1282: return(0);
1283: }
1287: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1288: {
1289: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1290: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1291: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1293: PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1294: PetscInt n,i,jrow,j;
1297: if (yy != zz) {VecCopy(yy,zz);}
1298: VecGetArray(xx,&x);
1299: VecGetArray(zz,&y);
1300: idx = a->j;
1301: v = a->a;
1302: ii = a->i;
1304: for (i=0; i<m; i++) {
1305: jrow = ii[i];
1306: n = ii[i+1] - jrow;
1307: sum1 = 0.0;
1308: sum2 = 0.0;
1309: sum3 = 0.0;
1310: sum4 = 0.0;
1311: sum5 = 0.0;
1312: sum6 = 0.0;
1313: sum7 = 0.0;
1314: sum8 = 0.0;
1315: for (j=0; j<n; j++) {
1316: sum1 += v[jrow]*x[8*idx[jrow]];
1317: sum2 += v[jrow]*x[8*idx[jrow]+1];
1318: sum3 += v[jrow]*x[8*idx[jrow]+2];
1319: sum4 += v[jrow]*x[8*idx[jrow]+3];
1320: sum5 += v[jrow]*x[8*idx[jrow]+4];
1321: sum6 += v[jrow]*x[8*idx[jrow]+5];
1322: sum7 += v[jrow]*x[8*idx[jrow]+6];
1323: sum8 += v[jrow]*x[8*idx[jrow]+7];
1324: jrow++;
1325: }
1326: y[8*i] += sum1;
1327: y[8*i+1] += sum2;
1328: y[8*i+2] += sum3;
1329: y[8*i+3] += sum4;
1330: y[8*i+4] += sum5;
1331: y[8*i+5] += sum6;
1332: y[8*i+6] += sum7;
1333: y[8*i+7] += sum8;
1334: }
1336: PetscLogFlops(16*a->nz);
1337: VecRestoreArray(xx,&x);
1338: VecRestoreArray(zz,&y);
1339: return(0);
1340: }
1344: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1345: {
1346: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1347: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1348: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1350: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
1353: if (yy != zz) {VecCopy(yy,zz);}
1354: VecGetArray(xx,&x);
1355: VecGetArray(zz,&y);
1356: for (i=0; i<m; i++) {
1357: idx = a->j + a->i[i] ;
1358: v = a->a + a->i[i] ;
1359: n = a->i[i+1] - a->i[i];
1360: alpha1 = x[8*i];
1361: alpha2 = x[8*i+1];
1362: alpha3 = x[8*i+2];
1363: alpha4 = x[8*i+3];
1364: alpha5 = x[8*i+4];
1365: alpha6 = x[8*i+5];
1366: alpha7 = x[8*i+6];
1367: alpha8 = x[8*i+7];
1368: while (n-->0) {
1369: y[8*(*idx)] += alpha1*(*v);
1370: y[8*(*idx)+1] += alpha2*(*v);
1371: y[8*(*idx)+2] += alpha3*(*v);
1372: y[8*(*idx)+3] += alpha4*(*v);
1373: y[8*(*idx)+4] += alpha5*(*v);
1374: y[8*(*idx)+5] += alpha6*(*v);
1375: y[8*(*idx)+6] += alpha7*(*v);
1376: y[8*(*idx)+7] += alpha8*(*v);
1377: idx++; v++;
1378: }
1379: }
1380: PetscLogFlops(16*a->nz);
1381: VecRestoreArray(xx,&x);
1382: VecRestoreArray(zz,&y);
1383: return(0);
1384: }
1386: /* ------------------------------------------------------------------------------*/
1389: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1390: {
1391: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1392: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1393: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1395: PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
1396: PetscInt n,i,jrow,j;
1399: VecGetArray(xx,&x);
1400: VecGetArray(yy,&y);
1401: idx = a->j;
1402: v = a->a;
1403: ii = a->i;
1405: for (i=0; i<m; i++) {
1406: jrow = ii[i];
1407: n = ii[i+1] - jrow;
1408: sum1 = 0.0;
1409: sum2 = 0.0;
1410: sum3 = 0.0;
1411: sum4 = 0.0;
1412: sum5 = 0.0;
1413: sum6 = 0.0;
1414: sum7 = 0.0;
1415: sum8 = 0.0;
1416: sum9 = 0.0;
1417: nonzerorow += (n>0);
1418: for (j=0; j<n; j++) {
1419: sum1 += v[jrow]*x[9*idx[jrow]];
1420: sum2 += v[jrow]*x[9*idx[jrow]+1];
1421: sum3 += v[jrow]*x[9*idx[jrow]+2];
1422: sum4 += v[jrow]*x[9*idx[jrow]+3];
1423: sum5 += v[jrow]*x[9*idx[jrow]+4];
1424: sum6 += v[jrow]*x[9*idx[jrow]+5];
1425: sum7 += v[jrow]*x[9*idx[jrow]+6];
1426: sum8 += v[jrow]*x[9*idx[jrow]+7];
1427: sum9 += v[jrow]*x[9*idx[jrow]+8];
1428: jrow++;
1429: }
1430: y[9*i] = sum1;
1431: y[9*i+1] = sum2;
1432: y[9*i+2] = sum3;
1433: y[9*i+3] = sum4;
1434: y[9*i+4] = sum5;
1435: y[9*i+5] = sum6;
1436: y[9*i+6] = sum7;
1437: y[9*i+7] = sum8;
1438: y[9*i+8] = sum9;
1439: }
1441: PetscLogFlops(18*a->nz - 9*nonzerorow);
1442: VecRestoreArray(xx,&x);
1443: VecRestoreArray(yy,&y);
1444: return(0);
1445: }
1447: /* ------------------------------------------------------------------------------*/
1451: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1452: {
1453: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1454: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1455: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1457: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
1460: VecSet(yy,zero);
1461: VecGetArray(xx,&x);
1462: VecGetArray(yy,&y);
1464: for (i=0; i<m; i++) {
1465: idx = a->j + a->i[i] ;
1466: v = a->a + a->i[i] ;
1467: n = a->i[i+1] - a->i[i];
1468: alpha1 = x[9*i];
1469: alpha2 = x[9*i+1];
1470: alpha3 = x[9*i+2];
1471: alpha4 = x[9*i+3];
1472: alpha5 = x[9*i+4];
1473: alpha6 = x[9*i+5];
1474: alpha7 = x[9*i+6];
1475: alpha8 = x[9*i+7];
1476: alpha9 = x[9*i+8];
1477: while (n-->0) {
1478: y[9*(*idx)] += alpha1*(*v);
1479: y[9*(*idx)+1] += alpha2*(*v);
1480: y[9*(*idx)+2] += alpha3*(*v);
1481: y[9*(*idx)+3] += alpha4*(*v);
1482: y[9*(*idx)+4] += alpha5*(*v);
1483: y[9*(*idx)+5] += alpha6*(*v);
1484: y[9*(*idx)+6] += alpha7*(*v);
1485: y[9*(*idx)+7] += alpha8*(*v);
1486: y[9*(*idx)+8] += alpha9*(*v);
1487: idx++; v++;
1488: }
1489: }
1490: PetscLogFlops(18*a->nz);
1491: VecRestoreArray(xx,&x);
1492: VecRestoreArray(yy,&y);
1493: return(0);
1494: }
1498: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1499: {
1500: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1501: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1502: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1504: PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1505: PetscInt n,i,jrow,j;
1508: if (yy != zz) {VecCopy(yy,zz);}
1509: VecGetArray(xx,&x);
1510: VecGetArray(zz,&y);
1511: idx = a->j;
1512: v = a->a;
1513: ii = a->i;
1515: for (i=0; i<m; i++) {
1516: jrow = ii[i];
1517: n = ii[i+1] - jrow;
1518: sum1 = 0.0;
1519: sum2 = 0.0;
1520: sum3 = 0.0;
1521: sum4 = 0.0;
1522: sum5 = 0.0;
1523: sum6 = 0.0;
1524: sum7 = 0.0;
1525: sum8 = 0.0;
1526: sum9 = 0.0;
1527: for (j=0; j<n; j++) {
1528: sum1 += v[jrow]*x[9*idx[jrow]];
1529: sum2 += v[jrow]*x[9*idx[jrow]+1];
1530: sum3 += v[jrow]*x[9*idx[jrow]+2];
1531: sum4 += v[jrow]*x[9*idx[jrow]+3];
1532: sum5 += v[jrow]*x[9*idx[jrow]+4];
1533: sum6 += v[jrow]*x[9*idx[jrow]+5];
1534: sum7 += v[jrow]*x[9*idx[jrow]+6];
1535: sum8 += v[jrow]*x[9*idx[jrow]+7];
1536: sum9 += v[jrow]*x[9*idx[jrow]+8];
1537: jrow++;
1538: }
1539: y[9*i] += sum1;
1540: y[9*i+1] += sum2;
1541: y[9*i+2] += sum3;
1542: y[9*i+3] += sum4;
1543: y[9*i+4] += sum5;
1544: y[9*i+5] += sum6;
1545: y[9*i+6] += sum7;
1546: y[9*i+7] += sum8;
1547: y[9*i+8] += sum9;
1548: }
1550: PetscLogFlops(18*a->nz);
1551: VecRestoreArray(xx,&x);
1552: VecRestoreArray(zz,&y);
1553: return(0);
1554: }
1558: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1559: {
1560: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1561: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1562: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1564: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
1567: if (yy != zz) {VecCopy(yy,zz);}
1568: VecGetArray(xx,&x);
1569: VecGetArray(zz,&y);
1570: for (i=0; i<m; i++) {
1571: idx = a->j + a->i[i] ;
1572: v = a->a + a->i[i] ;
1573: n = a->i[i+1] - a->i[i];
1574: alpha1 = x[9*i];
1575: alpha2 = x[9*i+1];
1576: alpha3 = x[9*i+2];
1577: alpha4 = x[9*i+3];
1578: alpha5 = x[9*i+4];
1579: alpha6 = x[9*i+5];
1580: alpha7 = x[9*i+6];
1581: alpha8 = x[9*i+7];
1582: alpha9 = x[9*i+8];
1583: while (n-->0) {
1584: y[9*(*idx)] += alpha1*(*v);
1585: y[9*(*idx)+1] += alpha2*(*v);
1586: y[9*(*idx)+2] += alpha3*(*v);
1587: y[9*(*idx)+3] += alpha4*(*v);
1588: y[9*(*idx)+4] += alpha5*(*v);
1589: y[9*(*idx)+5] += alpha6*(*v);
1590: y[9*(*idx)+6] += alpha7*(*v);
1591: y[9*(*idx)+7] += alpha8*(*v);
1592: y[9*(*idx)+8] += alpha9*(*v);
1593: idx++; v++;
1594: }
1595: }
1596: PetscLogFlops(18*a->nz);
1597: VecRestoreArray(xx,&x);
1598: VecRestoreArray(zz,&y);
1599: return(0);
1600: }
1603: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1604: {
1605: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1606: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1607: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1609: PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
1610: PetscInt n,i,jrow,j;
1613: VecGetArray(xx,&x);
1614: VecGetArray(yy,&y);
1615: idx = a->j;
1616: v = a->a;
1617: ii = a->i;
1619: for (i=0; i<m; i++) {
1620: jrow = ii[i];
1621: n = ii[i+1] - jrow;
1622: sum1 = 0.0;
1623: sum2 = 0.0;
1624: sum3 = 0.0;
1625: sum4 = 0.0;
1626: sum5 = 0.0;
1627: sum6 = 0.0;
1628: sum7 = 0.0;
1629: sum8 = 0.0;
1630: sum9 = 0.0;
1631: sum10 = 0.0;
1632: nonzerorow += (n>0);
1633: for (j=0; j<n; j++) {
1634: sum1 += v[jrow]*x[10*idx[jrow]];
1635: sum2 += v[jrow]*x[10*idx[jrow]+1];
1636: sum3 += v[jrow]*x[10*idx[jrow]+2];
1637: sum4 += v[jrow]*x[10*idx[jrow]+3];
1638: sum5 += v[jrow]*x[10*idx[jrow]+4];
1639: sum6 += v[jrow]*x[10*idx[jrow]+5];
1640: sum7 += v[jrow]*x[10*idx[jrow]+6];
1641: sum8 += v[jrow]*x[10*idx[jrow]+7];
1642: sum9 += v[jrow]*x[10*idx[jrow]+8];
1643: sum10 += v[jrow]*x[10*idx[jrow]+9];
1644: jrow++;
1645: }
1646: y[10*i] = sum1;
1647: y[10*i+1] = sum2;
1648: y[10*i+2] = sum3;
1649: y[10*i+3] = sum4;
1650: y[10*i+4] = sum5;
1651: y[10*i+5] = sum6;
1652: y[10*i+6] = sum7;
1653: y[10*i+7] = sum8;
1654: y[10*i+8] = sum9;
1655: y[10*i+9] = sum10;
1656: }
1658: PetscLogFlops(20*a->nz - 10*nonzerorow);
1659: VecRestoreArray(xx,&x);
1660: VecRestoreArray(yy,&y);
1661: return(0);
1662: }
1666: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1667: {
1668: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1669: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1670: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1672: PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1673: PetscInt n,i,jrow,j;
1676: if (yy != zz) {VecCopy(yy,zz);}
1677: VecGetArray(xx,&x);
1678: VecGetArray(zz,&y);
1679: idx = a->j;
1680: v = a->a;
1681: ii = a->i;
1683: for (i=0; i<m; i++) {
1684: jrow = ii[i];
1685: n = ii[i+1] - jrow;
1686: sum1 = 0.0;
1687: sum2 = 0.0;
1688: sum3 = 0.0;
1689: sum4 = 0.0;
1690: sum5 = 0.0;
1691: sum6 = 0.0;
1692: sum7 = 0.0;
1693: sum8 = 0.0;
1694: sum9 = 0.0;
1695: sum10 = 0.0;
1696: for (j=0; j<n; j++) {
1697: sum1 += v[jrow]*x[10*idx[jrow]];
1698: sum2 += v[jrow]*x[10*idx[jrow]+1];
1699: sum3 += v[jrow]*x[10*idx[jrow]+2];
1700: sum4 += v[jrow]*x[10*idx[jrow]+3];
1701: sum5 += v[jrow]*x[10*idx[jrow]+4];
1702: sum6 += v[jrow]*x[10*idx[jrow]+5];
1703: sum7 += v[jrow]*x[10*idx[jrow]+6];
1704: sum8 += v[jrow]*x[10*idx[jrow]+7];
1705: sum9 += v[jrow]*x[10*idx[jrow]+8];
1706: sum10 += v[jrow]*x[10*idx[jrow]+9];
1707: jrow++;
1708: }
1709: y[10*i] += sum1;
1710: y[10*i+1] += sum2;
1711: y[10*i+2] += sum3;
1712: y[10*i+3] += sum4;
1713: y[10*i+4] += sum5;
1714: y[10*i+5] += sum6;
1715: y[10*i+6] += sum7;
1716: y[10*i+7] += sum8;
1717: y[10*i+8] += sum9;
1718: y[10*i+9] += sum10;
1719: }
1721: PetscLogFlops(20*a->nz);
1722: VecRestoreArray(xx,&x);
1723: VecRestoreArray(yy,&y);
1724: return(0);
1725: }
1729: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1730: {
1731: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1732: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1733: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0;
1735: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
1738: VecSet(yy,zero);
1739: VecGetArray(xx,&x);
1740: VecGetArray(yy,&y);
1742: for (i=0; i<m; i++) {
1743: idx = a->j + a->i[i] ;
1744: v = a->a + a->i[i] ;
1745: n = a->i[i+1] - a->i[i];
1746: alpha1 = x[10*i];
1747: alpha2 = x[10*i+1];
1748: alpha3 = x[10*i+2];
1749: alpha4 = x[10*i+3];
1750: alpha5 = x[10*i+4];
1751: alpha6 = x[10*i+5];
1752: alpha7 = x[10*i+6];
1753: alpha8 = x[10*i+7];
1754: alpha9 = x[10*i+8];
1755: alpha10 = x[10*i+9];
1756: while (n-->0) {
1757: y[10*(*idx)] += alpha1*(*v);
1758: y[10*(*idx)+1] += alpha2*(*v);
1759: y[10*(*idx)+2] += alpha3*(*v);
1760: y[10*(*idx)+3] += alpha4*(*v);
1761: y[10*(*idx)+4] += alpha5*(*v);
1762: y[10*(*idx)+5] += alpha6*(*v);
1763: y[10*(*idx)+6] += alpha7*(*v);
1764: y[10*(*idx)+7] += alpha8*(*v);
1765: y[10*(*idx)+8] += alpha9*(*v);
1766: y[10*(*idx)+9] += alpha10*(*v);
1767: idx++; v++;
1768: }
1769: }
1770: PetscLogFlops(20*a->nz);
1771: VecRestoreArray(xx,&x);
1772: VecRestoreArray(yy,&y);
1773: return(0);
1774: }
1778: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1779: {
1780: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1781: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1782: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1784: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
1787: if (yy != zz) {VecCopy(yy,zz);}
1788: VecGetArray(xx,&x);
1789: VecGetArray(zz,&y);
1790: for (i=0; i<m; i++) {
1791: idx = a->j + a->i[i] ;
1792: v = a->a + a->i[i] ;
1793: n = a->i[i+1] - a->i[i];
1794: alpha1 = x[10*i];
1795: alpha2 = x[10*i+1];
1796: alpha3 = x[10*i+2];
1797: alpha4 = x[10*i+3];
1798: alpha5 = x[10*i+4];
1799: alpha6 = x[10*i+5];
1800: alpha7 = x[10*i+6];
1801: alpha8 = x[10*i+7];
1802: alpha9 = x[10*i+8];
1803: alpha10 = x[10*i+9];
1804: while (n-->0) {
1805: y[10*(*idx)] += alpha1*(*v);
1806: y[10*(*idx)+1] += alpha2*(*v);
1807: y[10*(*idx)+2] += alpha3*(*v);
1808: y[10*(*idx)+3] += alpha4*(*v);
1809: y[10*(*idx)+4] += alpha5*(*v);
1810: y[10*(*idx)+5] += alpha6*(*v);
1811: y[10*(*idx)+6] += alpha7*(*v);
1812: y[10*(*idx)+7] += alpha8*(*v);
1813: y[10*(*idx)+8] += alpha9*(*v);
1814: y[10*(*idx)+9] += alpha10*(*v);
1815: idx++; v++;
1816: }
1817: }
1818: PetscLogFlops(20*a->nz);
1819: VecRestoreArray(xx,&x);
1820: VecRestoreArray(zz,&y);
1821: return(0);
1822: }
1825: /*--------------------------------------------------------------------------------------------*/
1828: PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1829: {
1830: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1831: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1832: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1834: PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
1835: PetscInt n,i,jrow,j;
1838: VecGetArray(xx,&x);
1839: VecGetArray(yy,&y);
1840: idx = a->j;
1841: v = a->a;
1842: ii = a->i;
1844: for (i=0; i<m; i++) {
1845: jrow = ii[i];
1846: n = ii[i+1] - jrow;
1847: sum1 = 0.0;
1848: sum2 = 0.0;
1849: sum3 = 0.0;
1850: sum4 = 0.0;
1851: sum5 = 0.0;
1852: sum6 = 0.0;
1853: sum7 = 0.0;
1854: sum8 = 0.0;
1855: sum9 = 0.0;
1856: sum10 = 0.0;
1857: sum11 = 0.0;
1858: nonzerorow += (n>0);
1859: for (j=0; j<n; j++) {
1860: sum1 += v[jrow]*x[11*idx[jrow]];
1861: sum2 += v[jrow]*x[11*idx[jrow]+1];
1862: sum3 += v[jrow]*x[11*idx[jrow]+2];
1863: sum4 += v[jrow]*x[11*idx[jrow]+3];
1864: sum5 += v[jrow]*x[11*idx[jrow]+4];
1865: sum6 += v[jrow]*x[11*idx[jrow]+5];
1866: sum7 += v[jrow]*x[11*idx[jrow]+6];
1867: sum8 += v[jrow]*x[11*idx[jrow]+7];
1868: sum9 += v[jrow]*x[11*idx[jrow]+8];
1869: sum10 += v[jrow]*x[11*idx[jrow]+9];
1870: sum11 += v[jrow]*x[11*idx[jrow]+10];
1871: jrow++;
1872: }
1873: y[11*i] = sum1;
1874: y[11*i+1] = sum2;
1875: y[11*i+2] = sum3;
1876: y[11*i+3] = sum4;
1877: y[11*i+4] = sum5;
1878: y[11*i+5] = sum6;
1879: y[11*i+6] = sum7;
1880: y[11*i+7] = sum8;
1881: y[11*i+8] = sum9;
1882: y[11*i+9] = sum10;
1883: y[11*i+10] = sum11;
1884: }
1886: PetscLogFlops(22*a->nz - 11*nonzerorow);
1887: VecRestoreArray(xx,&x);
1888: VecRestoreArray(yy,&y);
1889: return(0);
1890: }
1894: PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1895: {
1896: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1897: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1898: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1900: PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1901: PetscInt n,i,jrow,j;
1904: if (yy != zz) {VecCopy(yy,zz);}
1905: VecGetArray(xx,&x);
1906: VecGetArray(zz,&y);
1907: idx = a->j;
1908: v = a->a;
1909: ii = a->i;
1911: for (i=0; i<m; i++) {
1912: jrow = ii[i];
1913: n = ii[i+1] - jrow;
1914: sum1 = 0.0;
1915: sum2 = 0.0;
1916: sum3 = 0.0;
1917: sum4 = 0.0;
1918: sum5 = 0.0;
1919: sum6 = 0.0;
1920: sum7 = 0.0;
1921: sum8 = 0.0;
1922: sum9 = 0.0;
1923: sum10 = 0.0;
1924: sum11 = 0.0;
1925: for (j=0; j<n; j++) {
1926: sum1 += v[jrow]*x[11*idx[jrow]];
1927: sum2 += v[jrow]*x[11*idx[jrow]+1];
1928: sum3 += v[jrow]*x[11*idx[jrow]+2];
1929: sum4 += v[jrow]*x[11*idx[jrow]+3];
1930: sum5 += v[jrow]*x[11*idx[jrow]+4];
1931: sum6 += v[jrow]*x[11*idx[jrow]+5];
1932: sum7 += v[jrow]*x[11*idx[jrow]+6];
1933: sum8 += v[jrow]*x[11*idx[jrow]+7];
1934: sum9 += v[jrow]*x[11*idx[jrow]+8];
1935: sum10 += v[jrow]*x[11*idx[jrow]+9];
1936: sum11 += v[jrow]*x[11*idx[jrow]+10];
1937: jrow++;
1938: }
1939: y[11*i] += sum1;
1940: y[11*i+1] += sum2;
1941: y[11*i+2] += sum3;
1942: y[11*i+3] += sum4;
1943: y[11*i+4] += sum5;
1944: y[11*i+5] += sum6;
1945: y[11*i+6] += sum7;
1946: y[11*i+7] += sum8;
1947: y[11*i+8] += sum9;
1948: y[11*i+9] += sum10;
1949: y[11*i+10] += sum11;
1950: }
1952: PetscLogFlops(22*a->nz);
1953: VecRestoreArray(xx,&x);
1954: VecRestoreArray(yy,&y);
1955: return(0);
1956: }
1960: PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1961: {
1962: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1963: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1964: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11,zero = 0.0;
1966: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
1969: VecSet(yy,zero);
1970: VecGetArray(xx,&x);
1971: VecGetArray(yy,&y);
1973: for (i=0; i<m; i++) {
1974: idx = a->j + a->i[i] ;
1975: v = a->a + a->i[i] ;
1976: n = a->i[i+1] - a->i[i];
1977: alpha1 = x[11*i];
1978: alpha2 = x[11*i+1];
1979: alpha3 = x[11*i+2];
1980: alpha4 = x[11*i+3];
1981: alpha5 = x[11*i+4];
1982: alpha6 = x[11*i+5];
1983: alpha7 = x[11*i+6];
1984: alpha8 = x[11*i+7];
1985: alpha9 = x[11*i+8];
1986: alpha10 = x[11*i+9];
1987: alpha11 = x[11*i+10];
1988: while (n-->0) {
1989: y[11*(*idx)] += alpha1*(*v);
1990: y[11*(*idx)+1] += alpha2*(*v);
1991: y[11*(*idx)+2] += alpha3*(*v);
1992: y[11*(*idx)+3] += alpha4*(*v);
1993: y[11*(*idx)+4] += alpha5*(*v);
1994: y[11*(*idx)+5] += alpha6*(*v);
1995: y[11*(*idx)+6] += alpha7*(*v);
1996: y[11*(*idx)+7] += alpha8*(*v);
1997: y[11*(*idx)+8] += alpha9*(*v);
1998: y[11*(*idx)+9] += alpha10*(*v);
1999: y[11*(*idx)+10] += alpha11*(*v);
2000: idx++; v++;
2001: }
2002: }
2003: PetscLogFlops(22*a->nz);
2004: VecRestoreArray(xx,&x);
2005: VecRestoreArray(yy,&y);
2006: return(0);
2007: }
2011: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2012: {
2013: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2014: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2015: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2017: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
2020: if (yy != zz) {VecCopy(yy,zz);}
2021: VecGetArray(xx,&x);
2022: VecGetArray(zz,&y);
2023: for (i=0; i<m; i++) {
2024: idx = a->j + a->i[i] ;
2025: v = a->a + a->i[i] ;
2026: n = a->i[i+1] - a->i[i];
2027: alpha1 = x[11*i];
2028: alpha2 = x[11*i+1];
2029: alpha3 = x[11*i+2];
2030: alpha4 = x[11*i+3];
2031: alpha5 = x[11*i+4];
2032: alpha6 = x[11*i+5];
2033: alpha7 = x[11*i+6];
2034: alpha8 = x[11*i+7];
2035: alpha9 = x[11*i+8];
2036: alpha10 = x[11*i+9];
2037: alpha11 = x[11*i+10];
2038: while (n-->0) {
2039: y[11*(*idx)] += alpha1*(*v);
2040: y[11*(*idx)+1] += alpha2*(*v);
2041: y[11*(*idx)+2] += alpha3*(*v);
2042: y[11*(*idx)+3] += alpha4*(*v);
2043: y[11*(*idx)+4] += alpha5*(*v);
2044: y[11*(*idx)+5] += alpha6*(*v);
2045: y[11*(*idx)+6] += alpha7*(*v);
2046: y[11*(*idx)+7] += alpha8*(*v);
2047: y[11*(*idx)+8] += alpha9*(*v);
2048: y[11*(*idx)+9] += alpha10*(*v);
2049: y[11*(*idx)+10] += alpha11*(*v);
2050: idx++; v++;
2051: }
2052: }
2053: PetscLogFlops(22*a->nz);
2054: VecRestoreArray(xx,&x);
2055: VecRestoreArray(zz,&y);
2056: return(0);
2057: }
2060: /*--------------------------------------------------------------------------------------------*/
2063: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2064: {
2065: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2066: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2067: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2068: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2070: PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
2071: PetscInt n,i,jrow,j;
2074: VecGetArray(xx,&x);
2075: VecGetArray(yy,&y);
2076: idx = a->j;
2077: v = a->a;
2078: ii = a->i;
2080: for (i=0; i<m; i++) {
2081: jrow = ii[i];
2082: n = ii[i+1] - jrow;
2083: sum1 = 0.0;
2084: sum2 = 0.0;
2085: sum3 = 0.0;
2086: sum4 = 0.0;
2087: sum5 = 0.0;
2088: sum6 = 0.0;
2089: sum7 = 0.0;
2090: sum8 = 0.0;
2091: sum9 = 0.0;
2092: sum10 = 0.0;
2093: sum11 = 0.0;
2094: sum12 = 0.0;
2095: sum13 = 0.0;
2096: sum14 = 0.0;
2097: sum15 = 0.0;
2098: sum16 = 0.0;
2099: nonzerorow += (n>0);
2100: for (j=0; j<n; j++) {
2101: sum1 += v[jrow]*x[16*idx[jrow]];
2102: sum2 += v[jrow]*x[16*idx[jrow]+1];
2103: sum3 += v[jrow]*x[16*idx[jrow]+2];
2104: sum4 += v[jrow]*x[16*idx[jrow]+3];
2105: sum5 += v[jrow]*x[16*idx[jrow]+4];
2106: sum6 += v[jrow]*x[16*idx[jrow]+5];
2107: sum7 += v[jrow]*x[16*idx[jrow]+6];
2108: sum8 += v[jrow]*x[16*idx[jrow]+7];
2109: sum9 += v[jrow]*x[16*idx[jrow]+8];
2110: sum10 += v[jrow]*x[16*idx[jrow]+9];
2111: sum11 += v[jrow]*x[16*idx[jrow]+10];
2112: sum12 += v[jrow]*x[16*idx[jrow]+11];
2113: sum13 += v[jrow]*x[16*idx[jrow]+12];
2114: sum14 += v[jrow]*x[16*idx[jrow]+13];
2115: sum15 += v[jrow]*x[16*idx[jrow]+14];
2116: sum16 += v[jrow]*x[16*idx[jrow]+15];
2117: jrow++;
2118: }
2119: y[16*i] = sum1;
2120: y[16*i+1] = sum2;
2121: y[16*i+2] = sum3;
2122: y[16*i+3] = sum4;
2123: y[16*i+4] = sum5;
2124: y[16*i+5] = sum6;
2125: y[16*i+6] = sum7;
2126: y[16*i+7] = sum8;
2127: y[16*i+8] = sum9;
2128: y[16*i+9] = sum10;
2129: y[16*i+10] = sum11;
2130: y[16*i+11] = sum12;
2131: y[16*i+12] = sum13;
2132: y[16*i+13] = sum14;
2133: y[16*i+14] = sum15;
2134: y[16*i+15] = sum16;
2135: }
2137: PetscLogFlops(32*a->nz - 16*nonzerorow);
2138: VecRestoreArray(xx,&x);
2139: VecRestoreArray(yy,&y);
2140: return(0);
2141: }
2145: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2146: {
2147: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2148: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2149: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
2150: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2152: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
2155: VecSet(yy,zero);
2156: VecGetArray(xx,&x);
2157: VecGetArray(yy,&y);
2159: for (i=0; i<m; i++) {
2160: idx = a->j + a->i[i] ;
2161: v = a->a + a->i[i] ;
2162: n = a->i[i+1] - a->i[i];
2163: alpha1 = x[16*i];
2164: alpha2 = x[16*i+1];
2165: alpha3 = x[16*i+2];
2166: alpha4 = x[16*i+3];
2167: alpha5 = x[16*i+4];
2168: alpha6 = x[16*i+5];
2169: alpha7 = x[16*i+6];
2170: alpha8 = x[16*i+7];
2171: alpha9 = x[16*i+8];
2172: alpha10 = x[16*i+9];
2173: alpha11 = x[16*i+10];
2174: alpha12 = x[16*i+11];
2175: alpha13 = x[16*i+12];
2176: alpha14 = x[16*i+13];
2177: alpha15 = x[16*i+14];
2178: alpha16 = x[16*i+15];
2179: while (n-->0) {
2180: y[16*(*idx)] += alpha1*(*v);
2181: y[16*(*idx)+1] += alpha2*(*v);
2182: y[16*(*idx)+2] += alpha3*(*v);
2183: y[16*(*idx)+3] += alpha4*(*v);
2184: y[16*(*idx)+4] += alpha5*(*v);
2185: y[16*(*idx)+5] += alpha6*(*v);
2186: y[16*(*idx)+6] += alpha7*(*v);
2187: y[16*(*idx)+7] += alpha8*(*v);
2188: y[16*(*idx)+8] += alpha9*(*v);
2189: y[16*(*idx)+9] += alpha10*(*v);
2190: y[16*(*idx)+10] += alpha11*(*v);
2191: y[16*(*idx)+11] += alpha12*(*v);
2192: y[16*(*idx)+12] += alpha13*(*v);
2193: y[16*(*idx)+13] += alpha14*(*v);
2194: y[16*(*idx)+14] += alpha15*(*v);
2195: y[16*(*idx)+15] += alpha16*(*v);
2196: idx++; v++;
2197: }
2198: }
2199: PetscLogFlops(32*a->nz);
2200: VecRestoreArray(xx,&x);
2201: VecRestoreArray(yy,&y);
2202: return(0);
2203: }
2207: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2208: {
2209: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2210: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2211: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2212: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2214: PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2215: PetscInt n,i,jrow,j;
2218: if (yy != zz) {VecCopy(yy,zz);}
2219: VecGetArray(xx,&x);
2220: VecGetArray(zz,&y);
2221: idx = a->j;
2222: v = a->a;
2223: ii = a->i;
2225: for (i=0; i<m; i++) {
2226: jrow = ii[i];
2227: n = ii[i+1] - jrow;
2228: sum1 = 0.0;
2229: sum2 = 0.0;
2230: sum3 = 0.0;
2231: sum4 = 0.0;
2232: sum5 = 0.0;
2233: sum6 = 0.0;
2234: sum7 = 0.0;
2235: sum8 = 0.0;
2236: sum9 = 0.0;
2237: sum10 = 0.0;
2238: sum11 = 0.0;
2239: sum12 = 0.0;
2240: sum13 = 0.0;
2241: sum14 = 0.0;
2242: sum15 = 0.0;
2243: sum16 = 0.0;
2244: for (j=0; j<n; j++) {
2245: sum1 += v[jrow]*x[16*idx[jrow]];
2246: sum2 += v[jrow]*x[16*idx[jrow]+1];
2247: sum3 += v[jrow]*x[16*idx[jrow]+2];
2248: sum4 += v[jrow]*x[16*idx[jrow]+3];
2249: sum5 += v[jrow]*x[16*idx[jrow]+4];
2250: sum6 += v[jrow]*x[16*idx[jrow]+5];
2251: sum7 += v[jrow]*x[16*idx[jrow]+6];
2252: sum8 += v[jrow]*x[16*idx[jrow]+7];
2253: sum9 += v[jrow]*x[16*idx[jrow]+8];
2254: sum10 += v[jrow]*x[16*idx[jrow]+9];
2255: sum11 += v[jrow]*x[16*idx[jrow]+10];
2256: sum12 += v[jrow]*x[16*idx[jrow]+11];
2257: sum13 += v[jrow]*x[16*idx[jrow]+12];
2258: sum14 += v[jrow]*x[16*idx[jrow]+13];
2259: sum15 += v[jrow]*x[16*idx[jrow]+14];
2260: sum16 += v[jrow]*x[16*idx[jrow]+15];
2261: jrow++;
2262: }
2263: y[16*i] += sum1;
2264: y[16*i+1] += sum2;
2265: y[16*i+2] += sum3;
2266: y[16*i+3] += sum4;
2267: y[16*i+4] += sum5;
2268: y[16*i+5] += sum6;
2269: y[16*i+6] += sum7;
2270: y[16*i+7] += sum8;
2271: y[16*i+8] += sum9;
2272: y[16*i+9] += sum10;
2273: y[16*i+10] += sum11;
2274: y[16*i+11] += sum12;
2275: y[16*i+12] += sum13;
2276: y[16*i+13] += sum14;
2277: y[16*i+14] += sum15;
2278: y[16*i+15] += sum16;
2279: }
2281: PetscLogFlops(32*a->nz);
2282: VecRestoreArray(xx,&x);
2283: VecRestoreArray(zz,&y);
2284: return(0);
2285: }
2289: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2290: {
2291: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2292: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2293: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2294: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2296: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
2299: if (yy != zz) {VecCopy(yy,zz);}
2300: VecGetArray(xx,&x);
2301: VecGetArray(zz,&y);
2302: for (i=0; i<m; i++) {
2303: idx = a->j + a->i[i] ;
2304: v = a->a + a->i[i] ;
2305: n = a->i[i+1] - a->i[i];
2306: alpha1 = x[16*i];
2307: alpha2 = x[16*i+1];
2308: alpha3 = x[16*i+2];
2309: alpha4 = x[16*i+3];
2310: alpha5 = x[16*i+4];
2311: alpha6 = x[16*i+5];
2312: alpha7 = x[16*i+6];
2313: alpha8 = x[16*i+7];
2314: alpha9 = x[16*i+8];
2315: alpha10 = x[16*i+9];
2316: alpha11 = x[16*i+10];
2317: alpha12 = x[16*i+11];
2318: alpha13 = x[16*i+12];
2319: alpha14 = x[16*i+13];
2320: alpha15 = x[16*i+14];
2321: alpha16 = x[16*i+15];
2322: while (n-->0) {
2323: y[16*(*idx)] += alpha1*(*v);
2324: y[16*(*idx)+1] += alpha2*(*v);
2325: y[16*(*idx)+2] += alpha3*(*v);
2326: y[16*(*idx)+3] += alpha4*(*v);
2327: y[16*(*idx)+4] += alpha5*(*v);
2328: y[16*(*idx)+5] += alpha6*(*v);
2329: y[16*(*idx)+6] += alpha7*(*v);
2330: y[16*(*idx)+7] += alpha8*(*v);
2331: y[16*(*idx)+8] += alpha9*(*v);
2332: y[16*(*idx)+9] += alpha10*(*v);
2333: y[16*(*idx)+10] += alpha11*(*v);
2334: y[16*(*idx)+11] += alpha12*(*v);
2335: y[16*(*idx)+12] += alpha13*(*v);
2336: y[16*(*idx)+13] += alpha14*(*v);
2337: y[16*(*idx)+14] += alpha15*(*v);
2338: y[16*(*idx)+15] += alpha16*(*v);
2339: idx++; v++;
2340: }
2341: }
2342: PetscLogFlops(32*a->nz);
2343: VecRestoreArray(xx,&x);
2344: VecRestoreArray(zz,&y);
2345: return(0);
2346: }
2348: /*--------------------------------------------------------------------------------------------*/
2351: PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2352: {
2353: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2354: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2355: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2356: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2358: PetscInt m = b->AIJ->rmap->n,nonzerorow=0,*idx,*ii;
2359: PetscInt n,i,jrow,j;
2362: VecGetArray(xx,&x);
2363: VecGetArray(yy,&y);
2364: idx = a->j;
2365: v = a->a;
2366: ii = a->i;
2368: for (i=0; i<m; i++) {
2369: jrow = ii[i];
2370: n = ii[i+1] - jrow;
2371: sum1 = 0.0;
2372: sum2 = 0.0;
2373: sum3 = 0.0;
2374: sum4 = 0.0;
2375: sum5 = 0.0;
2376: sum6 = 0.0;
2377: sum7 = 0.0;
2378: sum8 = 0.0;
2379: sum9 = 0.0;
2380: sum10 = 0.0;
2381: sum11 = 0.0;
2382: sum12 = 0.0;
2383: sum13 = 0.0;
2384: sum14 = 0.0;
2385: sum15 = 0.0;
2386: sum16 = 0.0;
2387: sum17 = 0.0;
2388: sum18 = 0.0;
2389: nonzerorow += (n>0);
2390: for (j=0; j<n; j++) {
2391: sum1 += v[jrow]*x[18*idx[jrow]];
2392: sum2 += v[jrow]*x[18*idx[jrow]+1];
2393: sum3 += v[jrow]*x[18*idx[jrow]+2];
2394: sum4 += v[jrow]*x[18*idx[jrow]+3];
2395: sum5 += v[jrow]*x[18*idx[jrow]+4];
2396: sum6 += v[jrow]*x[18*idx[jrow]+5];
2397: sum7 += v[jrow]*x[18*idx[jrow]+6];
2398: sum8 += v[jrow]*x[18*idx[jrow]+7];
2399: sum9 += v[jrow]*x[18*idx[jrow]+8];
2400: sum10 += v[jrow]*x[18*idx[jrow]+9];
2401: sum11 += v[jrow]*x[18*idx[jrow]+10];
2402: sum12 += v[jrow]*x[18*idx[jrow]+11];
2403: sum13 += v[jrow]*x[18*idx[jrow]+12];
2404: sum14 += v[jrow]*x[18*idx[jrow]+13];
2405: sum15 += v[jrow]*x[18*idx[jrow]+14];
2406: sum16 += v[jrow]*x[18*idx[jrow]+15];
2407: sum17 += v[jrow]*x[18*idx[jrow]+16];
2408: sum18 += v[jrow]*x[18*idx[jrow]+17];
2409: jrow++;
2410: }
2411: y[18*i] = sum1;
2412: y[18*i+1] = sum2;
2413: y[18*i+2] = sum3;
2414: y[18*i+3] = sum4;
2415: y[18*i+4] = sum5;
2416: y[18*i+5] = sum6;
2417: y[18*i+6] = sum7;
2418: y[18*i+7] = sum8;
2419: y[18*i+8] = sum9;
2420: y[18*i+9] = sum10;
2421: y[18*i+10] = sum11;
2422: y[18*i+11] = sum12;
2423: y[18*i+12] = sum13;
2424: y[18*i+13] = sum14;
2425: y[18*i+14] = sum15;
2426: y[18*i+15] = sum16;
2427: y[18*i+16] = sum17;
2428: y[18*i+17] = sum18;
2429: }
2431: PetscLogFlops(36*a->nz - 18*nonzerorow);
2432: VecRestoreArray(xx,&x);
2433: VecRestoreArray(yy,&y);
2434: return(0);
2435: }
2439: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2440: {
2441: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2442: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2443: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
2444: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2446: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
2449: VecSet(yy,zero);
2450: VecGetArray(xx,&x);
2451: VecGetArray(yy,&y);
2453: for (i=0; i<m; i++) {
2454: idx = a->j + a->i[i] ;
2455: v = a->a + a->i[i] ;
2456: n = a->i[i+1] - a->i[i];
2457: alpha1 = x[18*i];
2458: alpha2 = x[18*i+1];
2459: alpha3 = x[18*i+2];
2460: alpha4 = x[18*i+3];
2461: alpha5 = x[18*i+4];
2462: alpha6 = x[18*i+5];
2463: alpha7 = x[18*i+6];
2464: alpha8 = x[18*i+7];
2465: alpha9 = x[18*i+8];
2466: alpha10 = x[18*i+9];
2467: alpha11 = x[18*i+10];
2468: alpha12 = x[18*i+11];
2469: alpha13 = x[18*i+12];
2470: alpha14 = x[18*i+13];
2471: alpha15 = x[18*i+14];
2472: alpha16 = x[18*i+15];
2473: alpha17 = x[18*i+16];
2474: alpha18 = x[18*i+17];
2475: while (n-->0) {
2476: y[18*(*idx)] += alpha1*(*v);
2477: y[18*(*idx)+1] += alpha2*(*v);
2478: y[18*(*idx)+2] += alpha3*(*v);
2479: y[18*(*idx)+3] += alpha4*(*v);
2480: y[18*(*idx)+4] += alpha5*(*v);
2481: y[18*(*idx)+5] += alpha6*(*v);
2482: y[18*(*idx)+6] += alpha7*(*v);
2483: y[18*(*idx)+7] += alpha8*(*v);
2484: y[18*(*idx)+8] += alpha9*(*v);
2485: y[18*(*idx)+9] += alpha10*(*v);
2486: y[18*(*idx)+10] += alpha11*(*v);
2487: y[18*(*idx)+11] += alpha12*(*v);
2488: y[18*(*idx)+12] += alpha13*(*v);
2489: y[18*(*idx)+13] += alpha14*(*v);
2490: y[18*(*idx)+14] += alpha15*(*v);
2491: y[18*(*idx)+15] += alpha16*(*v);
2492: y[18*(*idx)+16] += alpha17*(*v);
2493: y[18*(*idx)+17] += alpha18*(*v);
2494: idx++; v++;
2495: }
2496: }
2497: PetscLogFlops(36*a->nz);
2498: VecRestoreArray(xx,&x);
2499: VecRestoreArray(yy,&y);
2500: return(0);
2501: }
2505: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2506: {
2507: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2508: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2509: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2510: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2512: PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2513: PetscInt n,i,jrow,j;
2516: if (yy != zz) {VecCopy(yy,zz);}
2517: VecGetArray(xx,&x);
2518: VecGetArray(zz,&y);
2519: idx = a->j;
2520: v = a->a;
2521: ii = a->i;
2523: for (i=0; i<m; i++) {
2524: jrow = ii[i];
2525: n = ii[i+1] - jrow;
2526: sum1 = 0.0;
2527: sum2 = 0.0;
2528: sum3 = 0.0;
2529: sum4 = 0.0;
2530: sum5 = 0.0;
2531: sum6 = 0.0;
2532: sum7 = 0.0;
2533: sum8 = 0.0;
2534: sum9 = 0.0;
2535: sum10 = 0.0;
2536: sum11 = 0.0;
2537: sum12 = 0.0;
2538: sum13 = 0.0;
2539: sum14 = 0.0;
2540: sum15 = 0.0;
2541: sum16 = 0.0;
2542: sum17 = 0.0;
2543: sum18 = 0.0;
2544: for (j=0; j<n; j++) {
2545: sum1 += v[jrow]*x[18*idx[jrow]];
2546: sum2 += v[jrow]*x[18*idx[jrow]+1];
2547: sum3 += v[jrow]*x[18*idx[jrow]+2];
2548: sum4 += v[jrow]*x[18*idx[jrow]+3];
2549: sum5 += v[jrow]*x[18*idx[jrow]+4];
2550: sum6 += v[jrow]*x[18*idx[jrow]+5];
2551: sum7 += v[jrow]*x[18*idx[jrow]+6];
2552: sum8 += v[jrow]*x[18*idx[jrow]+7];
2553: sum9 += v[jrow]*x[18*idx[jrow]+8];
2554: sum10 += v[jrow]*x[18*idx[jrow]+9];
2555: sum11 += v[jrow]*x[18*idx[jrow]+10];
2556: sum12 += v[jrow]*x[18*idx[jrow]+11];
2557: sum13 += v[jrow]*x[18*idx[jrow]+12];
2558: sum14 += v[jrow]*x[18*idx[jrow]+13];
2559: sum15 += v[jrow]*x[18*idx[jrow]+14];
2560: sum16 += v[jrow]*x[18*idx[jrow]+15];
2561: sum17 += v[jrow]*x[18*idx[jrow]+16];
2562: sum18 += v[jrow]*x[18*idx[jrow]+17];
2563: jrow++;
2564: }
2565: y[18*i] += sum1;
2566: y[18*i+1] += sum2;
2567: y[18*i+2] += sum3;
2568: y[18*i+3] += sum4;
2569: y[18*i+4] += sum5;
2570: y[18*i+5] += sum6;
2571: y[18*i+6] += sum7;
2572: y[18*i+7] += sum8;
2573: y[18*i+8] += sum9;
2574: y[18*i+9] += sum10;
2575: y[18*i+10] += sum11;
2576: y[18*i+11] += sum12;
2577: y[18*i+12] += sum13;
2578: y[18*i+13] += sum14;
2579: y[18*i+14] += sum15;
2580: y[18*i+15] += sum16;
2581: y[18*i+16] += sum17;
2582: y[18*i+17] += sum18;
2583: }
2585: PetscLogFlops(36*a->nz);
2586: VecRestoreArray(xx,&x);
2587: VecRestoreArray(zz,&y);
2588: return(0);
2589: }
2593: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2594: {
2595: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2596: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2597: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2598: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2600: PetscInt m = b->AIJ->rmap->n,n,i,*idx;
2603: if (yy != zz) {VecCopy(yy,zz);}
2604: VecGetArray(xx,&x);
2605: VecGetArray(zz,&y);
2606: for (i=0; i<m; i++) {
2607: idx = a->j + a->i[i] ;
2608: v = a->a + a->i[i] ;
2609: n = a->i[i+1] - a->i[i];
2610: alpha1 = x[18*i];
2611: alpha2 = x[18*i+1];
2612: alpha3 = x[18*i+2];
2613: alpha4 = x[18*i+3];
2614: alpha5 = x[18*i+4];
2615: alpha6 = x[18*i+5];
2616: alpha7 = x[18*i+6];
2617: alpha8 = x[18*i+7];
2618: alpha9 = x[18*i+8];
2619: alpha10 = x[18*i+9];
2620: alpha11 = x[18*i+10];
2621: alpha12 = x[18*i+11];
2622: alpha13 = x[18*i+12];
2623: alpha14 = x[18*i+13];
2624: alpha15 = x[18*i+14];
2625: alpha16 = x[18*i+15];
2626: alpha17 = x[18*i+16];
2627: alpha18 = x[18*i+17];
2628: while (n-->0) {
2629: y[18*(*idx)] += alpha1*(*v);
2630: y[18*(*idx)+1] += alpha2*(*v);
2631: y[18*(*idx)+2] += alpha3*(*v);
2632: y[18*(*idx)+3] += alpha4*(*v);
2633: y[18*(*idx)+4] += alpha5*(*v);
2634: y[18*(*idx)+5] += alpha6*(*v);
2635: y[18*(*idx)+6] += alpha7*(*v);
2636: y[18*(*idx)+7] += alpha8*(*v);
2637: y[18*(*idx)+8] += alpha9*(*v);
2638: y[18*(*idx)+9] += alpha10*(*v);
2639: y[18*(*idx)+10] += alpha11*(*v);
2640: y[18*(*idx)+11] += alpha12*(*v);
2641: y[18*(*idx)+12] += alpha13*(*v);
2642: y[18*(*idx)+13] += alpha14*(*v);
2643: y[18*(*idx)+14] += alpha15*(*v);
2644: y[18*(*idx)+15] += alpha16*(*v);
2645: y[18*(*idx)+16] += alpha17*(*v);
2646: y[18*(*idx)+17] += alpha18*(*v);
2647: idx++; v++;
2648: }
2649: }
2650: PetscLogFlops(36*a->nz);
2651: VecRestoreArray(xx,&x);
2652: VecRestoreArray(zz,&y);
2653: return(0);
2654: }
2656: /*===================================================================================*/
2659: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2660: {
2661: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2665: /* start the scatter */
2666: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2667: (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2668: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2669: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2670: return(0);
2671: }
2675: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2676: {
2677: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2681: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2682: (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2683: VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2684: VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2685: return(0);
2686: }
2690: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2691: {
2692: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2696: /* start the scatter */
2697: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2698: (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2699: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2700: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2701: return(0);
2702: }
2706: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2707: {
2708: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2712: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2713: VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2714: (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2715: VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2716: return(0);
2717: }
2719: /* ----------------------------------------------------------------*/
2722: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2723: {
2724: /* This routine requires testing -- but it's getting better. */
2725: PetscErrorCode ierr;
2726: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2727: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
2728: Mat P=pp->AIJ;
2729: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2730: PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
2731: PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2732: PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof,cn;
2733: PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
2734: MatScalar *ca;
2737: /* Start timer */
2738: PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);
2740: /* Get ij structure of P^T */
2741: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2743: cn = pn*ppdof;
2744: /* Allocate ci array, arrays for fill computation and */
2745: /* free space for accumulating nonzero column info */
2746: PetscMalloc((cn+1)*sizeof(PetscInt),&ci);
2747: ci[0] = 0;
2749: /* Work arrays for rows of P^T*A */
2750: PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);
2751: PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));
2752: ptasparserow = ptadenserow + an;
2753: denserow = ptasparserow + an;
2754: sparserow = denserow + cn;
2756: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2757: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2758: /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2759: PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
2760: current_space = free_space;
2762: /* Determine symbolic info for each row of C: */
2763: for (i=0;i<pn;i++) {
2764: ptnzi = pti[i+1] - pti[i];
2765: ptJ = ptj + pti[i];
2766: for (dof=0;dof<ppdof;dof++) {
2767: ptanzi = 0;
2768: /* Determine symbolic row of PtA: */
2769: for (j=0;j<ptnzi;j++) {
2770: /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2771: arow = ptJ[j]*ppdof + dof;
2772: /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2773: anzj = ai[arow+1] - ai[arow];
2774: ajj = aj + ai[arow];
2775: for (k=0;k<anzj;k++) {
2776: if (!ptadenserow[ajj[k]]) {
2777: ptadenserow[ajj[k]] = -1;
2778: ptasparserow[ptanzi++] = ajj[k];
2779: }
2780: }
2781: }
2782: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2783: ptaj = ptasparserow;
2784: cnzi = 0;
2785: for (j=0;j<ptanzi;j++) {
2786: /* Get offset within block of P */
2787: pshift = *ptaj%ppdof;
2788: /* Get block row of P */
2789: prow = (*ptaj++)/ppdof; /* integer division */
2790: /* P has same number of nonzeros per row as the compressed form */
2791: pnzj = pi[prow+1] - pi[prow];
2792: pjj = pj + pi[prow];
2793: for (k=0;k<pnzj;k++) {
2794: /* Locations in C are shifted by the offset within the block */
2795: /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2796: if (!denserow[pjj[k]*ppdof+pshift]) {
2797: denserow[pjj[k]*ppdof+pshift] = -1;
2798: sparserow[cnzi++] = pjj[k]*ppdof+pshift;
2799: }
2800: }
2801: }
2803: /* sort sparserow */
2804: PetscSortInt(cnzi,sparserow);
2805:
2806: /* If free space is not available, make more free space */
2807: /* Double the amount of total space in the list */
2808: if (current_space->local_remaining<cnzi) {
2809: PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);
2810: }
2812: /* Copy data into free space, and zero out denserows */
2813: PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
2814: current_space->array += cnzi;
2815: current_space->local_used += cnzi;
2816: current_space->local_remaining -= cnzi;
2818: for (j=0;j<ptanzi;j++) {
2819: ptadenserow[ptasparserow[j]] = 0;
2820: }
2821: for (j=0;j<cnzi;j++) {
2822: denserow[sparserow[j]] = 0;
2823: }
2824: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2825: /* For now, we will recompute what is needed. */
2826: ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2827: }
2828: }
2829: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2830: /* Allocate space for cj, initialize cj, and */
2831: /* destroy list of free space and other temporary array(s) */
2832: PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);
2833: PetscFreeSpaceContiguous(&free_space,cj);
2834: PetscFree(ptadenserow);
2835:
2836: /* Allocate space for ca */
2837: PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);
2838: PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));
2839:
2840: /* put together the new matrix */
2841: MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);
2843: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2844: /* Since these are PETSc arrays, change flags to free them as necessary. */
2845: c = (Mat_SeqAIJ *)((*C)->data);
2846: c->free_a = PETSC_TRUE;
2847: c->free_ij = PETSC_TRUE;
2848: c->nonew = 0;
2850: /* Clean up. */
2851: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2853: PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);
2854: return(0);
2855: }
2859: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2860: {
2861: /* This routine requires testing -- first draft only */
2863: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
2864: Mat P=pp->AIJ;
2865: Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
2866: Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data;
2867: Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data;
2868: PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
2869: PetscInt *ci=c->i,*cj=c->j,*cjj;
2870: PetscInt am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
2871: PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
2872: MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
2875: /* Allocate temporary array for storage of one row of A*P */
2876: PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);
2877: PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));
2879: apj = (PetscInt *)(apa + cn);
2880: apjdense = apj + cn;
2882: /* Clear old values in C */
2883: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
2885: for (i=0;i<am;i++) {
2886: /* Form sparse row of A*P */
2887: anzi = ai[i+1] - ai[i];
2888: apnzj = 0;
2889: for (j=0;j<anzi;j++) {
2890: /* Get offset within block of P */
2891: pshift = *aj%ppdof;
2892: /* Get block row of P */
2893: prow = *aj++/ppdof; /* integer division */
2894: pnzj = pi[prow+1] - pi[prow];
2895: pjj = pj + pi[prow];
2896: paj = pa + pi[prow];
2897: for (k=0;k<pnzj;k++) {
2898: poffset = pjj[k]*ppdof+pshift;
2899: if (!apjdense[poffset]) {
2900: apjdense[poffset] = -1;
2901: apj[apnzj++] = poffset;
2902: }
2903: apa[poffset] += (*aa)*paj[k];
2904: }
2905: PetscLogFlops(2*pnzj);
2906: aa++;
2907: }
2909: /* Sort the j index array for quick sparse axpy. */
2910: /* Note: a array does not need sorting as it is in dense storage locations. */
2911: PetscSortInt(apnzj,apj);
2913: /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2914: prow = i/ppdof; /* integer division */
2915: pshift = i%ppdof;
2916: poffset = pi[prow];
2917: pnzi = pi[prow+1] - poffset;
2918: /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2919: pJ = pj+poffset;
2920: pA = pa+poffset;
2921: for (j=0;j<pnzi;j++) {
2922: crow = (*pJ)*ppdof+pshift;
2923: cjj = cj + ci[crow];
2924: caj = ca + ci[crow];
2925: pJ++;
2926: /* Perform sparse axpy operation. Note cjj includes apj. */
2927: for (k=0,nextap=0;nextap<apnzj;k++) {
2928: if (cjj[k]==apj[nextap]) {
2929: caj[k] += (*pA)*apa[apj[nextap++]];
2930: }
2931: }
2932: PetscLogFlops(2*apnzj);
2933: pA++;
2934: }
2936: /* Zero the current row info for A*P */
2937: for (j=0;j<apnzj;j++) {
2938: apa[apj[j]] = 0.;
2939: apjdense[apj[j]] = 0;
2940: }
2941: }
2943: /* Assemble the final matrix and clean up */
2944: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2945: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2946: PetscFree(apa);
2947: return(0);
2948: }
2952: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2953: {
2954: PetscErrorCode ierr;
2957: /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
2958: MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);
2959: ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);
2960: return(0);
2961: }
2965: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
2966: {
2968: SETERRQ(PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
2969: return(0);
2970: }
2975: PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2976: {
2977: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2978: Mat a = b->AIJ,B;
2979: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data;
2980: PetscErrorCode ierr;
2981: PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
2982: PetscInt *cols;
2983: PetscScalar *vals;
2986: MatGetSize(a,&m,&n);
2987: PetscMalloc(dof*m*sizeof(PetscInt),&ilen);
2988: for (i=0; i<m; i++) {
2989: nmax = PetscMax(nmax,aij->ilen[i]);
2990: for (j=0; j<dof; j++) {
2991: ilen[dof*i+j] = aij->ilen[i];
2992: }
2993: }
2994: MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);
2995: PetscFree(ilen);
2996: PetscMalloc(nmax*sizeof(PetscInt),&icols);
2997: ii = 0;
2998: for (i=0; i<m; i++) {
2999: MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3000: for (j=0; j<dof; j++) {
3001: for (k=0; k<ncols; k++) {
3002: icols[k] = dof*cols[k]+j;
3003: }
3004: MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3005: ii++;
3006: }
3007: MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3008: }
3009: PetscFree(icols);
3010: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3011: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3013: if (reuse == MAT_REUSE_MATRIX) {
3014: MatHeaderReplace(A,B);
3015: } else {
3016: *newmat = B;
3017: }
3018: return(0);
3019: }
3022: #include ../src/mat/impls/aij/mpi/mpiaij.h
3027: PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3028: {
3029: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data;
3030: Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3031: Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3032: Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
3033: Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
3034: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3035: PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
3036: PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
3037: PetscInt rstart,cstart,*garray,ii,k;
3038: PetscErrorCode ierr;
3039: PetscScalar *vals,*ovals;
3042: PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);
3043: for (i=0; i<A->rmap->n/dof; i++) {
3044: nmax = PetscMax(nmax,AIJ->ilen[i]);
3045: onmax = PetscMax(onmax,OAIJ->ilen[i]);
3046: for (j=0; j<dof; j++) {
3047: dnz[dof*i+j] = AIJ->ilen[i];
3048: onz[dof*i+j] = OAIJ->ilen[i];
3049: }
3050: }
3051: MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);
3052: PetscFree2(dnz,onz);
3054: PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);
3055: rstart = dof*maij->A->rmap->rstart;
3056: cstart = dof*maij->A->cmap->rstart;
3057: garray = mpiaij->garray;
3059: ii = rstart;
3060: for (i=0; i<A->rmap->n/dof; i++) {
3061: MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3062: MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3063: for (j=0; j<dof; j++) {
3064: for (k=0; k<ncols; k++) {
3065: icols[k] = cstart + dof*cols[k]+j;
3066: }
3067: for (k=0; k<oncols; k++) {
3068: oicols[k] = dof*garray[ocols[k]]+j;
3069: }
3070: MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3071: MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
3072: ii++;
3073: }
3074: MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3075: MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3076: }
3077: PetscFree2(icols,oicols);
3079: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3080: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3082: if (reuse == MAT_REUSE_MATRIX) {
3083: PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3084: ((PetscObject)A)->refct = 1;
3085: MatHeaderReplace(A,B);
3086: ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3087: } else {
3088: *newmat = B;
3089: }
3090: return(0);
3091: }
3095: /* ---------------------------------------------------------------------------------- */
3096: /*MC
3097: MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3098: operations for multicomponent problems. It interpolates each component the same
3099: way independently. The matrix type is based on MATSEQAIJ for sequential matrices,
3100: and MATMPIAIJ for distributed matrices.
3102: Operations provided:
3103: + MatMult
3104: . MatMultTranspose
3105: . MatMultAdd
3106: . MatMultTransposeAdd
3107: - MatView
3109: Level: advanced
3111: M*/
3114: PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3115: {
3117: PetscMPIInt size;
3118: PetscInt n;
3119: Mat_MPIMAIJ *b;
3120: Mat B;
3123: PetscObjectReference((PetscObject)A);
3125: if (dof == 1) {
3126: *maij = A;
3127: } else {
3128: MatCreate(((PetscObject)A)->comm,&B);
3129: MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);
3130: B->assembled = PETSC_TRUE;
3132: MPI_Comm_size(((PetscObject)A)->comm,&size);
3133: if (size == 1) {
3134: MatSetType(B,MATSEQMAIJ);
3135: B->ops->destroy = MatDestroy_SeqMAIJ;
3136: B->ops->view = MatView_SeqMAIJ;
3137: b = (Mat_MPIMAIJ*)B->data;
3138: b->dof = dof;
3139: b->AIJ = A;
3140: if (dof == 2) {
3141: B->ops->mult = MatMult_SeqMAIJ_2;
3142: B->ops->multadd = MatMultAdd_SeqMAIJ_2;
3143: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2;
3144: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3145: } else if (dof == 3) {
3146: B->ops->mult = MatMult_SeqMAIJ_3;
3147: B->ops->multadd = MatMultAdd_SeqMAIJ_3;
3148: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3;
3149: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3150: } else if (dof == 4) {
3151: B->ops->mult = MatMult_SeqMAIJ_4;
3152: B->ops->multadd = MatMultAdd_SeqMAIJ_4;
3153: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4;
3154: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3155: } else if (dof == 5) {
3156: B->ops->mult = MatMult_SeqMAIJ_5;
3157: B->ops->multadd = MatMultAdd_SeqMAIJ_5;
3158: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5;
3159: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3160: } else if (dof == 6) {
3161: B->ops->mult = MatMult_SeqMAIJ_6;
3162: B->ops->multadd = MatMultAdd_SeqMAIJ_6;
3163: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6;
3164: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3165: } else if (dof == 7) {
3166: B->ops->mult = MatMult_SeqMAIJ_7;
3167: B->ops->multadd = MatMultAdd_SeqMAIJ_7;
3168: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7;
3169: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3170: } else if (dof == 8) {
3171: B->ops->mult = MatMult_SeqMAIJ_8;
3172: B->ops->multadd = MatMultAdd_SeqMAIJ_8;
3173: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8;
3174: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3175: } else if (dof == 9) {
3176: B->ops->mult = MatMult_SeqMAIJ_9;
3177: B->ops->multadd = MatMultAdd_SeqMAIJ_9;
3178: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9;
3179: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3180: } else if (dof == 10) {
3181: B->ops->mult = MatMult_SeqMAIJ_10;
3182: B->ops->multadd = MatMultAdd_SeqMAIJ_10;
3183: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10;
3184: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3185: } else if (dof == 11) {
3186: B->ops->mult = MatMult_SeqMAIJ_11;
3187: B->ops->multadd = MatMultAdd_SeqMAIJ_11;
3188: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11;
3189: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3190: } else if (dof == 16) {
3191: B->ops->mult = MatMult_SeqMAIJ_16;
3192: B->ops->multadd = MatMultAdd_SeqMAIJ_16;
3193: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16;
3194: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3195: } else if (dof == 18) {
3196: B->ops->mult = MatMult_SeqMAIJ_18;
3197: B->ops->multadd = MatMultAdd_SeqMAIJ_18;
3198: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18;
3199: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3200: } else {
3201: SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
3202: }
3203: B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
3204: B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
3205: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);
3206: } else {
3207: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
3208: IS from,to;
3209: Vec gvec;
3210: PetscInt *garray,i;
3212: MatSetType(B,MATMPIMAIJ);
3213: B->ops->destroy = MatDestroy_MPIMAIJ;
3214: B->ops->view = MatView_MPIMAIJ;
3215: b = (Mat_MPIMAIJ*)B->data;
3216: b->dof = dof;
3217: b->A = A;
3218: MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
3219: MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);
3221: VecGetSize(mpiaij->lvec,&n);
3222: VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);
3223: VecSetBlockSize(b->w,dof);
3225: /* create two temporary Index sets for build scatter gather */
3226: PetscMalloc((n+1)*sizeof(PetscInt),&garray);
3227: for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
3228: ISCreateBlock(((PetscObject)A)->comm,dof,n,garray,&from);
3229: PetscFree(garray);
3230: ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);
3232: /* create temporary global vector to generate scatter context */
3233: VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);
3234: VecSetBlockSize(gvec,dof);
3236: /* generate the scatter context */
3237: VecScatterCreate(gvec,from,b->w,to,&b->ctx);
3239: ISDestroy(from);
3240: ISDestroy(to);
3241: VecDestroy(gvec);
3243: B->ops->mult = MatMult_MPIMAIJ_dof;
3244: B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof;
3245: B->ops->multadd = MatMultAdd_MPIMAIJ_dof;
3246: B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3247: B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ;
3248: B->ops->ptapnumeric_mpiaij = MatPtAPNumeric_MPIAIJ_MPIMAIJ;
3249: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);
3250: }
3251: *maij = B;
3252: MatView_Private(B);
3253: }
3254: return(0);
3255: }