Actual source code: maij.c
1: #define PETSCMAT_DLL
3: /*
4: Defines the basic matrix operations for the MAIJ matrix storage format.
5: This format is used for restriction and interpolation operations for
6: multicomponent problems. It interpolates each component the same way
7: independently.
9: We provide:
10: MatMult()
11: MatMultTranspose()
12: MatMultTransposeAdd()
13: MatMultAdd()
14: and
15: MatCreateMAIJ(Mat,dof,Mat*)
17: This single directory handles both the sequential and parallel codes
18: */
20: #include src/mat/impls/maij/maij.h
21: #include src/mat/utils/freespace.h
22: #include private/vecimpl.h
26: PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B)
27: {
29: PetscTruth ismpimaij,isseqmaij;
32: PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
33: PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
34: if (ismpimaij) {
35: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
37: *B = b->A;
38: } else if (isseqmaij) {
39: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
41: *B = b->AIJ;
42: } else {
43: *B = A;
44: }
45: return(0);
46: }
50: PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
51: {
53: Mat Aij;
56: MatMAIJGetAIJ(A,&Aij);
57: MatCreateMAIJ(Aij,dof,B);
58: return(0);
59: }
63: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
64: {
66: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
69: if (b->AIJ) {
70: MatDestroy(b->AIJ);
71: }
72: PetscFree(b);
73: return(0);
74: }
78: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
79: {
81: Mat B;
84: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
85: MatView(B,viewer);
86: MatDestroy(B);
87: return(0);
88: }
92: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
93: {
95: Mat B;
98: MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
99: MatView(B,viewer);
100: MatDestroy(B);
101: return(0);
102: }
106: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
107: {
109: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
112: if (b->AIJ) {
113: MatDestroy(b->AIJ);
114: }
115: if (b->OAIJ) {
116: MatDestroy(b->OAIJ);
117: }
118: if (b->A) {
119: MatDestroy(b->A);
120: }
121: if (b->ctx) {
122: VecScatterDestroy(b->ctx);
123: }
124: if (b->w) {
125: VecDestroy(b->w);
126: }
127: PetscFree(b);
128: return(0);
129: }
131: /*MC
132: MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
133: multicomponent problems, interpolating or restricting each component the same way independently.
134: The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
136: Operations provided:
137: . MatMult
138: . MatMultTranspose
139: . MatMultAdd
140: . MatMultTransposeAdd
142: Level: advanced
144: .seealso: MatCreateMAIJ()
145: M*/
150: PetscErrorCode MatCreate_MAIJ(Mat A)
151: {
153: Mat_MPIMAIJ *b;
154: PetscMPIInt size;
157: PetscNew(Mat_MPIMAIJ,&b);
158: A->data = (void*)b;
159: PetscMemzero(A->ops,sizeof(struct _MatOps));
160: A->factor = 0;
161: A->mapping = 0;
163: b->AIJ = 0;
164: b->dof = 0;
165: b->OAIJ = 0;
166: b->ctx = 0;
167: b->w = 0;
168: MPI_Comm_size(A->comm,&size);
169: if (size == 1){
170: PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
171: } else {
172: PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
173: }
174: return(0);
175: }
178: /* --------------------------------------------------------------------------------------*/
181: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
182: {
183: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
184: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
185: PetscScalar *x,*y,*v,sum1, sum2;
187: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
188: PetscInt n,i,jrow,j;
191: VecGetArray(xx,&x);
192: VecGetArray(yy,&y);
193: idx = a->j;
194: v = a->a;
195: ii = a->i;
197: for (i=0; i<m; i++) {
198: jrow = ii[i];
199: n = ii[i+1] - jrow;
200: sum1 = 0.0;
201: sum2 = 0.0;
202: for (j=0; j<n; j++) {
203: sum1 += v[jrow]*x[2*idx[jrow]];
204: sum2 += v[jrow]*x[2*idx[jrow]+1];
205: jrow++;
206: }
207: y[2*i] = sum1;
208: y[2*i+1] = sum2;
209: }
211: PetscLogFlops(4*a->nz - 2*m);
212: VecRestoreArray(xx,&x);
213: VecRestoreArray(yy,&y);
214: return(0);
215: }
219: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
220: {
221: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
222: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
223: PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0;
225: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
228: VecSet(yy,zero);
229: VecGetArray(xx,&x);
230: VecGetArray(yy,&y);
231:
232: for (i=0; i<m; i++) {
233: idx = a->j + a->i[i] ;
234: v = a->a + a->i[i] ;
235: n = a->i[i+1] - a->i[i];
236: alpha1 = x[2*i];
237: alpha2 = x[2*i+1];
238: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
239: }
240: PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);
241: VecRestoreArray(xx,&x);
242: VecRestoreArray(yy,&y);
243: return(0);
244: }
248: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
249: {
250: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
251: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
252: PetscScalar *x,*y,*v,sum1, sum2;
254: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
255: PetscInt n,i,jrow,j;
258: if (yy != zz) {VecCopy(yy,zz);}
259: VecGetArray(xx,&x);
260: VecGetArray(zz,&y);
261: idx = a->j;
262: v = a->a;
263: ii = a->i;
265: for (i=0; i<m; i++) {
266: jrow = ii[i];
267: n = ii[i+1] - jrow;
268: sum1 = 0.0;
269: sum2 = 0.0;
270: for (j=0; j<n; j++) {
271: sum1 += v[jrow]*x[2*idx[jrow]];
272: sum2 += v[jrow]*x[2*idx[jrow]+1];
273: jrow++;
274: }
275: y[2*i] += sum1;
276: y[2*i+1] += sum2;
277: }
279: PetscLogFlops(4*a->nz - 2*m);
280: VecRestoreArray(xx,&x);
281: VecRestoreArray(zz,&y);
282: return(0);
283: }
286: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
287: {
288: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
289: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
290: PetscScalar *x,*y,*v,alpha1,alpha2;
292: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
295: if (yy != zz) {VecCopy(yy,zz);}
296: VecGetArray(xx,&x);
297: VecGetArray(zz,&y);
298:
299: for (i=0; i<m; i++) {
300: idx = a->j + a->i[i] ;
301: v = a->a + a->i[i] ;
302: n = a->i[i+1] - a->i[i];
303: alpha1 = x[2*i];
304: alpha2 = x[2*i+1];
305: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
306: }
307: PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);
308: VecRestoreArray(xx,&x);
309: VecRestoreArray(zz,&y);
310: return(0);
311: }
312: /* --------------------------------------------------------------------------------------*/
315: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
316: {
317: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
318: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
319: PetscScalar *x,*y,*v,sum1, sum2, sum3;
321: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
322: PetscInt n,i,jrow,j;
325: VecGetArray(xx,&x);
326: VecGetArray(yy,&y);
327: idx = a->j;
328: v = a->a;
329: ii = a->i;
331: for (i=0; i<m; i++) {
332: jrow = ii[i];
333: n = ii[i+1] - jrow;
334: sum1 = 0.0;
335: sum2 = 0.0;
336: sum3 = 0.0;
337: for (j=0; j<n; j++) {
338: sum1 += v[jrow]*x[3*idx[jrow]];
339: sum2 += v[jrow]*x[3*idx[jrow]+1];
340: sum3 += v[jrow]*x[3*idx[jrow]+2];
341: jrow++;
342: }
343: y[3*i] = sum1;
344: y[3*i+1] = sum2;
345: y[3*i+2] = sum3;
346: }
348: PetscLogFlops(6*a->nz - 3*m);
349: VecRestoreArray(xx,&x);
350: VecRestoreArray(yy,&y);
351: return(0);
352: }
356: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
357: {
358: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
359: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
360: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
362: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
365: VecSet(yy,zero);
366: VecGetArray(xx,&x);
367: VecGetArray(yy,&y);
368:
369: for (i=0; i<m; i++) {
370: idx = a->j + a->i[i];
371: v = a->a + a->i[i];
372: n = a->i[i+1] - a->i[i];
373: alpha1 = x[3*i];
374: alpha2 = x[3*i+1];
375: alpha3 = x[3*i+2];
376: while (n-->0) {
377: y[3*(*idx)] += alpha1*(*v);
378: y[3*(*idx)+1] += alpha2*(*v);
379: y[3*(*idx)+2] += alpha3*(*v);
380: idx++; v++;
381: }
382: }
383: PetscLogFlops(6*a->nz - 3*b->AIJ->cmap.n);
384: VecRestoreArray(xx,&x);
385: VecRestoreArray(yy,&y);
386: return(0);
387: }
391: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
392: {
393: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
394: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
395: PetscScalar *x,*y,*v,sum1, sum2, sum3;
397: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
398: PetscInt n,i,jrow,j;
401: if (yy != zz) {VecCopy(yy,zz);}
402: VecGetArray(xx,&x);
403: VecGetArray(zz,&y);
404: idx = a->j;
405: v = a->a;
406: ii = a->i;
408: for (i=0; i<m; i++) {
409: jrow = ii[i];
410: n = ii[i+1] - jrow;
411: sum1 = 0.0;
412: sum2 = 0.0;
413: sum3 = 0.0;
414: for (j=0; j<n; j++) {
415: sum1 += v[jrow]*x[3*idx[jrow]];
416: sum2 += v[jrow]*x[3*idx[jrow]+1];
417: sum3 += v[jrow]*x[3*idx[jrow]+2];
418: jrow++;
419: }
420: y[3*i] += sum1;
421: y[3*i+1] += sum2;
422: y[3*i+2] += sum3;
423: }
425: PetscLogFlops(6*a->nz);
426: VecRestoreArray(xx,&x);
427: VecRestoreArray(zz,&y);
428: return(0);
429: }
432: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
433: {
434: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
435: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
436: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3;
438: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
441: if (yy != zz) {VecCopy(yy,zz);}
442: VecGetArray(xx,&x);
443: VecGetArray(zz,&y);
444: for (i=0; i<m; i++) {
445: idx = a->j + a->i[i] ;
446: v = a->a + a->i[i] ;
447: n = a->i[i+1] - a->i[i];
448: alpha1 = x[3*i];
449: alpha2 = x[3*i+1];
450: alpha3 = x[3*i+2];
451: while (n-->0) {
452: y[3*(*idx)] += alpha1*(*v);
453: y[3*(*idx)+1] += alpha2*(*v);
454: y[3*(*idx)+2] += alpha3*(*v);
455: idx++; v++;
456: }
457: }
458: PetscLogFlops(6*a->nz);
459: VecRestoreArray(xx,&x);
460: VecRestoreArray(zz,&y);
461: return(0);
462: }
464: /* ------------------------------------------------------------------------------*/
467: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
468: {
469: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
470: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
471: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4;
473: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
474: PetscInt n,i,jrow,j;
477: VecGetArray(xx,&x);
478: VecGetArray(yy,&y);
479: idx = a->j;
480: v = a->a;
481: ii = a->i;
483: for (i=0; i<m; i++) {
484: jrow = ii[i];
485: n = ii[i+1] - jrow;
486: sum1 = 0.0;
487: sum2 = 0.0;
488: sum3 = 0.0;
489: sum4 = 0.0;
490: for (j=0; j<n; j++) {
491: sum1 += v[jrow]*x[4*idx[jrow]];
492: sum2 += v[jrow]*x[4*idx[jrow]+1];
493: sum3 += v[jrow]*x[4*idx[jrow]+2];
494: sum4 += v[jrow]*x[4*idx[jrow]+3];
495: jrow++;
496: }
497: y[4*i] = sum1;
498: y[4*i+1] = sum2;
499: y[4*i+2] = sum3;
500: y[4*i+3] = sum4;
501: }
503: PetscLogFlops(8*a->nz - 4*m);
504: VecRestoreArray(xx,&x);
505: VecRestoreArray(yy,&y);
506: return(0);
507: }
511: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
512: {
513: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
514: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
515: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
517: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
520: VecSet(yy,zero);
521: VecGetArray(xx,&x);
522: VecGetArray(yy,&y);
523: for (i=0; i<m; i++) {
524: idx = a->j + a->i[i] ;
525: v = a->a + a->i[i] ;
526: n = a->i[i+1] - a->i[i];
527: alpha1 = x[4*i];
528: alpha2 = x[4*i+1];
529: alpha3 = x[4*i+2];
530: alpha4 = x[4*i+3];
531: while (n-->0) {
532: y[4*(*idx)] += alpha1*(*v);
533: y[4*(*idx)+1] += alpha2*(*v);
534: y[4*(*idx)+2] += alpha3*(*v);
535: y[4*(*idx)+3] += alpha4*(*v);
536: idx++; v++;
537: }
538: }
539: PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);
540: VecRestoreArray(xx,&x);
541: VecRestoreArray(yy,&y);
542: return(0);
543: }
547: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
548: {
549: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
550: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
551: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4;
553: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
554: PetscInt n,i,jrow,j;
557: if (yy != zz) {VecCopy(yy,zz);}
558: VecGetArray(xx,&x);
559: VecGetArray(zz,&y);
560: idx = a->j;
561: v = a->a;
562: ii = a->i;
564: for (i=0; i<m; i++) {
565: jrow = ii[i];
566: n = ii[i+1] - jrow;
567: sum1 = 0.0;
568: sum2 = 0.0;
569: sum3 = 0.0;
570: sum4 = 0.0;
571: for (j=0; j<n; j++) {
572: sum1 += v[jrow]*x[4*idx[jrow]];
573: sum2 += v[jrow]*x[4*idx[jrow]+1];
574: sum3 += v[jrow]*x[4*idx[jrow]+2];
575: sum4 += v[jrow]*x[4*idx[jrow]+3];
576: jrow++;
577: }
578: y[4*i] += sum1;
579: y[4*i+1] += sum2;
580: y[4*i+2] += sum3;
581: y[4*i+3] += sum4;
582: }
584: PetscLogFlops(8*a->nz - 4*m);
585: VecRestoreArray(xx,&x);
586: VecRestoreArray(zz,&y);
587: return(0);
588: }
591: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
592: {
593: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
594: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
595: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
597: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
600: if (yy != zz) {VecCopy(yy,zz);}
601: VecGetArray(xx,&x);
602: VecGetArray(zz,&y);
603:
604: for (i=0; i<m; i++) {
605: idx = a->j + a->i[i] ;
606: v = a->a + a->i[i] ;
607: n = a->i[i+1] - a->i[i];
608: alpha1 = x[4*i];
609: alpha2 = x[4*i+1];
610: alpha3 = x[4*i+2];
611: alpha4 = x[4*i+3];
612: while (n-->0) {
613: y[4*(*idx)] += alpha1*(*v);
614: y[4*(*idx)+1] += alpha2*(*v);
615: y[4*(*idx)+2] += alpha3*(*v);
616: y[4*(*idx)+3] += alpha4*(*v);
617: idx++; v++;
618: }
619: }
620: PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);
621: VecRestoreArray(xx,&x);
622: VecRestoreArray(zz,&y);
623: return(0);
624: }
625: /* ------------------------------------------------------------------------------*/
629: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
630: {
631: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
632: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
633: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
635: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
636: PetscInt n,i,jrow,j;
639: VecGetArray(xx,&x);
640: VecGetArray(yy,&y);
641: idx = a->j;
642: v = a->a;
643: ii = a->i;
645: for (i=0; i<m; i++) {
646: jrow = ii[i];
647: n = ii[i+1] - jrow;
648: sum1 = 0.0;
649: sum2 = 0.0;
650: sum3 = 0.0;
651: sum4 = 0.0;
652: sum5 = 0.0;
653: for (j=0; j<n; j++) {
654: sum1 += v[jrow]*x[5*idx[jrow]];
655: sum2 += v[jrow]*x[5*idx[jrow]+1];
656: sum3 += v[jrow]*x[5*idx[jrow]+2];
657: sum4 += v[jrow]*x[5*idx[jrow]+3];
658: sum5 += v[jrow]*x[5*idx[jrow]+4];
659: jrow++;
660: }
661: y[5*i] = sum1;
662: y[5*i+1] = sum2;
663: y[5*i+2] = sum3;
664: y[5*i+3] = sum4;
665: y[5*i+4] = sum5;
666: }
668: PetscLogFlops(10*a->nz - 5*m);
669: VecRestoreArray(xx,&x);
670: VecRestoreArray(yy,&y);
671: return(0);
672: }
676: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
677: {
678: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
679: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
680: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
682: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
685: VecSet(yy,zero);
686: VecGetArray(xx,&x);
687: VecGetArray(yy,&y);
688:
689: for (i=0; i<m; i++) {
690: idx = a->j + a->i[i] ;
691: v = a->a + a->i[i] ;
692: n = a->i[i+1] - a->i[i];
693: alpha1 = x[5*i];
694: alpha2 = x[5*i+1];
695: alpha3 = x[5*i+2];
696: alpha4 = x[5*i+3];
697: alpha5 = x[5*i+4];
698: while (n-->0) {
699: y[5*(*idx)] += alpha1*(*v);
700: y[5*(*idx)+1] += alpha2*(*v);
701: y[5*(*idx)+2] += alpha3*(*v);
702: y[5*(*idx)+3] += alpha4*(*v);
703: y[5*(*idx)+4] += alpha5*(*v);
704: idx++; v++;
705: }
706: }
707: PetscLogFlops(10*a->nz - 5*b->AIJ->cmap.n);
708: VecRestoreArray(xx,&x);
709: VecRestoreArray(yy,&y);
710: return(0);
711: }
715: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
716: {
717: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
718: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
719: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
721: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
722: PetscInt n,i,jrow,j;
725: if (yy != zz) {VecCopy(yy,zz);}
726: VecGetArray(xx,&x);
727: VecGetArray(zz,&y);
728: idx = a->j;
729: v = a->a;
730: ii = a->i;
732: for (i=0; i<m; i++) {
733: jrow = ii[i];
734: n = ii[i+1] - jrow;
735: sum1 = 0.0;
736: sum2 = 0.0;
737: sum3 = 0.0;
738: sum4 = 0.0;
739: sum5 = 0.0;
740: for (j=0; j<n; j++) {
741: sum1 += v[jrow]*x[5*idx[jrow]];
742: sum2 += v[jrow]*x[5*idx[jrow]+1];
743: sum3 += v[jrow]*x[5*idx[jrow]+2];
744: sum4 += v[jrow]*x[5*idx[jrow]+3];
745: sum5 += v[jrow]*x[5*idx[jrow]+4];
746: jrow++;
747: }
748: y[5*i] += sum1;
749: y[5*i+1] += sum2;
750: y[5*i+2] += sum3;
751: y[5*i+3] += sum4;
752: y[5*i+4] += sum5;
753: }
755: PetscLogFlops(10*a->nz);
756: VecRestoreArray(xx,&x);
757: VecRestoreArray(zz,&y);
758: return(0);
759: }
763: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
764: {
765: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
766: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
767: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
769: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
772: if (yy != zz) {VecCopy(yy,zz);}
773: VecGetArray(xx,&x);
774: VecGetArray(zz,&y);
775:
776: for (i=0; i<m; i++) {
777: idx = a->j + a->i[i] ;
778: v = a->a + a->i[i] ;
779: n = a->i[i+1] - a->i[i];
780: alpha1 = x[5*i];
781: alpha2 = x[5*i+1];
782: alpha3 = x[5*i+2];
783: alpha4 = x[5*i+3];
784: alpha5 = x[5*i+4];
785: while (n-->0) {
786: y[5*(*idx)] += alpha1*(*v);
787: y[5*(*idx)+1] += alpha2*(*v);
788: y[5*(*idx)+2] += alpha3*(*v);
789: y[5*(*idx)+3] += alpha4*(*v);
790: y[5*(*idx)+4] += alpha5*(*v);
791: idx++; v++;
792: }
793: }
794: PetscLogFlops(10*a->nz);
795: VecRestoreArray(xx,&x);
796: VecRestoreArray(zz,&y);
797: return(0);
798: }
800: /* ------------------------------------------------------------------------------*/
803: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
804: {
805: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
806: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
807: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
809: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
810: PetscInt n,i,jrow,j;
813: VecGetArray(xx,&x);
814: VecGetArray(yy,&y);
815: idx = a->j;
816: v = a->a;
817: ii = a->i;
819: for (i=0; i<m; i++) {
820: jrow = ii[i];
821: n = ii[i+1] - jrow;
822: sum1 = 0.0;
823: sum2 = 0.0;
824: sum3 = 0.0;
825: sum4 = 0.0;
826: sum5 = 0.0;
827: sum6 = 0.0;
828: for (j=0; j<n; j++) {
829: sum1 += v[jrow]*x[6*idx[jrow]];
830: sum2 += v[jrow]*x[6*idx[jrow]+1];
831: sum3 += v[jrow]*x[6*idx[jrow]+2];
832: sum4 += v[jrow]*x[6*idx[jrow]+3];
833: sum5 += v[jrow]*x[6*idx[jrow]+4];
834: sum6 += v[jrow]*x[6*idx[jrow]+5];
835: jrow++;
836: }
837: y[6*i] = sum1;
838: y[6*i+1] = sum2;
839: y[6*i+2] = sum3;
840: y[6*i+3] = sum4;
841: y[6*i+4] = sum5;
842: y[6*i+5] = sum6;
843: }
845: PetscLogFlops(12*a->nz - 6*m);
846: VecRestoreArray(xx,&x);
847: VecRestoreArray(yy,&y);
848: return(0);
849: }
853: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
854: {
855: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
856: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
857: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
859: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
862: VecSet(yy,zero);
863: VecGetArray(xx,&x);
864: VecGetArray(yy,&y);
866: for (i=0; i<m; i++) {
867: idx = a->j + a->i[i] ;
868: v = a->a + a->i[i] ;
869: n = a->i[i+1] - a->i[i];
870: alpha1 = x[6*i];
871: alpha2 = x[6*i+1];
872: alpha3 = x[6*i+2];
873: alpha4 = x[6*i+3];
874: alpha5 = x[6*i+4];
875: alpha6 = x[6*i+5];
876: while (n-->0) {
877: y[6*(*idx)] += alpha1*(*v);
878: y[6*(*idx)+1] += alpha2*(*v);
879: y[6*(*idx)+2] += alpha3*(*v);
880: y[6*(*idx)+3] += alpha4*(*v);
881: y[6*(*idx)+4] += alpha5*(*v);
882: y[6*(*idx)+5] += alpha6*(*v);
883: idx++; v++;
884: }
885: }
886: PetscLogFlops(12*a->nz - 6*b->AIJ->cmap.n);
887: VecRestoreArray(xx,&x);
888: VecRestoreArray(yy,&y);
889: return(0);
890: }
894: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
895: {
896: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
897: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
898: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
900: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
901: PetscInt n,i,jrow,j;
904: if (yy != zz) {VecCopy(yy,zz);}
905: VecGetArray(xx,&x);
906: VecGetArray(zz,&y);
907: idx = a->j;
908: v = a->a;
909: ii = a->i;
911: for (i=0; i<m; i++) {
912: jrow = ii[i];
913: n = ii[i+1] - jrow;
914: sum1 = 0.0;
915: sum2 = 0.0;
916: sum3 = 0.0;
917: sum4 = 0.0;
918: sum5 = 0.0;
919: sum6 = 0.0;
920: for (j=0; j<n; j++) {
921: sum1 += v[jrow]*x[6*idx[jrow]];
922: sum2 += v[jrow]*x[6*idx[jrow]+1];
923: sum3 += v[jrow]*x[6*idx[jrow]+2];
924: sum4 += v[jrow]*x[6*idx[jrow]+3];
925: sum5 += v[jrow]*x[6*idx[jrow]+4];
926: sum6 += v[jrow]*x[6*idx[jrow]+5];
927: jrow++;
928: }
929: y[6*i] += sum1;
930: y[6*i+1] += sum2;
931: y[6*i+2] += sum3;
932: y[6*i+3] += sum4;
933: y[6*i+4] += sum5;
934: y[6*i+5] += sum6;
935: }
937: PetscLogFlops(12*a->nz);
938: VecRestoreArray(xx,&x);
939: VecRestoreArray(zz,&y);
940: return(0);
941: }
945: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
946: {
947: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
948: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
949: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
951: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
954: if (yy != zz) {VecCopy(yy,zz);}
955: VecGetArray(xx,&x);
956: VecGetArray(zz,&y);
957:
958: for (i=0; i<m; i++) {
959: idx = a->j + a->i[i] ;
960: v = a->a + a->i[i] ;
961: n = a->i[i+1] - a->i[i];
962: alpha1 = x[6*i];
963: alpha2 = x[6*i+1];
964: alpha3 = x[6*i+2];
965: alpha4 = x[6*i+3];
966: alpha5 = x[6*i+4];
967: alpha6 = x[6*i+5];
968: while (n-->0) {
969: y[6*(*idx)] += alpha1*(*v);
970: y[6*(*idx)+1] += alpha2*(*v);
971: y[6*(*idx)+2] += alpha3*(*v);
972: y[6*(*idx)+3] += alpha4*(*v);
973: y[6*(*idx)+4] += alpha5*(*v);
974: y[6*(*idx)+5] += alpha6*(*v);
975: idx++; v++;
976: }
977: }
978: PetscLogFlops(12*a->nz);
979: VecRestoreArray(xx,&x);
980: VecRestoreArray(zz,&y);
981: return(0);
982: }
984: /* ------------------------------------------------------------------------------*/
987: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
988: {
989: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
990: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
991: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
993: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
994: PetscInt n,i,jrow,j;
997: VecGetArray(xx,&x);
998: VecGetArray(yy,&y);
999: idx = a->j;
1000: v = a->a;
1001: ii = a->i;
1003: for (i=0; i<m; i++) {
1004: jrow = ii[i];
1005: n = ii[i+1] - jrow;
1006: sum1 = 0.0;
1007: sum2 = 0.0;
1008: sum3 = 0.0;
1009: sum4 = 0.0;
1010: sum5 = 0.0;
1011: sum6 = 0.0;
1012: sum7 = 0.0;
1013: for (j=0; j<n; j++) {
1014: sum1 += v[jrow]*x[7*idx[jrow]];
1015: sum2 += v[jrow]*x[7*idx[jrow]+1];
1016: sum3 += v[jrow]*x[7*idx[jrow]+2];
1017: sum4 += v[jrow]*x[7*idx[jrow]+3];
1018: sum5 += v[jrow]*x[7*idx[jrow]+4];
1019: sum6 += v[jrow]*x[7*idx[jrow]+5];
1020: sum7 += v[jrow]*x[7*idx[jrow]+6];
1021: jrow++;
1022: }
1023: y[7*i] = sum1;
1024: y[7*i+1] = sum2;
1025: y[7*i+2] = sum3;
1026: y[7*i+3] = sum4;
1027: y[7*i+4] = sum5;
1028: y[7*i+5] = sum6;
1029: y[7*i+6] = sum7;
1030: }
1032: PetscLogFlops(14*a->nz - 7*m);
1033: VecRestoreArray(xx,&x);
1034: VecRestoreArray(yy,&y);
1035: return(0);
1036: }
1040: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1041: {
1042: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1043: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1044: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0;
1046: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1049: VecSet(yy,zero);
1050: VecGetArray(xx,&x);
1051: VecGetArray(yy,&y);
1053: for (i=0; i<m; i++) {
1054: idx = a->j + a->i[i] ;
1055: v = a->a + a->i[i] ;
1056: n = a->i[i+1] - a->i[i];
1057: alpha1 = x[7*i];
1058: alpha2 = x[7*i+1];
1059: alpha3 = x[7*i+2];
1060: alpha4 = x[7*i+3];
1061: alpha5 = x[7*i+4];
1062: alpha6 = x[7*i+5];
1063: alpha7 = x[7*i+6];
1064: while (n-->0) {
1065: y[7*(*idx)] += alpha1*(*v);
1066: y[7*(*idx)+1] += alpha2*(*v);
1067: y[7*(*idx)+2] += alpha3*(*v);
1068: y[7*(*idx)+3] += alpha4*(*v);
1069: y[7*(*idx)+4] += alpha5*(*v);
1070: y[7*(*idx)+5] += alpha6*(*v);
1071: y[7*(*idx)+6] += alpha7*(*v);
1072: idx++; v++;
1073: }
1074: }
1075: PetscLogFlops(14*a->nz - 7*b->AIJ->cmap.n);
1076: VecRestoreArray(xx,&x);
1077: VecRestoreArray(yy,&y);
1078: return(0);
1079: }
1083: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1084: {
1085: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1086: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1087: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1089: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1090: PetscInt n,i,jrow,j;
1093: if (yy != zz) {VecCopy(yy,zz);}
1094: VecGetArray(xx,&x);
1095: VecGetArray(zz,&y);
1096: idx = a->j;
1097: v = a->a;
1098: ii = a->i;
1100: for (i=0; i<m; i++) {
1101: jrow = ii[i];
1102: n = ii[i+1] - jrow;
1103: sum1 = 0.0;
1104: sum2 = 0.0;
1105: sum3 = 0.0;
1106: sum4 = 0.0;
1107: sum5 = 0.0;
1108: sum6 = 0.0;
1109: sum7 = 0.0;
1110: for (j=0; j<n; j++) {
1111: sum1 += v[jrow]*x[7*idx[jrow]];
1112: sum2 += v[jrow]*x[7*idx[jrow]+1];
1113: sum3 += v[jrow]*x[7*idx[jrow]+2];
1114: sum4 += v[jrow]*x[7*idx[jrow]+3];
1115: sum5 += v[jrow]*x[7*idx[jrow]+4];
1116: sum6 += v[jrow]*x[7*idx[jrow]+5];
1117: sum7 += v[jrow]*x[7*idx[jrow]+6];
1118: jrow++;
1119: }
1120: y[7*i] += sum1;
1121: y[7*i+1] += sum2;
1122: y[7*i+2] += sum3;
1123: y[7*i+3] += sum4;
1124: y[7*i+4] += sum5;
1125: y[7*i+5] += sum6;
1126: y[7*i+6] += sum7;
1127: }
1129: PetscLogFlops(14*a->nz);
1130: VecRestoreArray(xx,&x);
1131: VecRestoreArray(zz,&y);
1132: return(0);
1133: }
1137: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1138: {
1139: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1140: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1141: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1143: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1146: if (yy != zz) {VecCopy(yy,zz);}
1147: VecGetArray(xx,&x);
1148: VecGetArray(zz,&y);
1149: for (i=0; i<m; i++) {
1150: idx = a->j + a->i[i] ;
1151: v = a->a + a->i[i] ;
1152: n = a->i[i+1] - a->i[i];
1153: alpha1 = x[7*i];
1154: alpha2 = x[7*i+1];
1155: alpha3 = x[7*i+2];
1156: alpha4 = x[7*i+3];
1157: alpha5 = x[7*i+4];
1158: alpha6 = x[7*i+5];
1159: alpha7 = x[7*i+6];
1160: while (n-->0) {
1161: y[7*(*idx)] += alpha1*(*v);
1162: y[7*(*idx)+1] += alpha2*(*v);
1163: y[7*(*idx)+2] += alpha3*(*v);
1164: y[7*(*idx)+3] += alpha4*(*v);
1165: y[7*(*idx)+4] += alpha5*(*v);
1166: y[7*(*idx)+5] += alpha6*(*v);
1167: y[7*(*idx)+6] += alpha7*(*v);
1168: idx++; v++;
1169: }
1170: }
1171: PetscLogFlops(14*a->nz);
1172: VecRestoreArray(xx,&x);
1173: VecRestoreArray(zz,&y);
1174: return(0);
1175: }
1179: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1180: {
1181: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1182: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1183: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1185: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1186: PetscInt n,i,jrow,j;
1189: VecGetArray(xx,&x);
1190: VecGetArray(yy,&y);
1191: idx = a->j;
1192: v = a->a;
1193: ii = a->i;
1195: for (i=0; i<m; i++) {
1196: jrow = ii[i];
1197: n = ii[i+1] - jrow;
1198: sum1 = 0.0;
1199: sum2 = 0.0;
1200: sum3 = 0.0;
1201: sum4 = 0.0;
1202: sum5 = 0.0;
1203: sum6 = 0.0;
1204: sum7 = 0.0;
1205: sum8 = 0.0;
1206: for (j=0; j<n; j++) {
1207: sum1 += v[jrow]*x[8*idx[jrow]];
1208: sum2 += v[jrow]*x[8*idx[jrow]+1];
1209: sum3 += v[jrow]*x[8*idx[jrow]+2];
1210: sum4 += v[jrow]*x[8*idx[jrow]+3];
1211: sum5 += v[jrow]*x[8*idx[jrow]+4];
1212: sum6 += v[jrow]*x[8*idx[jrow]+5];
1213: sum7 += v[jrow]*x[8*idx[jrow]+6];
1214: sum8 += v[jrow]*x[8*idx[jrow]+7];
1215: jrow++;
1216: }
1217: y[8*i] = sum1;
1218: y[8*i+1] = sum2;
1219: y[8*i+2] = sum3;
1220: y[8*i+3] = sum4;
1221: y[8*i+4] = sum5;
1222: y[8*i+5] = sum6;
1223: y[8*i+6] = sum7;
1224: y[8*i+7] = sum8;
1225: }
1227: PetscLogFlops(16*a->nz - 8*m);
1228: VecRestoreArray(xx,&x);
1229: VecRestoreArray(yy,&y);
1230: return(0);
1231: }
1235: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1236: {
1237: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1238: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1239: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1241: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1244: VecSet(yy,zero);
1245: VecGetArray(xx,&x);
1246: VecGetArray(yy,&y);
1248: for (i=0; i<m; i++) {
1249: idx = a->j + a->i[i] ;
1250: v = a->a + a->i[i] ;
1251: n = a->i[i+1] - a->i[i];
1252: alpha1 = x[8*i];
1253: alpha2 = x[8*i+1];
1254: alpha3 = x[8*i+2];
1255: alpha4 = x[8*i+3];
1256: alpha5 = x[8*i+4];
1257: alpha6 = x[8*i+5];
1258: alpha7 = x[8*i+6];
1259: alpha8 = x[8*i+7];
1260: while (n-->0) {
1261: y[8*(*idx)] += alpha1*(*v);
1262: y[8*(*idx)+1] += alpha2*(*v);
1263: y[8*(*idx)+2] += alpha3*(*v);
1264: y[8*(*idx)+3] += alpha4*(*v);
1265: y[8*(*idx)+4] += alpha5*(*v);
1266: y[8*(*idx)+5] += alpha6*(*v);
1267: y[8*(*idx)+6] += alpha7*(*v);
1268: y[8*(*idx)+7] += alpha8*(*v);
1269: idx++; v++;
1270: }
1271: }
1272: PetscLogFlops(16*a->nz - 8*b->AIJ->cmap.n);
1273: VecRestoreArray(xx,&x);
1274: VecRestoreArray(yy,&y);
1275: return(0);
1276: }
1280: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1281: {
1282: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1283: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1284: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1286: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1287: PetscInt n,i,jrow,j;
1290: if (yy != zz) {VecCopy(yy,zz);}
1291: VecGetArray(xx,&x);
1292: VecGetArray(zz,&y);
1293: idx = a->j;
1294: v = a->a;
1295: ii = a->i;
1297: for (i=0; i<m; i++) {
1298: jrow = ii[i];
1299: n = ii[i+1] - jrow;
1300: sum1 = 0.0;
1301: sum2 = 0.0;
1302: sum3 = 0.0;
1303: sum4 = 0.0;
1304: sum5 = 0.0;
1305: sum6 = 0.0;
1306: sum7 = 0.0;
1307: sum8 = 0.0;
1308: for (j=0; j<n; j++) {
1309: sum1 += v[jrow]*x[8*idx[jrow]];
1310: sum2 += v[jrow]*x[8*idx[jrow]+1];
1311: sum3 += v[jrow]*x[8*idx[jrow]+2];
1312: sum4 += v[jrow]*x[8*idx[jrow]+3];
1313: sum5 += v[jrow]*x[8*idx[jrow]+4];
1314: sum6 += v[jrow]*x[8*idx[jrow]+5];
1315: sum7 += v[jrow]*x[8*idx[jrow]+6];
1316: sum8 += v[jrow]*x[8*idx[jrow]+7];
1317: jrow++;
1318: }
1319: y[8*i] += sum1;
1320: y[8*i+1] += sum2;
1321: y[8*i+2] += sum3;
1322: y[8*i+3] += sum4;
1323: y[8*i+4] += sum5;
1324: y[8*i+5] += sum6;
1325: y[8*i+6] += sum7;
1326: y[8*i+7] += sum8;
1327: }
1329: PetscLogFlops(16*a->nz);
1330: VecRestoreArray(xx,&x);
1331: VecRestoreArray(zz,&y);
1332: return(0);
1333: }
1337: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1338: {
1339: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1340: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1341: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1343: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1346: if (yy != zz) {VecCopy(yy,zz);}
1347: VecGetArray(xx,&x);
1348: VecGetArray(zz,&y);
1349: for (i=0; i<m; i++) {
1350: idx = a->j + a->i[i] ;
1351: v = a->a + a->i[i] ;
1352: n = a->i[i+1] - a->i[i];
1353: alpha1 = x[8*i];
1354: alpha2 = x[8*i+1];
1355: alpha3 = x[8*i+2];
1356: alpha4 = x[8*i+3];
1357: alpha5 = x[8*i+4];
1358: alpha6 = x[8*i+5];
1359: alpha7 = x[8*i+6];
1360: alpha8 = x[8*i+7];
1361: while (n-->0) {
1362: y[8*(*idx)] += alpha1*(*v);
1363: y[8*(*idx)+1] += alpha2*(*v);
1364: y[8*(*idx)+2] += alpha3*(*v);
1365: y[8*(*idx)+3] += alpha4*(*v);
1366: y[8*(*idx)+4] += alpha5*(*v);
1367: y[8*(*idx)+5] += alpha6*(*v);
1368: y[8*(*idx)+6] += alpha7*(*v);
1369: y[8*(*idx)+7] += alpha8*(*v);
1370: idx++; v++;
1371: }
1372: }
1373: PetscLogFlops(16*a->nz);
1374: VecRestoreArray(xx,&x);
1375: VecRestoreArray(zz,&y);
1376: return(0);
1377: }
1379: /* ------------------------------------------------------------------------------*/
1382: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1383: {
1384: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1385: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1386: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1388: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1389: PetscInt n,i,jrow,j;
1392: VecGetArray(xx,&x);
1393: VecGetArray(yy,&y);
1394: idx = a->j;
1395: v = a->a;
1396: ii = a->i;
1398: for (i=0; i<m; i++) {
1399: jrow = ii[i];
1400: n = ii[i+1] - jrow;
1401: sum1 = 0.0;
1402: sum2 = 0.0;
1403: sum3 = 0.0;
1404: sum4 = 0.0;
1405: sum5 = 0.0;
1406: sum6 = 0.0;
1407: sum7 = 0.0;
1408: sum8 = 0.0;
1409: sum9 = 0.0;
1410: for (j=0; j<n; j++) {
1411: sum1 += v[jrow]*x[9*idx[jrow]];
1412: sum2 += v[jrow]*x[9*idx[jrow]+1];
1413: sum3 += v[jrow]*x[9*idx[jrow]+2];
1414: sum4 += v[jrow]*x[9*idx[jrow]+3];
1415: sum5 += v[jrow]*x[9*idx[jrow]+4];
1416: sum6 += v[jrow]*x[9*idx[jrow]+5];
1417: sum7 += v[jrow]*x[9*idx[jrow]+6];
1418: sum8 += v[jrow]*x[9*idx[jrow]+7];
1419: sum9 += v[jrow]*x[9*idx[jrow]+8];
1420: jrow++;
1421: }
1422: y[9*i] = sum1;
1423: y[9*i+1] = sum2;
1424: y[9*i+2] = sum3;
1425: y[9*i+3] = sum4;
1426: y[9*i+4] = sum5;
1427: y[9*i+5] = sum6;
1428: y[9*i+6] = sum7;
1429: y[9*i+7] = sum8;
1430: y[9*i+8] = sum9;
1431: }
1433: PetscLogFlops(18*a->nz - 9*m);
1434: VecRestoreArray(xx,&x);
1435: VecRestoreArray(yy,&y);
1436: return(0);
1437: }
1439: /* ------------------------------------------------------------------------------*/
1443: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1444: {
1445: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1446: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1447: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1449: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1452: VecSet(yy,zero);
1453: VecGetArray(xx,&x);
1454: VecGetArray(yy,&y);
1456: for (i=0; i<m; i++) {
1457: idx = a->j + a->i[i] ;
1458: v = a->a + a->i[i] ;
1459: n = a->i[i+1] - a->i[i];
1460: alpha1 = x[9*i];
1461: alpha2 = x[9*i+1];
1462: alpha3 = x[9*i+2];
1463: alpha4 = x[9*i+3];
1464: alpha5 = x[9*i+4];
1465: alpha6 = x[9*i+5];
1466: alpha7 = x[9*i+6];
1467: alpha8 = x[9*i+7];
1468: alpha9 = x[9*i+8];
1469: while (n-->0) {
1470: y[9*(*idx)] += alpha1*(*v);
1471: y[9*(*idx)+1] += alpha2*(*v);
1472: y[9*(*idx)+2] += alpha3*(*v);
1473: y[9*(*idx)+3] += alpha4*(*v);
1474: y[9*(*idx)+4] += alpha5*(*v);
1475: y[9*(*idx)+5] += alpha6*(*v);
1476: y[9*(*idx)+6] += alpha7*(*v);
1477: y[9*(*idx)+7] += alpha8*(*v);
1478: y[9*(*idx)+8] += alpha9*(*v);
1479: idx++; v++;
1480: }
1481: }
1482: PetscLogFlops(18*a->nz - 9*b->AIJ->cmap.n);
1483: VecRestoreArray(xx,&x);
1484: VecRestoreArray(yy,&y);
1485: return(0);
1486: }
1490: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1491: {
1492: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1493: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1494: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1496: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1497: PetscInt n,i,jrow,j;
1500: if (yy != zz) {VecCopy(yy,zz);}
1501: VecGetArray(xx,&x);
1502: VecGetArray(zz,&y);
1503: idx = a->j;
1504: v = a->a;
1505: ii = a->i;
1507: for (i=0; i<m; i++) {
1508: jrow = ii[i];
1509: n = ii[i+1] - jrow;
1510: sum1 = 0.0;
1511: sum2 = 0.0;
1512: sum3 = 0.0;
1513: sum4 = 0.0;
1514: sum5 = 0.0;
1515: sum6 = 0.0;
1516: sum7 = 0.0;
1517: sum8 = 0.0;
1518: sum9 = 0.0;
1519: for (j=0; j<n; j++) {
1520: sum1 += v[jrow]*x[9*idx[jrow]];
1521: sum2 += v[jrow]*x[9*idx[jrow]+1];
1522: sum3 += v[jrow]*x[9*idx[jrow]+2];
1523: sum4 += v[jrow]*x[9*idx[jrow]+3];
1524: sum5 += v[jrow]*x[9*idx[jrow]+4];
1525: sum6 += v[jrow]*x[9*idx[jrow]+5];
1526: sum7 += v[jrow]*x[9*idx[jrow]+6];
1527: sum8 += v[jrow]*x[9*idx[jrow]+7];
1528: sum9 += v[jrow]*x[9*idx[jrow]+8];
1529: jrow++;
1530: }
1531: y[9*i] += sum1;
1532: y[9*i+1] += sum2;
1533: y[9*i+2] += sum3;
1534: y[9*i+3] += sum4;
1535: y[9*i+4] += sum5;
1536: y[9*i+5] += sum6;
1537: y[9*i+6] += sum7;
1538: y[9*i+7] += sum8;
1539: y[9*i+8] += sum9;
1540: }
1542: PetscLogFlops(18*a->nz);
1543: VecRestoreArray(xx,&x);
1544: VecRestoreArray(zz,&y);
1545: return(0);
1546: }
1550: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1551: {
1552: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1553: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1554: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1556: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1559: if (yy != zz) {VecCopy(yy,zz);}
1560: VecGetArray(xx,&x);
1561: VecGetArray(zz,&y);
1562: for (i=0; i<m; i++) {
1563: idx = a->j + a->i[i] ;
1564: v = a->a + a->i[i] ;
1565: n = a->i[i+1] - a->i[i];
1566: alpha1 = x[9*i];
1567: alpha2 = x[9*i+1];
1568: alpha3 = x[9*i+2];
1569: alpha4 = x[9*i+3];
1570: alpha5 = x[9*i+4];
1571: alpha6 = x[9*i+5];
1572: alpha7 = x[9*i+6];
1573: alpha8 = x[9*i+7];
1574: alpha9 = x[9*i+8];
1575: while (n-->0) {
1576: y[9*(*idx)] += alpha1*(*v);
1577: y[9*(*idx)+1] += alpha2*(*v);
1578: y[9*(*idx)+2] += alpha3*(*v);
1579: y[9*(*idx)+3] += alpha4*(*v);
1580: y[9*(*idx)+4] += alpha5*(*v);
1581: y[9*(*idx)+5] += alpha6*(*v);
1582: y[9*(*idx)+6] += alpha7*(*v);
1583: y[9*(*idx)+7] += alpha8*(*v);
1584: y[9*(*idx)+8] += alpha9*(*v);
1585: idx++; v++;
1586: }
1587: }
1588: PetscLogFlops(18*a->nz);
1589: VecRestoreArray(xx,&x);
1590: VecRestoreArray(zz,&y);
1591: return(0);
1592: }
1593: /*--------------------------------------------------------------------------------------------*/
1596: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1597: {
1598: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1599: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1600: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1602: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1603: PetscInt n,i,jrow,j;
1606: VecGetArray(xx,&x);
1607: VecGetArray(yy,&y);
1608: idx = a->j;
1609: v = a->a;
1610: ii = a->i;
1612: for (i=0; i<m; i++) {
1613: jrow = ii[i];
1614: n = ii[i+1] - jrow;
1615: sum1 = 0.0;
1616: sum2 = 0.0;
1617: sum3 = 0.0;
1618: sum4 = 0.0;
1619: sum5 = 0.0;
1620: sum6 = 0.0;
1621: sum7 = 0.0;
1622: sum8 = 0.0;
1623: sum9 = 0.0;
1624: sum10 = 0.0;
1625: for (j=0; j<n; j++) {
1626: sum1 += v[jrow]*x[10*idx[jrow]];
1627: sum2 += v[jrow]*x[10*idx[jrow]+1];
1628: sum3 += v[jrow]*x[10*idx[jrow]+2];
1629: sum4 += v[jrow]*x[10*idx[jrow]+3];
1630: sum5 += v[jrow]*x[10*idx[jrow]+4];
1631: sum6 += v[jrow]*x[10*idx[jrow]+5];
1632: sum7 += v[jrow]*x[10*idx[jrow]+6];
1633: sum8 += v[jrow]*x[10*idx[jrow]+7];
1634: sum9 += v[jrow]*x[10*idx[jrow]+8];
1635: sum10 += v[jrow]*x[10*idx[jrow]+9];
1636: jrow++;
1637: }
1638: y[10*i] = sum1;
1639: y[10*i+1] = sum2;
1640: y[10*i+2] = sum3;
1641: y[10*i+3] = sum4;
1642: y[10*i+4] = sum5;
1643: y[10*i+5] = sum6;
1644: y[10*i+6] = sum7;
1645: y[10*i+7] = sum8;
1646: y[10*i+8] = sum9;
1647: y[10*i+9] = sum10;
1648: }
1650: PetscLogFlops(20*a->nz - 10*m);
1651: VecRestoreArray(xx,&x);
1652: VecRestoreArray(yy,&y);
1653: return(0);
1654: }
1658: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1659: {
1660: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1661: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1662: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1664: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1665: PetscInt n,i,jrow,j;
1668: if (yy != zz) {VecCopy(yy,zz);}
1669: VecGetArray(xx,&x);
1670: VecGetArray(zz,&y);
1671: idx = a->j;
1672: v = a->a;
1673: ii = a->i;
1675: for (i=0; i<m; i++) {
1676: jrow = ii[i];
1677: n = ii[i+1] - jrow;
1678: sum1 = 0.0;
1679: sum2 = 0.0;
1680: sum3 = 0.0;
1681: sum4 = 0.0;
1682: sum5 = 0.0;
1683: sum6 = 0.0;
1684: sum7 = 0.0;
1685: sum8 = 0.0;
1686: sum9 = 0.0;
1687: sum10 = 0.0;
1688: for (j=0; j<n; j++) {
1689: sum1 += v[jrow]*x[10*idx[jrow]];
1690: sum2 += v[jrow]*x[10*idx[jrow]+1];
1691: sum3 += v[jrow]*x[10*idx[jrow]+2];
1692: sum4 += v[jrow]*x[10*idx[jrow]+3];
1693: sum5 += v[jrow]*x[10*idx[jrow]+4];
1694: sum6 += v[jrow]*x[10*idx[jrow]+5];
1695: sum7 += v[jrow]*x[10*idx[jrow]+6];
1696: sum8 += v[jrow]*x[10*idx[jrow]+7];
1697: sum9 += v[jrow]*x[10*idx[jrow]+8];
1698: sum10 += v[jrow]*x[10*idx[jrow]+9];
1699: jrow++;
1700: }
1701: y[10*i] += sum1;
1702: y[10*i+1] += sum2;
1703: y[10*i+2] += sum3;
1704: y[10*i+3] += sum4;
1705: y[10*i+4] += sum5;
1706: y[10*i+5] += sum6;
1707: y[10*i+6] += sum7;
1708: y[10*i+7] += sum8;
1709: y[10*i+8] += sum9;
1710: y[10*i+9] += sum10;
1711: }
1713: PetscLogFlops(20*a->nz - 10*m);
1714: VecRestoreArray(xx,&x);
1715: VecRestoreArray(yy,&y);
1716: return(0);
1717: }
1721: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1722: {
1723: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1724: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1725: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0;
1727: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1730: VecSet(yy,zero);
1731: VecGetArray(xx,&x);
1732: VecGetArray(yy,&y);
1734: for (i=0; i<m; i++) {
1735: idx = a->j + a->i[i] ;
1736: v = a->a + a->i[i] ;
1737: n = a->i[i+1] - a->i[i];
1738: alpha1 = x[10*i];
1739: alpha2 = x[10*i+1];
1740: alpha3 = x[10*i+2];
1741: alpha4 = x[10*i+3];
1742: alpha5 = x[10*i+4];
1743: alpha6 = x[10*i+5];
1744: alpha7 = x[10*i+6];
1745: alpha8 = x[10*i+7];
1746: alpha9 = x[10*i+8];
1747: alpha10 = x[10*i+9];
1748: while (n-->0) {
1749: y[10*(*idx)] += alpha1*(*v);
1750: y[10*(*idx)+1] += alpha2*(*v);
1751: y[10*(*idx)+2] += alpha3*(*v);
1752: y[10*(*idx)+3] += alpha4*(*v);
1753: y[10*(*idx)+4] += alpha5*(*v);
1754: y[10*(*idx)+5] += alpha6*(*v);
1755: y[10*(*idx)+6] += alpha7*(*v);
1756: y[10*(*idx)+7] += alpha8*(*v);
1757: y[10*(*idx)+8] += alpha9*(*v);
1758: y[10*(*idx)+9] += alpha10*(*v);
1759: idx++; v++;
1760: }
1761: }
1762: PetscLogFlops(20*a->nz - 10*b->AIJ->cmap.n);
1763: VecRestoreArray(xx,&x);
1764: VecRestoreArray(yy,&y);
1765: return(0);
1766: }
1770: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1771: {
1772: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1773: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1774: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1776: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1779: if (yy != zz) {VecCopy(yy,zz);}
1780: VecGetArray(xx,&x);
1781: VecGetArray(zz,&y);
1782: for (i=0; i<m; i++) {
1783: idx = a->j + a->i[i] ;
1784: v = a->a + a->i[i] ;
1785: n = a->i[i+1] - a->i[i];
1786: alpha1 = x[10*i];
1787: alpha2 = x[10*i+1];
1788: alpha3 = x[10*i+2];
1789: alpha4 = x[10*i+3];
1790: alpha5 = x[10*i+4];
1791: alpha6 = x[10*i+5];
1792: alpha7 = x[10*i+6];
1793: alpha8 = x[10*i+7];
1794: alpha9 = x[10*i+8];
1795: alpha10 = x[10*i+9];
1796: while (n-->0) {
1797: y[10*(*idx)] += alpha1*(*v);
1798: y[10*(*idx)+1] += alpha2*(*v);
1799: y[10*(*idx)+2] += alpha3*(*v);
1800: y[10*(*idx)+3] += alpha4*(*v);
1801: y[10*(*idx)+4] += alpha5*(*v);
1802: y[10*(*idx)+5] += alpha6*(*v);
1803: y[10*(*idx)+6] += alpha7*(*v);
1804: y[10*(*idx)+7] += alpha8*(*v);
1805: y[10*(*idx)+8] += alpha9*(*v);
1806: y[10*(*idx)+9] += alpha10*(*v);
1807: idx++; v++;
1808: }
1809: }
1810: PetscLogFlops(20*a->nz);
1811: VecRestoreArray(xx,&x);
1812: VecRestoreArray(zz,&y);
1813: return(0);
1814: }
1817: /*--------------------------------------------------------------------------------------------*/
1820: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1821: {
1822: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1823: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1824: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1825: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1827: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1828: PetscInt n,i,jrow,j;
1831: VecGetArray(xx,&x);
1832: VecGetArray(yy,&y);
1833: idx = a->j;
1834: v = a->a;
1835: ii = a->i;
1837: for (i=0; i<m; i++) {
1838: jrow = ii[i];
1839: n = ii[i+1] - jrow;
1840: sum1 = 0.0;
1841: sum2 = 0.0;
1842: sum3 = 0.0;
1843: sum4 = 0.0;
1844: sum5 = 0.0;
1845: sum6 = 0.0;
1846: sum7 = 0.0;
1847: sum8 = 0.0;
1848: sum9 = 0.0;
1849: sum10 = 0.0;
1850: sum11 = 0.0;
1851: sum12 = 0.0;
1852: sum13 = 0.0;
1853: sum14 = 0.0;
1854: sum15 = 0.0;
1855: sum16 = 0.0;
1856: for (j=0; j<n; j++) {
1857: sum1 += v[jrow]*x[16*idx[jrow]];
1858: sum2 += v[jrow]*x[16*idx[jrow]+1];
1859: sum3 += v[jrow]*x[16*idx[jrow]+2];
1860: sum4 += v[jrow]*x[16*idx[jrow]+3];
1861: sum5 += v[jrow]*x[16*idx[jrow]+4];
1862: sum6 += v[jrow]*x[16*idx[jrow]+5];
1863: sum7 += v[jrow]*x[16*idx[jrow]+6];
1864: sum8 += v[jrow]*x[16*idx[jrow]+7];
1865: sum9 += v[jrow]*x[16*idx[jrow]+8];
1866: sum10 += v[jrow]*x[16*idx[jrow]+9];
1867: sum11 += v[jrow]*x[16*idx[jrow]+10];
1868: sum12 += v[jrow]*x[16*idx[jrow]+11];
1869: sum13 += v[jrow]*x[16*idx[jrow]+12];
1870: sum14 += v[jrow]*x[16*idx[jrow]+13];
1871: sum15 += v[jrow]*x[16*idx[jrow]+14];
1872: sum16 += v[jrow]*x[16*idx[jrow]+15];
1873: jrow++;
1874: }
1875: y[16*i] = sum1;
1876: y[16*i+1] = sum2;
1877: y[16*i+2] = sum3;
1878: y[16*i+3] = sum4;
1879: y[16*i+4] = sum5;
1880: y[16*i+5] = sum6;
1881: y[16*i+6] = sum7;
1882: y[16*i+7] = sum8;
1883: y[16*i+8] = sum9;
1884: y[16*i+9] = sum10;
1885: y[16*i+10] = sum11;
1886: y[16*i+11] = sum12;
1887: y[16*i+12] = sum13;
1888: y[16*i+13] = sum14;
1889: y[16*i+14] = sum15;
1890: y[16*i+15] = sum16;
1891: }
1893: PetscLogFlops(32*a->nz - 16*m);
1894: VecRestoreArray(xx,&x);
1895: VecRestoreArray(yy,&y);
1896: return(0);
1897: }
1901: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1902: {
1903: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1904: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1905: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1906: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1908: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1911: VecSet(yy,zero);
1912: VecGetArray(xx,&x);
1913: VecGetArray(yy,&y);
1915: for (i=0; i<m; i++) {
1916: idx = a->j + a->i[i] ;
1917: v = a->a + a->i[i] ;
1918: n = a->i[i+1] - a->i[i];
1919: alpha1 = x[16*i];
1920: alpha2 = x[16*i+1];
1921: alpha3 = x[16*i+2];
1922: alpha4 = x[16*i+3];
1923: alpha5 = x[16*i+4];
1924: alpha6 = x[16*i+5];
1925: alpha7 = x[16*i+6];
1926: alpha8 = x[16*i+7];
1927: alpha9 = x[16*i+8];
1928: alpha10 = x[16*i+9];
1929: alpha11 = x[16*i+10];
1930: alpha12 = x[16*i+11];
1931: alpha13 = x[16*i+12];
1932: alpha14 = x[16*i+13];
1933: alpha15 = x[16*i+14];
1934: alpha16 = x[16*i+15];
1935: while (n-->0) {
1936: y[16*(*idx)] += alpha1*(*v);
1937: y[16*(*idx)+1] += alpha2*(*v);
1938: y[16*(*idx)+2] += alpha3*(*v);
1939: y[16*(*idx)+3] += alpha4*(*v);
1940: y[16*(*idx)+4] += alpha5*(*v);
1941: y[16*(*idx)+5] += alpha6*(*v);
1942: y[16*(*idx)+6] += alpha7*(*v);
1943: y[16*(*idx)+7] += alpha8*(*v);
1944: y[16*(*idx)+8] += alpha9*(*v);
1945: y[16*(*idx)+9] += alpha10*(*v);
1946: y[16*(*idx)+10] += alpha11*(*v);
1947: y[16*(*idx)+11] += alpha12*(*v);
1948: y[16*(*idx)+12] += alpha13*(*v);
1949: y[16*(*idx)+13] += alpha14*(*v);
1950: y[16*(*idx)+14] += alpha15*(*v);
1951: y[16*(*idx)+15] += alpha16*(*v);
1952: idx++; v++;
1953: }
1954: }
1955: PetscLogFlops(32*a->nz - 16*b->AIJ->cmap.n);
1956: VecRestoreArray(xx,&x);
1957: VecRestoreArray(yy,&y);
1958: return(0);
1959: }
1963: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1964: {
1965: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1966: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1967: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1968: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1970: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1971: PetscInt n,i,jrow,j;
1974: if (yy != zz) {VecCopy(yy,zz);}
1975: VecGetArray(xx,&x);
1976: VecGetArray(zz,&y);
1977: idx = a->j;
1978: v = a->a;
1979: ii = a->i;
1981: for (i=0; i<m; i++) {
1982: jrow = ii[i];
1983: n = ii[i+1] - jrow;
1984: sum1 = 0.0;
1985: sum2 = 0.0;
1986: sum3 = 0.0;
1987: sum4 = 0.0;
1988: sum5 = 0.0;
1989: sum6 = 0.0;
1990: sum7 = 0.0;
1991: sum8 = 0.0;
1992: sum9 = 0.0;
1993: sum10 = 0.0;
1994: sum11 = 0.0;
1995: sum12 = 0.0;
1996: sum13 = 0.0;
1997: sum14 = 0.0;
1998: sum15 = 0.0;
1999: sum16 = 0.0;
2000: for (j=0; j<n; j++) {
2001: sum1 += v[jrow]*x[16*idx[jrow]];
2002: sum2 += v[jrow]*x[16*idx[jrow]+1];
2003: sum3 += v[jrow]*x[16*idx[jrow]+2];
2004: sum4 += v[jrow]*x[16*idx[jrow]+3];
2005: sum5 += v[jrow]*x[16*idx[jrow]+4];
2006: sum6 += v[jrow]*x[16*idx[jrow]+5];
2007: sum7 += v[jrow]*x[16*idx[jrow]+6];
2008: sum8 += v[jrow]*x[16*idx[jrow]+7];
2009: sum9 += v[jrow]*x[16*idx[jrow]+8];
2010: sum10 += v[jrow]*x[16*idx[jrow]+9];
2011: sum11 += v[jrow]*x[16*idx[jrow]+10];
2012: sum12 += v[jrow]*x[16*idx[jrow]+11];
2013: sum13 += v[jrow]*x[16*idx[jrow]+12];
2014: sum14 += v[jrow]*x[16*idx[jrow]+13];
2015: sum15 += v[jrow]*x[16*idx[jrow]+14];
2016: sum16 += v[jrow]*x[16*idx[jrow]+15];
2017: jrow++;
2018: }
2019: y[16*i] += sum1;
2020: y[16*i+1] += sum2;
2021: y[16*i+2] += sum3;
2022: y[16*i+3] += sum4;
2023: y[16*i+4] += sum5;
2024: y[16*i+5] += sum6;
2025: y[16*i+6] += sum7;
2026: y[16*i+7] += sum8;
2027: y[16*i+8] += sum9;
2028: y[16*i+9] += sum10;
2029: y[16*i+10] += sum11;
2030: y[16*i+11] += sum12;
2031: y[16*i+12] += sum13;
2032: y[16*i+13] += sum14;
2033: y[16*i+14] += sum15;
2034: y[16*i+15] += sum16;
2035: }
2037: PetscLogFlops(32*a->nz);
2038: VecRestoreArray(xx,&x);
2039: VecRestoreArray(zz,&y);
2040: return(0);
2041: }
2045: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2046: {
2047: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2048: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2049: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2050: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2052: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
2055: if (yy != zz) {VecCopy(yy,zz);}
2056: VecGetArray(xx,&x);
2057: VecGetArray(zz,&y);
2058: for (i=0; i<m; i++) {
2059: idx = a->j + a->i[i] ;
2060: v = a->a + a->i[i] ;
2061: n = a->i[i+1] - a->i[i];
2062: alpha1 = x[16*i];
2063: alpha2 = x[16*i+1];
2064: alpha3 = x[16*i+2];
2065: alpha4 = x[16*i+3];
2066: alpha5 = x[16*i+4];
2067: alpha6 = x[16*i+5];
2068: alpha7 = x[16*i+6];
2069: alpha8 = x[16*i+7];
2070: alpha9 = x[16*i+8];
2071: alpha10 = x[16*i+9];
2072: alpha11 = x[16*i+10];
2073: alpha12 = x[16*i+11];
2074: alpha13 = x[16*i+12];
2075: alpha14 = x[16*i+13];
2076: alpha15 = x[16*i+14];
2077: alpha16 = x[16*i+15];
2078: while (n-->0) {
2079: y[16*(*idx)] += alpha1*(*v);
2080: y[16*(*idx)+1] += alpha2*(*v);
2081: y[16*(*idx)+2] += alpha3*(*v);
2082: y[16*(*idx)+3] += alpha4*(*v);
2083: y[16*(*idx)+4] += alpha5*(*v);
2084: y[16*(*idx)+5] += alpha6*(*v);
2085: y[16*(*idx)+6] += alpha7*(*v);
2086: y[16*(*idx)+7] += alpha8*(*v);
2087: y[16*(*idx)+8] += alpha9*(*v);
2088: y[16*(*idx)+9] += alpha10*(*v);
2089: y[16*(*idx)+10] += alpha11*(*v);
2090: y[16*(*idx)+11] += alpha12*(*v);
2091: y[16*(*idx)+12] += alpha13*(*v);
2092: y[16*(*idx)+13] += alpha14*(*v);
2093: y[16*(*idx)+14] += alpha15*(*v);
2094: y[16*(*idx)+15] += alpha16*(*v);
2095: idx++; v++;
2096: }
2097: }
2098: PetscLogFlops(32*a->nz);
2099: VecRestoreArray(xx,&x);
2100: VecRestoreArray(zz,&y);
2101: return(0);
2102: }
2104: /*===================================================================================*/
2107: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2108: {
2109: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2113: /* start the scatter */
2114: VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
2115: (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2116: VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
2117: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2118: return(0);
2119: }
2123: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2124: {
2125: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2129: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2130: (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2131: VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
2132: VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
2133: return(0);
2134: }
2138: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2139: {
2140: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2144: /* start the scatter */
2145: VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
2146: (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2147: VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
2148: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2149: return(0);
2150: }
2154: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2155: {
2156: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2160: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2161: VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
2162: (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2163: VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
2164: return(0);
2165: }
2169: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2170: {
2171: /* This routine requires testing -- but it's getting better. */
2172: PetscErrorCode ierr;
2173: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2174: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
2175: Mat P=pp->AIJ;
2176: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2177: PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
2178: PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2179: PetscInt an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N,ppdof=pp->dof,cn;
2180: PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
2181: MatScalar *ca;
2184: /* Start timer */
2187: /* Get ij structure of P^T */
2188: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2190: cn = pn*ppdof;
2191: /* Allocate ci array, arrays for fill computation and */
2192: /* free space for accumulating nonzero column info */
2193: PetscMalloc((cn+1)*sizeof(PetscInt),&ci);
2194: ci[0] = 0;
2196: /* Work arrays for rows of P^T*A */
2197: PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);
2198: PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));
2199: ptasparserow = ptadenserow + an;
2200: denserow = ptasparserow + an;
2201: sparserow = denserow + cn;
2203: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2204: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2205: /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2206: PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
2207: current_space = free_space;
2209: /* Determine symbolic info for each row of C: */
2210: for (i=0;i<pn;i++) {
2211: ptnzi = pti[i+1] - pti[i];
2212: ptJ = ptj + pti[i];
2213: for (dof=0;dof<ppdof;dof++) {
2214: ptanzi = 0;
2215: /* Determine symbolic row of PtA: */
2216: for (j=0;j<ptnzi;j++) {
2217: /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2218: arow = ptJ[j]*ppdof + dof;
2219: /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2220: anzj = ai[arow+1] - ai[arow];
2221: ajj = aj + ai[arow];
2222: for (k=0;k<anzj;k++) {
2223: if (!ptadenserow[ajj[k]]) {
2224: ptadenserow[ajj[k]] = -1;
2225: ptasparserow[ptanzi++] = ajj[k];
2226: }
2227: }
2228: }
2229: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2230: ptaj = ptasparserow;
2231: cnzi = 0;
2232: for (j=0;j<ptanzi;j++) {
2233: /* Get offset within block of P */
2234: pshift = *ptaj%ppdof;
2235: /* Get block row of P */
2236: prow = (*ptaj++)/ppdof; /* integer division */
2237: /* P has same number of nonzeros per row as the compressed form */
2238: pnzj = pi[prow+1] - pi[prow];
2239: pjj = pj + pi[prow];
2240: for (k=0;k<pnzj;k++) {
2241: /* Locations in C are shifted by the offset within the block */
2242: /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2243: if (!denserow[pjj[k]*ppdof+pshift]) {
2244: denserow[pjj[k]*ppdof+pshift] = -1;
2245: sparserow[cnzi++] = pjj[k]*ppdof+pshift;
2246: }
2247: }
2248: }
2250: /* sort sparserow */
2251: PetscSortInt(cnzi,sparserow);
2252:
2253: /* If free space is not available, make more free space */
2254: /* Double the amount of total space in the list */
2255: if (current_space->local_remaining<cnzi) {
2256: PetscFreeSpaceGet(current_space->total_array_size,¤t_space);
2257: }
2259: /* Copy data into free space, and zero out denserows */
2260: PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
2261: current_space->array += cnzi;
2262: current_space->local_used += cnzi;
2263: current_space->local_remaining -= cnzi;
2265: for (j=0;j<ptanzi;j++) {
2266: ptadenserow[ptasparserow[j]] = 0;
2267: }
2268: for (j=0;j<cnzi;j++) {
2269: denserow[sparserow[j]] = 0;
2270: }
2271: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2272: /* For now, we will recompute what is needed. */
2273: ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2274: }
2275: }
2276: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2277: /* Allocate space for cj, initialize cj, and */
2278: /* destroy list of free space and other temporary array(s) */
2279: PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);
2280: PetscFreeSpaceContiguous(&free_space,cj);
2281: PetscFree(ptadenserow);
2282:
2283: /* Allocate space for ca */
2284: PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);
2285: PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));
2286:
2287: /* put together the new matrix */
2288: MatCreateSeqAIJWithArrays(A->comm,cn,cn,ci,cj,ca,C);
2290: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2291: /* Since these are PETSc arrays, change flags to free them as necessary. */
2292: c = (Mat_SeqAIJ *)((*C)->data);
2293: c->free_a = PETSC_TRUE;
2294: c->free_ij = PETSC_TRUE;
2295: c->nonew = 0;
2297: /* Clean up. */
2298: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2301: return(0);
2302: }
2306: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2307: {
2308: /* This routine requires testing -- first draft only */
2310: PetscInt flops=0;
2311: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
2312: Mat P=pp->AIJ;
2313: Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
2314: Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data;
2315: Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data;
2316: PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
2317: PetscInt *ci=c->i,*cj=c->j,*cjj;
2318: PetscInt am=A->rmap.N,cn=C->cmap.N,cm=C->rmap.N,ppdof=pp->dof;
2319: PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
2320: MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
2323: /* Allocate temporary array for storage of one row of A*P */
2324: PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);
2325: PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));
2327: apj = (PetscInt *)(apa + cn);
2328: apjdense = apj + cn;
2330: /* Clear old values in C */
2331: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
2333: for (i=0;i<am;i++) {
2334: /* Form sparse row of A*P */
2335: anzi = ai[i+1] - ai[i];
2336: apnzj = 0;
2337: for (j=0;j<anzi;j++) {
2338: /* Get offset within block of P */
2339: pshift = *aj%ppdof;
2340: /* Get block row of P */
2341: prow = *aj++/ppdof; /* integer division */
2342: pnzj = pi[prow+1] - pi[prow];
2343: pjj = pj + pi[prow];
2344: paj = pa + pi[prow];
2345: for (k=0;k<pnzj;k++) {
2346: poffset = pjj[k]*ppdof+pshift;
2347: if (!apjdense[poffset]) {
2348: apjdense[poffset] = -1;
2349: apj[apnzj++] = poffset;
2350: }
2351: apa[poffset] += (*aa)*paj[k];
2352: }
2353: flops += 2*pnzj;
2354: aa++;
2355: }
2357: /* Sort the j index array for quick sparse axpy. */
2358: /* Note: a array does not need sorting as it is in dense storage locations. */
2359: PetscSortInt(apnzj,apj);
2361: /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2362: prow = i/ppdof; /* integer division */
2363: pshift = i%ppdof;
2364: poffset = pi[prow];
2365: pnzi = pi[prow+1] - poffset;
2366: /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2367: pJ = pj+poffset;
2368: pA = pa+poffset;
2369: for (j=0;j<pnzi;j++) {
2370: crow = (*pJ)*ppdof+pshift;
2371: cjj = cj + ci[crow];
2372: caj = ca + ci[crow];
2373: pJ++;
2374: /* Perform sparse axpy operation. Note cjj includes apj. */
2375: for (k=0,nextap=0;nextap<apnzj;k++) {
2376: if (cjj[k]==apj[nextap]) {
2377: caj[k] += (*pA)*apa[apj[nextap++]];
2378: }
2379: }
2380: flops += 2*apnzj;
2381: pA++;
2382: }
2384: /* Zero the current row info for A*P */
2385: for (j=0;j<apnzj;j++) {
2386: apa[apj[j]] = 0.;
2387: apjdense[apj[j]] = 0;
2388: }
2389: }
2391: /* Assemble the final matrix and clean up */
2392: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2393: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2394: PetscFree(apa);
2395: PetscLogFlops(flops);
2397: return(0);
2398: }
2403: PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2404: {
2405: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2406: Mat a = b->AIJ,B;
2407: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data;
2408: PetscErrorCode ierr;
2409: PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
2410: PetscInt *cols;
2411: PetscScalar *vals;
2414: MatGetSize(a,&m,&n);
2415: PetscMalloc(dof*m*sizeof(PetscInt),&ilen);
2416: for (i=0; i<m; i++) {
2417: nmax = PetscMax(nmax,aij->ilen[i]);
2418: for (j=0; j<dof; j++) {
2419: ilen[dof*i+j] = aij->ilen[i];
2420: }
2421: }
2422: MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);
2423: MatSetOption(B,MAT_COLUMNS_SORTED);
2424: PetscFree(ilen);
2425: PetscMalloc(nmax*sizeof(PetscInt),&icols);
2426: ii = 0;
2427: for (i=0; i<m; i++) {
2428: MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
2429: for (j=0; j<dof; j++) {
2430: for (k=0; k<ncols; k++) {
2431: icols[k] = dof*cols[k]+j;
2432: }
2433: MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
2434: ii++;
2435: }
2436: MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
2437: }
2438: PetscFree(icols);
2439: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2440: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2442: if (reuse == MAT_REUSE_MATRIX) {
2443: MatHeaderReplace(A,B);
2444: } else {
2445: *newmat = B;
2446: }
2447: return(0);
2448: }
2451: #include src/mat/impls/aij/mpi/mpiaij.h
2456: PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2457: {
2458: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data;
2459: Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
2460: Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
2461: Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
2462: Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
2463: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data;
2464: PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
2465: PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
2466: PetscInt rstart,cstart,*garray,ii,k;
2467: PetscErrorCode ierr;
2468: PetscScalar *vals,*ovals;
2471: PetscMalloc2(A->rmap.n,PetscInt,&dnz,A->rmap.n,PetscInt,&onz);
2472: for (i=0; i<A->rmap.n/dof; i++) {
2473: nmax = PetscMax(nmax,AIJ->ilen[i]);
2474: onmax = PetscMax(onmax,OAIJ->ilen[i]);
2475: for (j=0; j<dof; j++) {
2476: dnz[dof*i+j] = AIJ->ilen[i];
2477: onz[dof*i+j] = OAIJ->ilen[i];
2478: }
2479: }
2480: MatCreateMPIAIJ(A->comm,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N,0,dnz,0,onz,&B);
2481: MatSetOption(B,MAT_COLUMNS_SORTED);
2482: PetscFree2(dnz,onz);
2484: PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);
2485: rstart = dof*maij->A->rmap.rstart;
2486: cstart = dof*maij->A->cmap.rstart;
2487: garray = mpiaij->garray;
2489: ii = rstart;
2490: for (i=0; i<A->rmap.n/dof; i++) {
2491: MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
2492: MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
2493: for (j=0; j<dof; j++) {
2494: for (k=0; k<ncols; k++) {
2495: icols[k] = cstart + dof*cols[k]+j;
2496: }
2497: for (k=0; k<oncols; k++) {
2498: oicols[k] = dof*garray[ocols[k]]+j;
2499: }
2500: MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
2501: MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
2502: ii++;
2503: }
2504: MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
2505: MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
2506: }
2507: PetscFree2(icols,oicols);
2509: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2510: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2512: if (reuse == MAT_REUSE_MATRIX) {
2513: MatHeaderReplace(A,B);
2514: } else {
2515: *newmat = B;
2516: }
2517: return(0);
2518: }
2522: /* ---------------------------------------------------------------------------------- */
2523: /*MC
2524: MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
2525: operations for multicomponent problems. It interpolates each component the same
2526: way independently. The matrix type is based on MATSEQAIJ for sequential matrices,
2527: and MATMPIAIJ for distributed matrices.
2529: Operations provided:
2530: + MatMult
2531: . MatMultTranspose
2532: . MatMultAdd
2533: . MatMultTransposeAdd
2534: - MatView
2536: Level: advanced
2538: M*/
2541: PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
2542: {
2544: PetscMPIInt size;
2545: PetscInt n;
2546: Mat_MPIMAIJ *b;
2547: Mat B;
2550: PetscObjectReference((PetscObject)A);
2552: if (dof == 1) {
2553: *maij = A;
2554: } else {
2555: MatCreate(A->comm,&B);
2556: MatSetSizes(B,dof*A->rmap.n,dof*A->cmap.n,dof*A->rmap.N,dof*A->cmap.N);
2557: B->assembled = PETSC_TRUE;
2559: MPI_Comm_size(A->comm,&size);
2560: if (size == 1) {
2561: MatSetType(B,MATSEQMAIJ);
2562: B->ops->destroy = MatDestroy_SeqMAIJ;
2563: B->ops->view = MatView_SeqMAIJ;
2564: b = (Mat_MPIMAIJ*)B->data;
2565: b->dof = dof;
2566: b->AIJ = A;
2567: if (dof == 2) {
2568: B->ops->mult = MatMult_SeqMAIJ_2;
2569: B->ops->multadd = MatMultAdd_SeqMAIJ_2;
2570: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2;
2571: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
2572: } else if (dof == 3) {
2573: B->ops->mult = MatMult_SeqMAIJ_3;
2574: B->ops->multadd = MatMultAdd_SeqMAIJ_3;
2575: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3;
2576: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
2577: } else if (dof == 4) {
2578: B->ops->mult = MatMult_SeqMAIJ_4;
2579: B->ops->multadd = MatMultAdd_SeqMAIJ_4;
2580: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4;
2581: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
2582: } else if (dof == 5) {
2583: B->ops->mult = MatMult_SeqMAIJ_5;
2584: B->ops->multadd = MatMultAdd_SeqMAIJ_5;
2585: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5;
2586: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
2587: } else if (dof == 6) {
2588: B->ops->mult = MatMult_SeqMAIJ_6;
2589: B->ops->multadd = MatMultAdd_SeqMAIJ_6;
2590: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6;
2591: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
2592: } else if (dof == 7) {
2593: B->ops->mult = MatMult_SeqMAIJ_7;
2594: B->ops->multadd = MatMultAdd_SeqMAIJ_7;
2595: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7;
2596: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
2597: } else if (dof == 8) {
2598: B->ops->mult = MatMult_SeqMAIJ_8;
2599: B->ops->multadd = MatMultAdd_SeqMAIJ_8;
2600: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8;
2601: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
2602: } else if (dof == 9) {
2603: B->ops->mult = MatMult_SeqMAIJ_9;
2604: B->ops->multadd = MatMultAdd_SeqMAIJ_9;
2605: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9;
2606: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
2607: } else if (dof == 10) {
2608: B->ops->mult = MatMult_SeqMAIJ_10;
2609: B->ops->multadd = MatMultAdd_SeqMAIJ_10;
2610: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10;
2611: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
2612: } else if (dof == 16) {
2613: B->ops->mult = MatMult_SeqMAIJ_16;
2614: B->ops->multadd = MatMultAdd_SeqMAIJ_16;
2615: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16;
2616: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
2617: } else {
2618: SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
2619: }
2620: B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
2621: B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2622: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);
2623: } else {
2624: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
2625: IS from,to;
2626: Vec gvec;
2627: PetscInt *garray,i;
2629: MatSetType(B,MATMPIMAIJ);
2630: B->ops->destroy = MatDestroy_MPIMAIJ;
2631: B->ops->view = MatView_MPIMAIJ;
2632: b = (Mat_MPIMAIJ*)B->data;
2633: b->dof = dof;
2634: b->A = A;
2635: MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
2636: MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);
2638: VecGetSize(mpiaij->lvec,&n);
2639: VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);
2640: VecSetBlockSize(b->w,dof);
2642: /* create two temporary Index sets for build scatter gather */
2643: PetscMalloc((n+1)*sizeof(PetscInt),&garray);
2644: for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
2645: ISCreateBlock(A->comm,dof,n,garray,&from);
2646: PetscFree(garray);
2647: ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);
2649: /* create temporary global vector to generate scatter context */
2650: VecCreateMPI(A->comm,dof*A->cmap.n,dof*A->cmap.N,&gvec);
2651: VecSetBlockSize(gvec,dof);
2653: /* generate the scatter context */
2654: VecScatterCreate(gvec,from,b->w,to,&b->ctx);
2656: ISDestroy(from);
2657: ISDestroy(to);
2658: VecDestroy(gvec);
2660: B->ops->mult = MatMult_MPIMAIJ_dof;
2661: B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof;
2662: B->ops->multadd = MatMultAdd_MPIMAIJ_dof;
2663: B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
2664: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);
2665: }
2666: *maij = B;
2667: MatView_Private(B);
2668: }
2669: return(0);
2670: }