Actual source code: baij.c
1: #define PETSCMAT_DLL
3: /*
4: Defines the basic matrix operations for the BAIJ (compressed row)
5: matrix storage format.
6: */
7: #include ../src/mat/impls/baij/seq/baij.h
8: #include ../src/inline/spops.h
9: #include petscsys.h
11: #include ../src/inline/ilu.h
15: /*@
16: MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
18: Collective on Mat
20: Input Parameters:
21: . mat - the matrix
23: Level: advanced
24: @*/
25: PetscErrorCode MatSeqBAIJInvertBlockDiagonal(Mat mat)
26: {
27: PetscErrorCode ierr,(*f)(Mat);
31: if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32: if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
34: PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);
35: if (f) {
36: (*f)(mat);
37: } else {
38: SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39: }
40: return(0);
41: }
46: PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47: {
48: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data;
50: PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs;
51: MatScalar *v = a->a,*odiag,*diag,*mdiag;
52: PetscReal shift = 0.0;
55: if (a->idiagvalid) return(0);
56: MatMarkDiagonal_SeqBAIJ(A);
57: diag_offset = a->diag;
58: if (!a->idiag) {
59: PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);
60: }
61: diag = a->idiag;
62: mdiag = a->idiag+bs*bs*mbs;
63: /* factor and invert each block */
64: switch (bs){
65: case 2:
66: for (i=0; i<mbs; i++) {
67: odiag = v + 4*diag_offset[i];
68: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
69: mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
70: Kernel_A_gets_inverse_A_2(diag,shift);
71: diag += 4;
72: mdiag += 4;
73: }
74: break;
75: case 3:
76: for (i=0; i<mbs; i++) {
77: odiag = v + 9*diag_offset[i];
78: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
79: diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
80: diag[8] = odiag[8];
81: mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
82: mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
83: mdiag[8] = odiag[8];
84: Kernel_A_gets_inverse_A_3(diag,shift);
85: diag += 9;
86: mdiag += 9;
87: }
88: break;
89: case 4:
90: for (i=0; i<mbs; i++) {
91: odiag = v + 16*diag_offset[i];
92: PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
93: PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
94: Kernel_A_gets_inverse_A_4(diag,shift);
95: diag += 16;
96: mdiag += 16;
97: }
98: break;
99: case 5:
100: for (i=0; i<mbs; i++) {
101: odiag = v + 25*diag_offset[i];
102: PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
103: PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
104: Kernel_A_gets_inverse_A_5(diag,shift);
105: diag += 25;
106: mdiag += 25;
107: }
108: break;
109: default:
110: SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
111: }
112: a->idiagvalid = PETSC_TRUE;
113: return(0);
114: }
119: PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
120: {
121: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
122: PetscScalar *x,x1,s1;
123: const PetscScalar *b;
124: const MatScalar *aa = a->a, *idiag,*mdiag,*v;
125: PetscErrorCode ierr;
126: PetscInt m = a->mbs,i,i2,nz,idx;
127: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
130: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
131: its = its*lits;
132: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
133: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
134: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
135: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
136: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
138: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
140: diag = a->diag;
141: idiag = a->idiag;
142: VecGetArray(xx,&x);
143: VecGetArray(bb,(PetscScalar**)&b);
145: if (flag & SOR_ZERO_INITIAL_GUESS) {
146: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
147: x[0] = b[0]*idiag[0];
148: i2 = 1;
149: idiag += 1;
150: for (i=1; i<m; i++) {
151: v = aa + ai[i];
152: vi = aj + ai[i];
153: nz = diag[i] - ai[i];
154: s1 = b[i2];
155: while (nz--) {
156: idx = (*vi++);
157: x1 = x[idx];
158: s1 -= v[0]*x1;
159: v += 1;
160: }
161: x[i2] = idiag[0]*s1;
162: idiag += 1;
163: i2 += 1;
164: }
165: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166: PetscLogFlops(a->nz);
167: }
168: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170: i2 = 0;
171: mdiag = a->idiag+a->mbs;
172: for (i=0; i<m; i++) {
173: x1 = x[i2];
174: x[i2] = mdiag[0]*x1;
175: mdiag += 1;
176: i2 += 1;
177: }
178: PetscLogFlops(m);
179: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
180: PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
181: }
182: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
183: idiag = a->idiag+a->mbs - 1;
184: i2 = m - 1;
185: x1 = x[i2];
186: x[i2] = idiag[0]*x1;
187: idiag -= 1;
188: i2 -= 1;
189: for (i=m-2; i>=0; i--) {
190: v = aa + (diag[i]+1);
191: vi = aj + diag[i] + 1;
192: nz = ai[i+1] - diag[i] - 1;
193: s1 = x[i2];
194: while (nz--) {
195: idx = (*vi++);
196: x1 = x[idx];
197: s1 -= v[0]*x1;
198: v += 1;
199: }
200: x[i2] = idiag[0]*s1;
201: idiag -= 2;
202: i2 -= 1;
203: }
204: PetscLogFlops(a->nz);
205: }
206: } else {
207: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
208: }
209: VecRestoreArray(xx,&x);
210: VecRestoreArray(bb,(PetscScalar**)&b);
211: return(0);
212: }
216: PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
217: {
218: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
219: PetscScalar *x,x1,x2,s1,s2;
220: const PetscScalar *b;
221: const MatScalar *v,*aa = a->a, *idiag,*mdiag;
222: PetscErrorCode ierr;
223: PetscInt m = a->mbs,i,i2,nz,idx;
224: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
227: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
228: its = its*lits;
229: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
230: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
231: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
232: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
233: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
235: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
237: diag = a->diag;
238: idiag = a->idiag;
239: VecGetArray(xx,&x);
240: VecGetArray(bb,(PetscScalar**)&b);
242: if (flag & SOR_ZERO_INITIAL_GUESS) {
243: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
244: x[0] = b[0]*idiag[0] + b[1]*idiag[2];
245: x[1] = b[0]*idiag[1] + b[1]*idiag[3];
246: i2 = 2;
247: idiag += 4;
248: for (i=1; i<m; i++) {
249: v = aa + 4*ai[i];
250: vi = aj + ai[i];
251: nz = diag[i] - ai[i];
252: s1 = b[i2]; s2 = b[i2+1];
253: while (nz--) {
254: idx = 2*(*vi++);
255: x1 = x[idx]; x2 = x[1+idx];
256: s1 -= v[0]*x1 + v[2]*x2;
257: s2 -= v[1]*x1 + v[3]*x2;
258: v += 4;
259: }
260: x[i2] = idiag[0]*s1 + idiag[2]*s2;
261: x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
262: idiag += 4;
263: i2 += 2;
264: }
265: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
266: PetscLogFlops(4*(a->nz));
267: }
268: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
269: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
270: i2 = 0;
271: mdiag = a->idiag+4*a->mbs;
272: for (i=0; i<m; i++) {
273: x1 = x[i2]; x2 = x[i2+1];
274: x[i2] = mdiag[0]*x1 + mdiag[2]*x2;
275: x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
276: mdiag += 4;
277: i2 += 2;
278: }
279: PetscLogFlops(6*m);
280: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
281: PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
282: }
283: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
284: idiag = a->idiag+4*a->mbs - 4;
285: i2 = 2*m - 2;
286: x1 = x[i2]; x2 = x[i2+1];
287: x[i2] = idiag[0]*x1 + idiag[2]*x2;
288: x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
289: idiag -= 4;
290: i2 -= 2;
291: for (i=m-2; i>=0; i--) {
292: v = aa + 4*(diag[i]+1);
293: vi = aj + diag[i] + 1;
294: nz = ai[i+1] - diag[i] - 1;
295: s1 = x[i2]; s2 = x[i2+1];
296: while (nz--) {
297: idx = 2*(*vi++);
298: x1 = x[idx]; x2 = x[1+idx];
299: s1 -= v[0]*x1 + v[2]*x2;
300: s2 -= v[1]*x1 + v[3]*x2;
301: v += 4;
302: }
303: x[i2] = idiag[0]*s1 + idiag[2]*s2;
304: x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
305: idiag -= 4;
306: i2 -= 2;
307: }
308: PetscLogFlops(4*(a->nz));
309: }
310: } else {
311: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
312: }
313: VecRestoreArray(xx,&x);
314: VecRestoreArray(bb,(PetscScalar**)&b);
315: return(0);
316: }
320: PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
321: {
322: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
323: PetscScalar *x,x1,x2,x3,s1,s2,s3;
324: const MatScalar *v,*aa = a->a, *idiag,*mdiag;
325: const PetscScalar *b;
326: PetscErrorCode ierr;
327: PetscInt m = a->mbs,i,i2,nz,idx;
328: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
331: its = its*lits;
332: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
333: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
334: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
335: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
336: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
337: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
339: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
341: diag = a->diag;
342: idiag = a->idiag;
343: VecGetArray(xx,&x);
344: VecGetArray(bb,(PetscScalar**)&b);
346: if (flag & SOR_ZERO_INITIAL_GUESS) {
347: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
348: x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
349: x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
350: x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
351: i2 = 3;
352: idiag += 9;
353: for (i=1; i<m; i++) {
354: v = aa + 9*ai[i];
355: vi = aj + ai[i];
356: nz = diag[i] - ai[i];
357: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
358: while (nz--) {
359: idx = 3*(*vi++);
360: x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
361: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
362: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
363: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
364: v += 9;
365: }
366: x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
367: x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
368: x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
369: idiag += 9;
370: i2 += 3;
371: }
372: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
373: PetscLogFlops(9*(a->nz));
374: }
375: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
376: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
377: i2 = 0;
378: mdiag = a->idiag+9*a->mbs;
379: for (i=0; i<m; i++) {
380: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
381: x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
382: x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
383: x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
384: mdiag += 9;
385: i2 += 3;
386: }
387: PetscLogFlops(15*m);
388: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
389: PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
390: }
391: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
392: idiag = a->idiag+9*a->mbs - 9;
393: i2 = 3*m - 3;
394: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
395: x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
396: x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
397: x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
398: idiag -= 9;
399: i2 -= 3;
400: for (i=m-2; i>=0; i--) {
401: v = aa + 9*(diag[i]+1);
402: vi = aj + diag[i] + 1;
403: nz = ai[i+1] - diag[i] - 1;
404: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
405: while (nz--) {
406: idx = 3*(*vi++);
407: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
408: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
409: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
410: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
411: v += 9;
412: }
413: x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
414: x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
415: x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
416: idiag -= 9;
417: i2 -= 3;
418: }
419: PetscLogFlops(9*(a->nz));
420: }
421: } else {
422: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
423: }
424: VecRestoreArray(xx,&x);
425: VecRestoreArray(bb,(PetscScalar**)&b);
426: return(0);
427: }
431: PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
432: {
433: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
434: PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4;
435: const MatScalar *v,*aa = a->a, *idiag,*mdiag;
436: const PetscScalar *b;
437: PetscErrorCode ierr;
438: PetscInt m = a->mbs,i,i2,nz,idx;
439: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
442: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
443: its = its*lits;
444: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
445: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
446: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
447: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
448: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
450: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
452: diag = a->diag;
453: idiag = a->idiag;
454: VecGetArray(xx,&x);
455: VecGetArray(bb,(PetscScalar**)&b);
457: if (flag & SOR_ZERO_INITIAL_GUESS) {
458: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
459: x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12];
460: x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13];
461: x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
462: x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
463: i2 = 4;
464: idiag += 16;
465: for (i=1; i<m; i++) {
466: v = aa + 16*ai[i];
467: vi = aj + ai[i];
468: nz = diag[i] - ai[i];
469: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
470: while (nz--) {
471: idx = 4*(*vi++);
472: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
473: s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
474: s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
475: s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
476: s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
477: v += 16;
478: }
479: x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4;
480: x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4;
481: x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
482: x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
483: idiag += 16;
484: i2 += 4;
485: }
486: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
487: PetscLogFlops(16*(a->nz));
488: }
489: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
490: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
491: i2 = 0;
492: mdiag = a->idiag+16*a->mbs;
493: for (i=0; i<m; i++) {
494: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
495: x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4;
496: x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4;
497: x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
498: x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
499: mdiag += 16;
500: i2 += 4;
501: }
502: PetscLogFlops(28*m);
503: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
504: PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
505: }
506: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
507: idiag = a->idiag+16*a->mbs - 16;
508: i2 = 4*m - 4;
509: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
510: x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4;
511: x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4;
512: x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
513: x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
514: idiag -= 16;
515: i2 -= 4;
516: for (i=m-2; i>=0; i--) {
517: v = aa + 16*(diag[i]+1);
518: vi = aj + diag[i] + 1;
519: nz = ai[i+1] - diag[i] - 1;
520: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
521: while (nz--) {
522: idx = 4*(*vi++);
523: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
524: s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
525: s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
526: s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
527: s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
528: v += 16;
529: }
530: x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4;
531: x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4;
532: x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
533: x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
534: idiag -= 16;
535: i2 -= 4;
536: }
537: PetscLogFlops(16*(a->nz));
538: }
539: } else {
540: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
541: }
542: VecRestoreArray(xx,&x);
543: VecRestoreArray(bb,(PetscScalar**)&b);
544: return(0);
545: }
549: PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
550: {
551: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
552: PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
553: const MatScalar *v,*aa = a->a, *idiag,*mdiag;
554: const PetscScalar *b;
555: PetscErrorCode ierr;
556: PetscInt m = a->mbs,i,i2,nz,idx;
557: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
560: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
561: its = its*lits;
562: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
563: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
564: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
565: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
566: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
568: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
570: diag = a->diag;
571: idiag = a->idiag;
572: VecGetArray(xx,&x);
573: VecGetArray(bb,(PetscScalar**)&b);
575: if (flag & SOR_ZERO_INITIAL_GUESS) {
576: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
577: x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
578: x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
579: x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
580: x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
581: x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
582: i2 = 5;
583: idiag += 25;
584: for (i=1; i<m; i++) {
585: v = aa + 25*ai[i];
586: vi = aj + ai[i];
587: nz = diag[i] - ai[i];
588: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
589: while (nz--) {
590: idx = 5*(*vi++);
591: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
592: s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
593: s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
594: s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
595: s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
596: s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
597: v += 25;
598: }
599: x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
600: x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
601: x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
602: x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
603: x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
604: idiag += 25;
605: i2 += 5;
606: }
607: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
608: PetscLogFlops(25*(a->nz));
609: }
610: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
611: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
612: i2 = 0;
613: mdiag = a->idiag+25*a->mbs;
614: for (i=0; i<m; i++) {
615: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
616: x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
617: x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
618: x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
619: x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
620: x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
621: mdiag += 25;
622: i2 += 5;
623: }
624: PetscLogFlops(45*m);
625: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
626: PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
627: }
628: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
629: idiag = a->idiag+25*a->mbs - 25;
630: i2 = 5*m - 5;
631: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
632: x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
633: x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
634: x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
635: x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
636: x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
637: idiag -= 25;
638: i2 -= 5;
639: for (i=m-2; i>=0; i--) {
640: v = aa + 25*(diag[i]+1);
641: vi = aj + diag[i] + 1;
642: nz = ai[i+1] - diag[i] - 1;
643: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
644: while (nz--) {
645: idx = 5*(*vi++);
646: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
647: s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
648: s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
649: s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
650: s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
651: s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
652: v += 25;
653: }
654: x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
655: x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
656: x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
657: x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
658: x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
659: idiag -= 25;
660: i2 -= 5;
661: }
662: PetscLogFlops(25*(a->nz));
663: }
664: } else {
665: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
666: }
667: VecRestoreArray(xx,&x);
668: VecRestoreArray(bb,(PetscScalar**)&b);
669: return(0);
670: }
674: PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
675: {
676: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
677: PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
678: const MatScalar *v,*aa = a->a, *idiag,*mdiag;
679: const PetscScalar *b;
680: PetscErrorCode ierr;
681: PetscInt m = a->mbs,i,i2,nz,idx;
682: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
685: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
686: its = its*lits;
687: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
688: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
689: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
690: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
691: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
693: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
695: diag = a->diag;
696: idiag = a->idiag;
697: VecGetArray(xx,&x);
698: VecGetArray(bb,(PetscScalar**)&b);
700: if (flag & SOR_ZERO_INITIAL_GUESS) {
701: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
702: x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
703: x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
704: x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
705: x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
706: x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
707: x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
708: i2 = 6;
709: idiag += 36;
710: for (i=1; i<m; i++) {
711: v = aa + 36*ai[i];
712: vi = aj + ai[i];
713: nz = diag[i] - ai[i];
714: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
715: while (nz--) {
716: idx = 6*(*vi++);
717: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
718: s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
719: s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
720: s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
721: s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
722: s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
723: s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
724: v += 36;
725: }
726: x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
727: x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
728: x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
729: x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
730: x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
731: x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
732: idiag += 36;
733: i2 += 6;
734: }
735: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
736: PetscLogFlops(36*(a->nz));
737: }
738: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
739: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
740: i2 = 0;
741: mdiag = a->idiag+36*a->mbs;
742: for (i=0; i<m; i++) {
743: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
744: x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
745: x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
746: x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
747: x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
748: x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
749: x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
750: mdiag += 36;
751: i2 += 6;
752: }
753: PetscLogFlops(60*m);
754: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
755: PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
756: }
757: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
758: idiag = a->idiag+36*a->mbs - 36;
759: i2 = 6*m - 6;
760: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
761: x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
762: x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
763: x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
764: x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
765: x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
766: x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
767: idiag -= 36;
768: i2 -= 6;
769: for (i=m-2; i>=0; i--) {
770: v = aa + 36*(diag[i]+1);
771: vi = aj + diag[i] + 1;
772: nz = ai[i+1] - diag[i] - 1;
773: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
774: while (nz--) {
775: idx = 6*(*vi++);
776: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
777: s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
778: s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
779: s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
780: s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
781: s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
782: s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
783: v += 36;
784: }
785: x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
786: x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
787: x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
788: x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
789: x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
790: x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
791: idiag -= 36;
792: i2 -= 6;
793: }
794: PetscLogFlops(36*(a->nz));
795: }
796: } else {
797: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
798: }
799: VecRestoreArray(xx,&x);
800: VecRestoreArray(bb,(PetscScalar**)&b);
801: return(0);
802: }
806: PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
807: {
808: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
809: PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
810: const MatScalar *v,*aa = a->a, *idiag,*mdiag;
811: const PetscScalar *b;
812: PetscErrorCode ierr;
813: PetscInt m = a->mbs,i,i2,nz,idx;
814: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
817: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
818: its = its*lits;
819: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
820: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
821: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
822: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
823: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
825: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
827: diag = a->diag;
828: idiag = a->idiag;
829: VecGetArray(xx,&x);
830: VecGetArray(bb,(PetscScalar**)&b);
832: if (flag & SOR_ZERO_INITIAL_GUESS) {
833: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
834: x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
835: x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
836: x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
837: x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
838: x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
839: x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
840: x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
841: i2 = 7;
842: idiag += 49;
843: for (i=1; i<m; i++) {
844: v = aa + 49*ai[i];
845: vi = aj + ai[i];
846: nz = diag[i] - ai[i];
847: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
848: while (nz--) {
849: idx = 7*(*vi++);
850: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
851: s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
852: s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
853: s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
854: s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
855: s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
856: s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
857: s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
858: v += 49;
859: }
860: x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
861: x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
862: x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
863: x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
864: x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
865: x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
866: x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
867: idiag += 49;
868: i2 += 7;
869: }
870: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
871: PetscLogFlops(49*(a->nz));
872: }
873: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
874: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
875: i2 = 0;
876: mdiag = a->idiag+49*a->mbs;
877: for (i=0; i<m; i++) {
878: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
879: x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
880: x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
881: x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
882: x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
883: x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
884: x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
885: x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
886: mdiag += 36;
887: i2 += 6;
888: }
889: PetscLogFlops(93*m);
890: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
891: PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
892: }
893: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
894: idiag = a->idiag+49*a->mbs - 49;
895: i2 = 7*m - 7;
896: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
897: x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
898: x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
899: x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
900: x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
901: x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
902: x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
903: x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
904: idiag -= 49;
905: i2 -= 7;
906: for (i=m-2; i>=0; i--) {
907: v = aa + 49*(diag[i]+1);
908: vi = aj + diag[i] + 1;
909: nz = ai[i+1] - diag[i] - 1;
910: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
911: while (nz--) {
912: idx = 7*(*vi++);
913: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
914: s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
915: s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
916: s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
917: s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
918: s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
919: s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
920: s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
921: v += 49;
922: }
923: x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
924: x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
925: x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
926: x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
927: x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
928: x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
929: x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
930: idiag -= 49;
931: i2 -= 7;
932: }
933: PetscLogFlops(49*(a->nz));
934: }
935: } else {
936: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
937: }
938: VecRestoreArray(xx,&x);
939: VecRestoreArray(bb,(PetscScalar**)&b);
940: return(0);
941: }
943: /*
944: Special version for direct calls from Fortran (Used in PETSc-fun3d)
945: */
946: #if defined(PETSC_HAVE_FORTRAN_CAPS)
947: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
948: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
949: #define matsetvaluesblocked4_ matsetvaluesblocked4
950: #endif
955: void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
956: {
957: Mat A = *AA;
958: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
959: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
960: PetscInt *ai=a->i,*ailen=a->ilen;
961: PetscInt *aj=a->j,stepval,lastcol = -1;
962: const PetscScalar *value = v;
963: MatScalar *ap,*aa = a->a,*bap;
966: if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
967: stepval = (n-1)*4;
968: for (k=0; k<m; k++) { /* loop over added rows */
969: row = im[k];
970: rp = aj + ai[row];
971: ap = aa + 16*ai[row];
972: nrow = ailen[row];
973: low = 0;
974: high = nrow;
975: for (l=0; l<n; l++) { /* loop over added columns */
976: col = in[l];
977: if (col <= lastcol) low = 0; else high = nrow;
978: lastcol = col;
979: value = v + k*(stepval+4 + l)*4;
980: while (high-low > 7) {
981: t = (low+high)/2;
982: if (rp[t] > col) high = t;
983: else low = t;
984: }
985: for (i=low; i<high; i++) {
986: if (rp[i] > col) break;
987: if (rp[i] == col) {
988: bap = ap + 16*i;
989: for (ii=0; ii<4; ii++,value+=stepval) {
990: for (jj=ii; jj<16; jj+=4) {
991: bap[jj] += *value++;
992: }
993: }
994: goto noinsert2;
995: }
996: }
997: N = nrow++ - 1;
998: high++; /* added new column index thus must search to one higher than before */
999: /* shift up all the later entries in this row */
1000: for (ii=N; ii>=i; ii--) {
1001: rp[ii+1] = rp[ii];
1002: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1003: }
1004: if (N >= i) {
1005: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1006: }
1007: rp[i] = col;
1008: bap = ap + 16*i;
1009: for (ii=0; ii<4; ii++,value+=stepval) {
1010: for (jj=ii; jj<16; jj+=4) {
1011: bap[jj] = *value++;
1012: }
1013: }
1014: noinsert2:;
1015: low = i;
1016: }
1017: ailen[row] = nrow;
1018: }
1019: PetscFunctionReturnVoid();
1020: }
1023: #if defined(PETSC_HAVE_FORTRAN_CAPS)
1024: #define matsetvalues4_ MATSETVALUES4
1025: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1026: #define matsetvalues4_ matsetvalues4
1027: #endif
1032: void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1033: {
1034: Mat A = *AA;
1035: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1036: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1037: PetscInt *ai=a->i,*ailen=a->ilen;
1038: PetscInt *aj=a->j,brow,bcol;
1039: PetscInt ridx,cidx,lastcol = -1;
1040: MatScalar *ap,value,*aa=a->a,*bap;
1041:
1043: for (k=0; k<m; k++) { /* loop over added rows */
1044: row = im[k]; brow = row/4;
1045: rp = aj + ai[brow];
1046: ap = aa + 16*ai[brow];
1047: nrow = ailen[brow];
1048: low = 0;
1049: high = nrow;
1050: for (l=0; l<n; l++) { /* loop over added columns */
1051: col = in[l]; bcol = col/4;
1052: ridx = row % 4; cidx = col % 4;
1053: value = v[l + k*n];
1054: if (col <= lastcol) low = 0; else high = nrow;
1055: lastcol = col;
1056: while (high-low > 7) {
1057: t = (low+high)/2;
1058: if (rp[t] > bcol) high = t;
1059: else low = t;
1060: }
1061: for (i=low; i<high; i++) {
1062: if (rp[i] > bcol) break;
1063: if (rp[i] == bcol) {
1064: bap = ap + 16*i + 4*cidx + ridx;
1065: *bap += value;
1066: goto noinsert1;
1067: }
1068: }
1069: N = nrow++ - 1;
1070: high++; /* added new column thus must search to one higher than before */
1071: /* shift up all the later entries in this row */
1072: for (ii=N; ii>=i; ii--) {
1073: rp[ii+1] = rp[ii];
1074: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1075: }
1076: if (N>=i) {
1077: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1078: }
1079: rp[i] = bcol;
1080: ap[16*i + 4*cidx + ridx] = value;
1081: noinsert1:;
1082: low = i;
1083: }
1084: ailen[brow] = nrow;
1085: }
1086: PetscFunctionReturnVoid();
1087: }
1090: /*
1091: Checks for missing diagonals
1092: */
1095: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1096: {
1097: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1099: PetscInt *diag,*jj = a->j,i;
1102: MatMarkDiagonal_SeqBAIJ(A);
1103: *missing = PETSC_FALSE;
1104: diag = a->diag;
1105: for (i=0; i<a->mbs; i++) {
1106: if (jj[diag[i]] != i) {
1107: *missing = PETSC_TRUE;
1108: if (d) *d = i;
1109: }
1110: }
1111: return(0);
1112: }
1116: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1117: {
1118: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1120: PetscInt i,j,m = a->mbs;
1123: if (!a->diag) {
1124: PetscMalloc(m*sizeof(PetscInt),&a->diag);
1125: }
1126: for (i=0; i<m; i++) {
1127: a->diag[i] = a->i[i+1];
1128: for (j=a->i[i]; j<a->i[i+1]; j++) {
1129: if (a->j[j] == i) {
1130: a->diag[i] = j;
1131: break;
1132: }
1133: }
1134: }
1135: return(0);
1136: }
1139: EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1143: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1144: {
1145: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1147: PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt;
1148: PetscInt *tia, *tja;
1151: *nn = n;
1152: if (!ia) return(0);
1153: if (symmetric) {
1154: MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);
1155: nz = tia[n];
1156: } else {
1157: tia = a->i; tja = a->j;
1158: }
1159:
1160: if (!blockcompressed && bs > 1) {
1161: (*nn) *= bs;
1162: nbs = bs;
1163: /* malloc & create the natural set of indices */
1164: PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);
1165: if (n) {
1166: (*ia)[0] = 0;
1167: for (j=1; j<bs; j++) {
1168: (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1169: }
1170: }
1172: for (i=1; i<n; i++) {
1173: (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1174: for (j=1; j<bs; j++) {
1175: (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1176: }
1177: }
1178: if (n) {
1179: (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1180: }
1182: if (ja) {
1183: PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);
1184: cnt = 0;
1185: for (i=0; i<n; i++) {
1186: for (j=0; j<bs; j++) {
1187: for (k=tia[i]; k<tia[i+1]; k++) {
1188: for (l=0; l<bs; l++) {
1189: (*ja)[cnt++] = bs*tja[k] + l;
1190: }
1191: }
1192: }
1193: }
1194: }
1196: n *= bs;
1197: nz *= bs*bs;
1198: if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1199: PetscFree(tia);
1200: PetscFree(tja);
1201: }
1202: } else {
1203: *ia = tia;
1204: if (ja) *ja = tja;
1205: }
1206: if (oshift == 1) {
1207: for (i=0; i<n+nbs; i++) (*ia)[i]++;
1208: if (ja) for (i=0; i<nz; i++) (*ja)[i]++;
1209: }
1210: return(0);
1211: }
1215: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1216: {
1217: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1219: PetscInt i,n = a->mbs,nz = a->i[n];
1222: if (!ia) return(0);
1223: if (!blockcompressed && A->rmap->bs > 1) {
1224: PetscFree(*ia);
1225: if (ja) {PetscFree(*ja);}
1226: } else if (symmetric) {
1227: PetscFree(*ia);
1228: if (ja) {PetscFree(*ja);}
1229: } else if (oshift == 1) { /* blockcompressed */
1230: for (i=0; i<n+1; i++) a->i[i]--;
1231: if (ja) {for (i=0; i<nz; i++) a->j[i]--;}
1232: }
1233: return(0);
1234: }
1238: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1239: {
1240: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1244: #if defined(PETSC_USE_LOG)
1245: PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1246: #endif
1247: MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
1248: if (a->row) {
1249: ISDestroy(a->row);
1250: }
1251: if (a->col) {
1252: ISDestroy(a->col);
1253: }
1254: PetscFree(a->diag);
1255: PetscFree(a->idiag);
1256: PetscFree2(a->imax,a->ilen);
1257: PetscFree(a->solve_work);
1258: PetscFree(a->mult_work);
1259: if (a->icol) {ISDestroy(a->icol);}
1260: PetscFree(a->saved_values);
1261: PetscFree(a->xtoy);
1262: if (a->compressedrow.use){PetscFree(a->compressedrow.i);}
1264: if (a->sbaijMat) {MatDestroy(a->sbaijMat);}
1265: PetscFree(a);
1267: PetscObjectChangeTypeName((PetscObject)A,0);
1268: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);
1269: PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);
1270: PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);
1271: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);
1272: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);
1273: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);
1274: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);
1275: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);
1276: return(0);
1277: }
1281: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1282: {
1283: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1287: switch (op) {
1288: case MAT_ROW_ORIENTED:
1289: a->roworiented = flg;
1290: break;
1291: case MAT_KEEP_ZEROED_ROWS:
1292: a->keepzeroedrows = flg;
1293: break;
1294: case MAT_NEW_NONZERO_LOCATIONS:
1295: a->nonew = (flg ? 0 : 1);
1296: break;
1297: case MAT_NEW_NONZERO_LOCATION_ERR:
1298: a->nonew = (flg ? -1 : 0);
1299: break;
1300: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1301: a->nonew = (flg ? -2 : 0);
1302: break;
1303: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1304: a->nounused = (flg ? -1 : 0);
1305: break;
1306: case MAT_NEW_DIAGONALS:
1307: case MAT_IGNORE_OFF_PROC_ENTRIES:
1308: case MAT_USE_HASH_TABLE:
1309: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1310: break;
1311: case MAT_SYMMETRIC:
1312: case MAT_STRUCTURALLY_SYMMETRIC:
1313: case MAT_HERMITIAN:
1314: case MAT_SYMMETRY_ETERNAL:
1315: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1316: break;
1317: default:
1318: SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1319: }
1320: return(0);
1321: }
1325: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1326: {
1327: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1329: PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1330: MatScalar *aa,*aa_i;
1331: PetscScalar *v_i;
1334: bs = A->rmap->bs;
1335: ai = a->i;
1336: aj = a->j;
1337: aa = a->a;
1338: bs2 = a->bs2;
1339:
1340: if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1341:
1342: bn = row/bs; /* Block number */
1343: bp = row % bs; /* Block Position */
1344: M = ai[bn+1] - ai[bn];
1345: *nz = bs*M;
1346:
1347: if (v) {
1348: *v = 0;
1349: if (*nz) {
1350: PetscMalloc((*nz)*sizeof(PetscScalar),v);
1351: for (i=0; i<M; i++) { /* for each block in the block row */
1352: v_i = *v + i*bs;
1353: aa_i = aa + bs2*(ai[bn] + i);
1354: for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1355: }
1356: }
1357: }
1359: if (idx) {
1360: *idx = 0;
1361: if (*nz) {
1362: PetscMalloc((*nz)*sizeof(PetscInt),idx);
1363: for (i=0; i<M; i++) { /* for each block in the block row */
1364: idx_i = *idx + i*bs;
1365: itmp = bs*aj[ai[bn] + i];
1366: for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1367: }
1368: }
1369: }
1370: return(0);
1371: }
1375: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1376: {
1380: if (idx) {PetscFree(*idx);}
1381: if (v) {PetscFree(*v);}
1382: return(0);
1383: }
1387: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1388: {
1389: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
1390: Mat C;
1392: PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1393: PetscInt *rows,*cols,bs2=a->bs2;
1394: MatScalar *array;
1397: if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1398: if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1399: PetscMalloc((1+nbs)*sizeof(PetscInt),&col);
1400: PetscMemzero(col,(1+nbs)*sizeof(PetscInt));
1402: for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1403: MatCreate(((PetscObject)A)->comm,&C);
1404: MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);
1405: MatSetType(C,((PetscObject)A)->type_name);
1406: MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);
1407: PetscFree(col);
1408: } else {
1409: C = *B;
1410: }
1412: array = a->a;
1413: PetscMalloc(2*bs*sizeof(PetscInt),&rows);
1414: cols = rows + bs;
1415: for (i=0; i<mbs; i++) {
1416: cols[0] = i*bs;
1417: for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1418: len = ai[i+1] - ai[i];
1419: for (j=0; j<len; j++) {
1420: rows[0] = (*aj++)*bs;
1421: for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1422: MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
1423: array += bs2;
1424: }
1425: }
1426: PetscFree(rows);
1427:
1428: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1429: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1430:
1431: if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1432: *B = C;
1433: } else {
1434: MatHeaderCopy(A,C);
1435: }
1436: return(0);
1437: }
1441: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1442: {
1443: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1445: PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1446: int fd;
1447: PetscScalar *aa;
1448: FILE *file;
1451: PetscViewerBinaryGetDescriptor(viewer,&fd);
1452: PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);
1453: col_lens[0] = MAT_FILE_COOKIE;
1455: col_lens[1] = A->rmap->N;
1456: col_lens[2] = A->cmap->n;
1457: col_lens[3] = a->nz*bs2;
1459: /* store lengths of each row and write (including header) to file */
1460: count = 0;
1461: for (i=0; i<a->mbs; i++) {
1462: for (j=0; j<bs; j++) {
1463: col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1464: }
1465: }
1466: PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);
1467: PetscFree(col_lens);
1469: /* store column indices (zero start index) */
1470: PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);
1471: count = 0;
1472: for (i=0; i<a->mbs; i++) {
1473: for (j=0; j<bs; j++) {
1474: for (k=a->i[i]; k<a->i[i+1]; k++) {
1475: for (l=0; l<bs; l++) {
1476: jj[count++] = bs*a->j[k] + l;
1477: }
1478: }
1479: }
1480: }
1481: PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1482: PetscFree(jj);
1484: /* store nonzero values */
1485: PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
1486: count = 0;
1487: for (i=0; i<a->mbs; i++) {
1488: for (j=0; j<bs; j++) {
1489: for (k=a->i[i]; k<a->i[i+1]; k++) {
1490: for (l=0; l<bs; l++) {
1491: aa[count++] = a->a[bs2*k + l*bs + j];
1492: }
1493: }
1494: }
1495: }
1496: PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1497: PetscFree(aa);
1499: PetscViewerBinaryGetInfoPointer(viewer,&file);
1500: if (file) {
1501: fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1502: }
1503: return(0);
1504: }
1508: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1509: {
1510: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1511: PetscErrorCode ierr;
1512: PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1513: PetscViewerFormat format;
1516: PetscViewerGetFormat(viewer,&format);
1517: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1518: PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);
1519: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1520: Mat aij;
1521: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1522: MatView(aij,viewer);
1523: MatDestroy(aij);
1524: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1525: return(0);
1526: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1527: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1528: for (i=0; i<a->mbs; i++) {
1529: for (j=0; j<bs; j++) {
1530: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1531: for (k=a->i[i]; k<a->i[i+1]; k++) {
1532: for (l=0; l<bs; l++) {
1533: #if defined(PETSC_USE_COMPLEX)
1534: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1535: PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1536: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1537: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1538: PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1539: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1540: } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1541: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1542: }
1543: #else
1544: if (a->a[bs2*k + l*bs + j] != 0.0) {
1545: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1546: }
1547: #endif
1548: }
1549: }
1550: PetscViewerASCIIPrintf(viewer,"\n");
1551: }
1552: }
1553: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1554: } else {
1555: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1556: for (i=0; i<a->mbs; i++) {
1557: for (j=0; j<bs; j++) {
1558: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1559: for (k=a->i[i]; k<a->i[i+1]; k++) {
1560: for (l=0; l<bs; l++) {
1561: #if defined(PETSC_USE_COMPLEX)
1562: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1563: PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1564: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1565: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1566: PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1567: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1568: } else {
1569: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1570: }
1571: #else
1572: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1573: #endif
1574: }
1575: }
1576: PetscViewerASCIIPrintf(viewer,"\n");
1577: }
1578: }
1579: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1580: }
1581: PetscViewerFlush(viewer);
1582: return(0);
1583: }
1587: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1588: {
1589: Mat A = (Mat) Aa;
1590: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
1592: PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1593: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1594: MatScalar *aa;
1595: PetscViewer viewer;
1599: /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1600: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1602: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
1604: /* loop over matrix elements drawing boxes */
1605: color = PETSC_DRAW_BLUE;
1606: for (i=0,row=0; i<mbs; i++,row+=bs) {
1607: for (j=a->i[i]; j<a->i[i+1]; j++) {
1608: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1609: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1610: aa = a->a + j*bs2;
1611: for (k=0; k<bs; k++) {
1612: for (l=0; l<bs; l++) {
1613: if (PetscRealPart(*aa++) >= 0.) continue;
1614: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1615: }
1616: }
1617: }
1618: }
1619: color = PETSC_DRAW_CYAN;
1620: for (i=0,row=0; i<mbs; i++,row+=bs) {
1621: for (j=a->i[i]; j<a->i[i+1]; j++) {
1622: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1623: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1624: aa = a->a + j*bs2;
1625: for (k=0; k<bs; k++) {
1626: for (l=0; l<bs; l++) {
1627: if (PetscRealPart(*aa++) != 0.) continue;
1628: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1629: }
1630: }
1631: }
1632: }
1634: color = PETSC_DRAW_RED;
1635: for (i=0,row=0; i<mbs; i++,row+=bs) {
1636: for (j=a->i[i]; j<a->i[i+1]; j++) {
1637: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1638: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1639: aa = a->a + j*bs2;
1640: for (k=0; k<bs; k++) {
1641: for (l=0; l<bs; l++) {
1642: if (PetscRealPart(*aa++) <= 0.) continue;
1643: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1644: }
1645: }
1646: }
1647: }
1648: return(0);
1649: }
1653: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1654: {
1656: PetscReal xl,yl,xr,yr,w,h;
1657: PetscDraw draw;
1658: PetscTruth isnull;
1662: PetscViewerDrawGetDraw(viewer,0,&draw);
1663: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1665: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1666: xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1667: xr += w; yr += h; xl = -w; yl = -h;
1668: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1669: PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1670: PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
1671: return(0);
1672: }
1676: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1677: {
1679: PetscTruth iascii,isbinary,isdraw;
1682: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1683: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1684: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1685: if (iascii){
1686: MatView_SeqBAIJ_ASCII(A,viewer);
1687: } else if (isbinary) {
1688: MatView_SeqBAIJ_Binary(A,viewer);
1689: } else if (isdraw) {
1690: MatView_SeqBAIJ_Draw(A,viewer);
1691: } else {
1692: Mat B;
1693: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1694: MatView(B,viewer);
1695: MatDestroy(B);
1696: }
1697: return(0);
1698: }
1703: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1704: {
1705: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1706: PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1707: PetscInt *ai = a->i,*ailen = a->ilen;
1708: PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1709: MatScalar *ap,*aa = a->a;
1712: for (k=0; k<m; k++) { /* loop over rows */
1713: row = im[k]; brow = row/bs;
1714: if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1715: if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1716: rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1717: nrow = ailen[brow];
1718: for (l=0; l<n; l++) { /* loop over columns */
1719: if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1720: if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1721: col = in[l] ;
1722: bcol = col/bs;
1723: cidx = col%bs;
1724: ridx = row%bs;
1725: high = nrow;
1726: low = 0; /* assume unsorted */
1727: while (high-low > 5) {
1728: t = (low+high)/2;
1729: if (rp[t] > bcol) high = t;
1730: else low = t;
1731: }
1732: for (i=low; i<high; i++) {
1733: if (rp[i] > bcol) break;
1734: if (rp[i] == bcol) {
1735: *v++ = ap[bs2*i+bs*cidx+ridx];
1736: goto finished;
1737: }
1738: }
1739: *v++ = 0.0;
1740: finished:;
1741: }
1742: }
1743: return(0);
1744: }
1746: #define CHUNKSIZE 10
1749: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1750: {
1751: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1752: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1753: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1754: PetscErrorCode ierr;
1755: PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1756: PetscTruth roworiented=a->roworiented;
1757: const PetscScalar *value = v;
1758: MatScalar *ap,*aa = a->a,*bap;
1761: if (roworiented) {
1762: stepval = (n-1)*bs;
1763: } else {
1764: stepval = (m-1)*bs;
1765: }
1766: for (k=0; k<m; k++) { /* loop over added rows */
1767: row = im[k];
1768: if (row < 0) continue;
1769: #if defined(PETSC_USE_DEBUG)
1770: if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1771: #endif
1772: rp = aj + ai[row];
1773: ap = aa + bs2*ai[row];
1774: rmax = imax[row];
1775: nrow = ailen[row];
1776: low = 0;
1777: high = nrow;
1778: for (l=0; l<n; l++) { /* loop over added columns */
1779: if (in[l] < 0) continue;
1780: #if defined(PETSC_USE_DEBUG)
1781: if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1782: #endif
1783: col = in[l];
1784: if (roworiented) {
1785: value = v + k*(stepval+bs)*bs + l*bs;
1786: } else {
1787: value = v + l*(stepval+bs)*bs + k*bs;
1788: }
1789: if (col <= lastcol) low = 0; else high = nrow;
1790: lastcol = col;
1791: while (high-low > 7) {
1792: t = (low+high)/2;
1793: if (rp[t] > col) high = t;
1794: else low = t;
1795: }
1796: for (i=low; i<high; i++) {
1797: if (rp[i] > col) break;
1798: if (rp[i] == col) {
1799: bap = ap + bs2*i;
1800: if (roworiented) {
1801: if (is == ADD_VALUES) {
1802: for (ii=0; ii<bs; ii++,value+=stepval) {
1803: for (jj=ii; jj<bs2; jj+=bs) {
1804: bap[jj] += *value++;
1805: }
1806: }
1807: } else {
1808: for (ii=0; ii<bs; ii++,value+=stepval) {
1809: for (jj=ii; jj<bs2; jj+=bs) {
1810: bap[jj] = *value++;
1811: }
1812: }
1813: }
1814: } else {
1815: if (is == ADD_VALUES) {
1816: for (ii=0; ii<bs; ii++,value+=stepval) {
1817: for (jj=0; jj<bs; jj++) {
1818: *bap++ += *value++;
1819: }
1820: }
1821: } else {
1822: for (ii=0; ii<bs; ii++,value+=stepval) {
1823: for (jj=0; jj<bs; jj++) {
1824: *bap++ = *value++;
1825: }
1826: }
1827: }
1828: }
1829: goto noinsert2;
1830: }
1831: }
1832: if (nonew == 1) goto noinsert2;
1833: if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1834: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1835: N = nrow++ - 1; high++;
1836: /* shift up all the later entries in this row */
1837: for (ii=N; ii>=i; ii--) {
1838: rp[ii+1] = rp[ii];
1839: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1840: }
1841: if (N >= i) {
1842: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1843: }
1844: rp[i] = col;
1845: bap = ap + bs2*i;
1846: if (roworiented) {
1847: for (ii=0; ii<bs; ii++,value+=stepval) {
1848: for (jj=ii; jj<bs2; jj+=bs) {
1849: bap[jj] = *value++;
1850: }
1851: }
1852: } else {
1853: for (ii=0; ii<bs; ii++,value+=stepval) {
1854: for (jj=0; jj<bs; jj++) {
1855: *bap++ = *value++;
1856: }
1857: }
1858: }
1859: noinsert2:;
1860: low = i;
1861: }
1862: ailen[row] = nrow;
1863: }
1864: return(0);
1865: }
1869: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1870: {
1871: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1872: PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1873: PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen;
1875: PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1876: MatScalar *aa = a->a,*ap;
1877: PetscReal ratio=0.6;
1880: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
1882: if (m) rmax = ailen[0];
1883: for (i=1; i<mbs; i++) {
1884: /* move each row back by the amount of empty slots (fshift) before it*/
1885: fshift += imax[i-1] - ailen[i-1];
1886: rmax = PetscMax(rmax,ailen[i]);
1887: if (fshift) {
1888: ip = aj + ai[i]; ap = aa + bs2*ai[i];
1889: N = ailen[i];
1890: for (j=0; j<N; j++) {
1891: ip[j-fshift] = ip[j];
1892: PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
1893: }
1894: }
1895: ai[i] = ai[i-1] + ailen[i-1];
1896: }
1897: if (mbs) {
1898: fshift += imax[mbs-1] - ailen[mbs-1];
1899: ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1900: }
1901: /* reset ilen and imax for each row */
1902: for (i=0; i<mbs; i++) {
1903: ailen[i] = imax[i] = ai[i+1] - ai[i];
1904: }
1905: a->nz = ai[mbs];
1907: /* diagonals may have moved, so kill the diagonal pointers */
1908: a->idiagvalid = PETSC_FALSE;
1909: if (fshift && a->diag) {
1910: PetscFree(a->diag);
1911: PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));
1912: a->diag = 0;
1913: }
1914: if (fshift && a->nounused == -1) {
1915: SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1916: }
1917: PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);
1918: PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
1919: PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);
1920: a->reallocs = 0;
1921: A->info.nz_unneeded = (PetscReal)fshift*bs2;
1923: /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1924: if (a->compressedrow.use){
1925: Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);
1926: }
1928: A->same_nonzero = PETSC_TRUE;
1929: return(0);
1930: }
1932: /*
1933: This function returns an array of flags which indicate the locations of contiguous
1934: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
1935: then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1936: Assume: sizes should be long enough to hold all the values.
1937: */
1940: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1941: {
1942: PetscInt i,j,k,row;
1943: PetscTruth flg;
1946: for (i=0,j=0; i<n; j++) {
1947: row = idx[i];
1948: if (row%bs!=0) { /* Not the begining of a block */
1949: sizes[j] = 1;
1950: i++;
1951: } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1952: sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */
1953: i++;
1954: } else { /* Begining of the block, so check if the complete block exists */
1955: flg = PETSC_TRUE;
1956: for (k=1; k<bs; k++) {
1957: if (row+k != idx[i+k]) { /* break in the block */
1958: flg = PETSC_FALSE;
1959: break;
1960: }
1961: }
1962: if (flg) { /* No break in the bs */
1963: sizes[j] = bs;
1964: i+= bs;
1965: } else {
1966: sizes[j] = 1;
1967: i++;
1968: }
1969: }
1970: }
1971: *bs_max = j;
1972: return(0);
1973: }
1974:
1977: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1978: {
1979: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1981: PetscInt i,j,k,count,*rows;
1982: PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1983: PetscScalar zero = 0.0;
1984: MatScalar *aa;
1987: /* Make a copy of the IS and sort it */
1988: /* allocate memory for rows,sizes */
1989: PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);
1990: sizes = rows + is_n;
1992: /* copy IS values to rows, and sort them */
1993: for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1994: PetscSortInt(is_n,rows);
1995: if (baij->keepzeroedrows) {
1996: for (i=0; i<is_n; i++) { sizes[i] = 1; }
1997: bs_max = is_n;
1998: A->same_nonzero = PETSC_TRUE;
1999: } else {
2000: MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
2001: A->same_nonzero = PETSC_FALSE;
2002: }
2004: for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2005: row = rows[j];
2006: if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2007: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2008: aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2009: if (sizes[i] == bs && !baij->keepzeroedrows) {
2010: if (diag != 0.0) {
2011: if (baij->ilen[row/bs] > 0) {
2012: baij->ilen[row/bs] = 1;
2013: baij->j[baij->i[row/bs]] = row/bs;
2014: PetscMemzero(aa,count*bs*sizeof(MatScalar));
2015: }
2016: /* Now insert all the diagonal values for this bs */
2017: for (k=0; k<bs; k++) {
2018: (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
2019: }
2020: } else { /* (diag == 0.0) */
2021: baij->ilen[row/bs] = 0;
2022: } /* end (diag == 0.0) */
2023: } else { /* (sizes[i] != bs) */
2024: #if defined (PETSC_USE_DEBUG)
2025: if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2026: #endif
2027: for (k=0; k<count; k++) {
2028: aa[0] = zero;
2029: aa += bs;
2030: }
2031: if (diag != 0.0) {
2032: (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
2033: }
2034: }
2035: }
2037: PetscFree(rows);
2038: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2039: return(0);
2040: }
2044: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2045: {
2046: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2047: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2048: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2049: PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2051: PetscInt ridx,cidx,bs2=a->bs2;
2052: PetscTruth roworiented=a->roworiented;
2053: MatScalar *ap,value,*aa=a->a,*bap;
2056: for (k=0; k<m; k++) { /* loop over added rows */
2057: row = im[k];
2058: brow = row/bs;
2059: if (row < 0) continue;
2060: #if defined(PETSC_USE_DEBUG)
2061: if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2062: #endif
2063: rp = aj + ai[brow];
2064: ap = aa + bs2*ai[brow];
2065: rmax = imax[brow];
2066: nrow = ailen[brow];
2067: low = 0;
2068: high = nrow;
2069: for (l=0; l<n; l++) { /* loop over added columns */
2070: if (in[l] < 0) continue;
2071: #if defined(PETSC_USE_DEBUG)
2072: if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2073: #endif
2074: col = in[l]; bcol = col/bs;
2075: ridx = row % bs; cidx = col % bs;
2076: if (roworiented) {
2077: value = v[l + k*n];
2078: } else {
2079: value = v[k + l*m];
2080: }
2081: if (col <= lastcol) low = 0; else high = nrow;
2082: lastcol = col;
2083: while (high-low > 7) {
2084: t = (low+high)/2;
2085: if (rp[t] > bcol) high = t;
2086: else low = t;
2087: }
2088: for (i=low; i<high; i++) {
2089: if (rp[i] > bcol) break;
2090: if (rp[i] == bcol) {
2091: bap = ap + bs2*i + bs*cidx + ridx;
2092: if (is == ADD_VALUES) *bap += value;
2093: else *bap = value;
2094: goto noinsert1;
2095: }
2096: }
2097: if (nonew == 1) goto noinsert1;
2098: if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2099: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2100: N = nrow++ - 1; high++;
2101: /* shift up all the later entries in this row */
2102: for (ii=N; ii>=i; ii--) {
2103: rp[ii+1] = rp[ii];
2104: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
2105: }
2106: if (N>=i) {
2107: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
2108: }
2109: rp[i] = bcol;
2110: ap[bs2*i + bs*cidx + ridx] = value;
2111: a->nz++;
2112: noinsert1:;
2113: low = i;
2114: }
2115: ailen[brow] = nrow;
2116: }
2117: A->same_nonzero = PETSC_FALSE;
2118: return(0);
2119: }
2121: EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth);
2125: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2126: {
2127: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
2128: Mat outA;
2130: PetscTruth row_identity,col_identity;
2133: if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2134: ISIdentity(row,&row_identity);
2135: ISIdentity(col,&col_identity);
2136: if (!row_identity || !col_identity) {
2137: SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2138: }
2140: outA = inA;
2141: inA->factor = MAT_FACTOR_LU;
2143: MatMarkDiagonal_SeqBAIJ(inA);
2145: PetscObjectReference((PetscObject)row);
2146: if (a->row) { ISDestroy(a->row); }
2147: a->row = row;
2148: PetscObjectReference((PetscObject)col);
2149: if (a->col) { ISDestroy(a->col); }
2150: a->col = col;
2151:
2152: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2153: if (a->icol) {
2154: ISDestroy(a->icol);
2155: }
2156: ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
2157: PetscLogObjectParent(inA,a->icol);
2158:
2159: MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));
2160: if (!a->solve_work) {
2161: PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);
2162: PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));
2163: }
2164: MatLUFactorNumeric(outA,inA,info);
2166: return(0);
2167: }
2172: PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2173: {
2174: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2175: PetscInt i,nz,nbs;
2178: nz = baij->maxnz/baij->bs2;
2179: nbs = baij->nbs;
2180: for (i=0; i<nz; i++) {
2181: baij->j[i] = indices[i];
2182: }
2183: baij->nz = nz;
2184: for (i=0; i<nbs; i++) {
2185: baij->ilen[i] = baij->imax[i];
2186: }
2188: return(0);
2189: }
2194: /*@
2195: MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2196: in the matrix.
2198: Input Parameters:
2199: + mat - the SeqBAIJ matrix
2200: - indices - the column indices
2202: Level: advanced
2204: Notes:
2205: This can be called if you have precomputed the nonzero structure of the
2206: matrix and want to provide it to the matrix object to improve the performance
2207: of the MatSetValues() operation.
2209: You MUST have set the correct numbers of nonzeros per row in the call to
2210: MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2212: MUST be called before any calls to MatSetValues();
2214: @*/
2215: PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2216: {
2217: PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2222: PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);
2223: if (f) {
2224: (*f)(mat,indices);
2225: } else {
2226: SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2227: }
2228: return(0);
2229: }
2233: PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2234: {
2235: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2237: PetscInt i,j,n,row,bs,*ai,*aj,mbs;
2238: PetscReal atmp;
2239: PetscScalar *x,zero = 0.0;
2240: MatScalar *aa;
2241: PetscInt ncols,brow,krow,kcol;
2244: if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2245: bs = A->rmap->bs;
2246: aa = a->a;
2247: ai = a->i;
2248: aj = a->j;
2249: mbs = a->mbs;
2251: VecSet(v,zero);
2252: VecGetArray(v,&x);
2253: VecGetLocalSize(v,&n);
2254: if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2255: for (i=0; i<mbs; i++) {
2256: ncols = ai[1] - ai[0]; ai++;
2257: brow = bs*i;
2258: for (j=0; j<ncols; j++){
2259: for (kcol=0; kcol<bs; kcol++){
2260: for (krow=0; krow<bs; krow++){
2261: atmp = PetscAbsScalar(*aa);aa++;
2262: row = brow + krow; /* row index */
2263: /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2264: if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2265: }
2266: }
2267: aj++;
2268: }
2269: }
2270: VecRestoreArray(v,&x);
2271: return(0);
2272: }
2276: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2277: {
2281: /* If the two matrices have the same copy implementation, use fast copy. */
2282: if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2283: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2284: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2286: if (a->i[A->rmap->N] != b->i[B->rmap->N]) {
2287: SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2288: }
2289: PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));
2290: } else {
2291: MatCopy_Basic(A,B,str);
2292: }
2293: return(0);
2294: }
2298: PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2299: {
2303: MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);
2304: return(0);
2305: }
2309: PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2310: {
2311: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2313: *array = a->a;
2314: return(0);
2315: }
2319: PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2320: {
2322: return(0);
2323: }
2325: #include petscblaslapack.h
2328: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2329: {
2330: Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2332: PetscInt i,bs=Y->rmap->bs,j,bs2;
2333: PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz);
2336: if (str == SAME_NONZERO_PATTERN) {
2337: PetscScalar alpha = a;
2338: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2339: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2340: if (y->xtoy && y->XtoY != X) {
2341: PetscFree(y->xtoy);
2342: MatDestroy(y->XtoY);
2343: }
2344: if (!y->xtoy) { /* get xtoy */
2345: MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);
2346: y->XtoY = X;
2347: PetscObjectReference((PetscObject)X);
2348: }
2349: bs2 = bs*bs;
2350: for (i=0; i<x->nz; i++) {
2351: j = 0;
2352: while (j < bs2){
2353: y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2354: j++;
2355: }
2356: }
2357: PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
2358: } else {
2359: MatAXPY_Basic(Y,a,X,str);
2360: }
2361: return(0);
2362: }
2366: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2367: {
2368: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2369: PetscInt i,nz = a->bs2*a->i[a->mbs];
2370: MatScalar *aa = a->a;
2373: for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2374: return(0);
2375: }
2379: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2380: {
2381: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2382: PetscInt i,nz = a->bs2*a->i[a->mbs];
2383: MatScalar *aa = a->a;
2386: for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2387: return(0);
2388: }
2391: /* -------------------------------------------------------------------*/
2392: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2393: MatGetRow_SeqBAIJ,
2394: MatRestoreRow_SeqBAIJ,
2395: MatMult_SeqBAIJ_N,
2396: /* 4*/ MatMultAdd_SeqBAIJ_N,
2397: MatMultTranspose_SeqBAIJ,
2398: MatMultTransposeAdd_SeqBAIJ,
2399: 0,
2400: 0,
2401: 0,
2402: /*10*/ 0,
2403: MatLUFactor_SeqBAIJ,
2404: 0,
2405: 0,
2406: MatTranspose_SeqBAIJ,
2407: /*15*/ MatGetInfo_SeqBAIJ,
2408: MatEqual_SeqBAIJ,
2409: MatGetDiagonal_SeqBAIJ,
2410: MatDiagonalScale_SeqBAIJ,
2411: MatNorm_SeqBAIJ,
2412: /*20*/ 0,
2413: MatAssemblyEnd_SeqBAIJ,
2414: 0,
2415: MatSetOption_SeqBAIJ,
2416: MatZeroEntries_SeqBAIJ,
2417: /*25*/ MatZeroRows_SeqBAIJ,
2418: 0,
2419: 0,
2420: 0,
2421: 0,
2422: /*30*/ MatSetUpPreallocation_SeqBAIJ,
2423: 0,
2424: 0,
2425: MatGetArray_SeqBAIJ,
2426: MatRestoreArray_SeqBAIJ,
2427: /*35*/ MatDuplicate_SeqBAIJ,
2428: 0,
2429: 0,
2430: MatILUFactor_SeqBAIJ,
2431: 0,
2432: /*40*/ MatAXPY_SeqBAIJ,
2433: MatGetSubMatrices_SeqBAIJ,
2434: MatIncreaseOverlap_SeqBAIJ,
2435: MatGetValues_SeqBAIJ,
2436: MatCopy_SeqBAIJ,
2437: /*45*/ 0,
2438: MatScale_SeqBAIJ,
2439: 0,
2440: 0,
2441: 0,
2442: /*50*/ 0,
2443: MatGetRowIJ_SeqBAIJ,
2444: MatRestoreRowIJ_SeqBAIJ,
2445: 0,
2446: 0,
2447: /*55*/ 0,
2448: 0,
2449: 0,
2450: 0,
2451: MatSetValuesBlocked_SeqBAIJ,
2452: /*60*/ MatGetSubMatrix_SeqBAIJ,
2453: MatDestroy_SeqBAIJ,
2454: MatView_SeqBAIJ,
2455: 0,
2456: 0,
2457: /*65*/ 0,
2458: 0,
2459: 0,
2460: 0,
2461: 0,
2462: /*70*/ MatGetRowMaxAbs_SeqBAIJ,
2463: 0,
2464: MatConvert_Basic,
2465: 0,
2466: 0,
2467: /*75*/ 0,
2468: 0,
2469: 0,
2470: 0,
2471: 0,
2472: /*80*/ 0,
2473: 0,
2474: 0,
2475: 0,
2476: MatLoad_SeqBAIJ,
2477: /*85*/ 0,
2478: 0,
2479: 0,
2480: 0,
2481: 0,
2482: /*90*/ 0,
2483: 0,
2484: 0,
2485: 0,
2486: 0,
2487: /*95*/ 0,
2488: 0,
2489: 0,
2490: 0,
2491: 0,
2492: /*100*/0,
2493: 0,
2494: 0,
2495: 0,
2496: 0,
2497: /*105*/0,
2498: MatRealPart_SeqBAIJ,
2499: MatImaginaryPart_SeqBAIJ,
2500: 0,
2501: 0,
2502: /*110*/0,
2503: 0,
2504: 0,
2505: 0,
2506: MatMissingDiagonal_SeqBAIJ
2507: /*115*/
2508: };
2513: PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
2514: {
2515: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2516: PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2520: if (aij->nonew != 1) {
2521: SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2522: }
2524: /* allocate space for values if not already there */
2525: if (!aij->saved_values) {
2526: PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
2527: }
2529: /* copy values over */
2530: PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
2531: return(0);
2532: }
2538: PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
2539: {
2540: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2542: PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2545: if (aij->nonew != 1) {
2546: SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2547: }
2548: if (!aij->saved_values) {
2549: SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2550: }
2552: /* copy values over */
2553: PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
2554: return(0);
2555: }
2566: PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2567: {
2568: Mat_SeqBAIJ *b;
2570: PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs);
2571: PetscTruth flg,skipallocation = PETSC_FALSE;
2575: if (nz == MAT_SKIP_ALLOCATION) {
2576: skipallocation = PETSC_TRUE;
2577: nz = 0;
2578: }
2580: if (bs < 0) {
2581: PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");
2582: PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);
2583: PetscOptionsEnd();
2584: bs = PetscAbs(bs);
2585: }
2586: if (nnz && newbs != bs) {
2587: SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2588: }
2589: bs = newbs;
2591: PetscMapSetBlockSize(B->rmap,bs);
2592: PetscMapSetBlockSize(B->cmap,bs);
2593: PetscMapSetUp(B->rmap);
2594: PetscMapSetUp(B->cmap);
2596: B->preallocated = PETSC_TRUE;
2598: mbs = B->rmap->n/bs;
2599: nbs = B->cmap->n/bs;
2600: bs2 = bs*bs;
2602: if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) {
2603: SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2604: }
2606: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2607: if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2608: if (nnz) {
2609: for (i=0; i<mbs; i++) {
2610: if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2611: if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2612: }
2613: }
2615: b = (Mat_SeqBAIJ*)B->data;
2616: PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2617: PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);
2618: PetscOptionsEnd();
2620: if (!flg) {
2621: switch (bs) {
2622: case 1:
2623: B->ops->mult = MatMult_SeqBAIJ_1;
2624: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2625: B->ops->pbrelax = MatPBRelax_SeqBAIJ_1;
2626: break;
2627: case 2:
2628: B->ops->mult = MatMult_SeqBAIJ_2;
2629: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2630: B->ops->pbrelax = MatPBRelax_SeqBAIJ_2;
2631: break;
2632: case 3:
2633: B->ops->mult = MatMult_SeqBAIJ_3;
2634: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2635: B->ops->pbrelax = MatPBRelax_SeqBAIJ_3;
2636: break;
2637: case 4:
2638: B->ops->mult = MatMult_SeqBAIJ_4;
2639: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2640: B->ops->pbrelax = MatPBRelax_SeqBAIJ_4;
2641: break;
2642: case 5:
2643: B->ops->mult = MatMult_SeqBAIJ_5;
2644: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2645: B->ops->pbrelax = MatPBRelax_SeqBAIJ_5;
2646: break;
2647: case 6:
2648: B->ops->mult = MatMult_SeqBAIJ_6;
2649: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2650: B->ops->pbrelax = MatPBRelax_SeqBAIJ_6;
2651: break;
2652: case 7:
2653: B->ops->mult = MatMult_SeqBAIJ_7;
2654: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2655: B->ops->pbrelax = MatPBRelax_SeqBAIJ_7;
2656: break;
2657: default:
2658: B->ops->mult = MatMult_SeqBAIJ_N;
2659: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2660: break;
2661: }
2662: }
2663: B->rmap->bs = bs;
2664: b->mbs = mbs;
2665: b->nbs = nbs;
2666: if (!skipallocation) {
2667: if (!b->imax) {
2668: PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);
2669: }
2670: /* b->ilen will count nonzeros in each block row so far. */
2671: for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2672: if (!nnz) {
2673: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2674: else if (nz <= 0) nz = 1;
2675: for (i=0; i<mbs; i++) b->imax[i] = nz;
2676: nz = nz*mbs;
2677: } else {
2678: nz = 0;
2679: for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2680: }
2682: /* allocate the matrix space */
2683: MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
2684: PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);
2685: PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
2686: PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
2687: PetscMemzero(b->j,nz*sizeof(PetscInt));
2688: b->singlemalloc = PETSC_TRUE;
2689: b->i[0] = 0;
2690: for (i=1; i<mbs+1; i++) {
2691: b->i[i] = b->i[i-1] + b->imax[i-1];
2692: }
2693: b->free_a = PETSC_TRUE;
2694: b->free_ij = PETSC_TRUE;
2695: } else {
2696: b->free_a = PETSC_FALSE;
2697: b->free_ij = PETSC_FALSE;
2698: }
2700: B->rmap->bs = bs;
2701: b->bs2 = bs2;
2702: b->mbs = mbs;
2703: b->nz = 0;
2704: b->maxnz = nz*bs2;
2705: B->info.nz_unneeded = (PetscReal)b->maxnz;
2706: return(0);
2707: }
2713: PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2714: {
2715: PetscInt i,m,nz,nz_max=0,*nnz;
2716: PetscScalar *values=0;
2721: if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2723: PetscMapSetBlockSize(B->rmap,bs);
2724: PetscMapSetBlockSize(B->cmap,bs);
2725: PetscMapSetUp(B->rmap);
2726: PetscMapSetUp(B->cmap);
2727: m = B->rmap->n/bs;
2729: if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
2730: PetscMalloc((m+1) * sizeof(PetscInt), &nnz);
2731: for(i=0; i<m; i++) {
2732: nz = ii[i+1]- ii[i];
2733: if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
2734: nz_max = PetscMax(nz_max, nz);
2735: nnz[i] = nz;
2736: }
2737: MatSeqBAIJSetPreallocation(B,bs,0,nnz);
2738: PetscFree(nnz);
2740: values = (PetscScalar*)V;
2741: if (!values) {
2742: PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);
2743: PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));
2744: }
2745: for (i=0; i<m; i++) {
2746: PetscInt ncols = ii[i+1] - ii[i];
2747: const PetscInt *icols = jj + ii[i];
2748: const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2749: MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);
2750: }
2751: if (!V) { PetscFree(values); }
2752: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2753: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2755: return(0);
2756: }
2765: /*MC
2766: MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2767: block sparse compressed row format.
2769: Options Database Keys:
2770: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2772: Level: beginner
2774: .seealso: MatCreateSeqBAIJ()
2775: M*/
2781: PetscErrorCode MatCreate_SeqBAIJ(Mat B)
2782: {
2784: PetscMPIInt size;
2785: Mat_SeqBAIJ *b;
2788: MPI_Comm_size(((PetscObject)B)->comm,&size);
2789: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2791: PetscNewLog(B,Mat_SeqBAIJ,&b);
2792: B->data = (void*)b;
2793: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
2794: B->mapping = 0;
2795: b->row = 0;
2796: b->col = 0;
2797: b->icol = 0;
2798: b->reallocs = 0;
2799: b->saved_values = 0;
2801: b->roworiented = PETSC_TRUE;
2802: b->nonew = 0;
2803: b->diag = 0;
2804: b->solve_work = 0;
2805: b->mult_work = 0;
2806: B->spptr = 0;
2807: B->info.nz_unneeded = (PetscReal)b->maxnz;
2808: b->keepzeroedrows = PETSC_FALSE;
2809: b->xtoy = 0;
2810: b->XtoY = 0;
2811: b->compressedrow.use = PETSC_FALSE;
2812: b->compressedrow.nrows = 0;
2813: b->compressedrow.i = PETSC_NULL;
2814: b->compressedrow.rindex = PETSC_NULL;
2815: b->compressedrow.checked = PETSC_FALSE;
2816: B->same_nonzero = PETSC_FALSE;
2818: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqbaij_petsc_C",
2819: "MatGetFactorAvailable_seqbaij_petsc",
2820: MatGetFactorAvailable_seqbaij_petsc);
2821: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C",
2822: "MatGetFactor_seqbaij_petsc",
2823: MatGetFactor_seqbaij_petsc);
2824: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2825: "MatInvertBlockDiagonal_SeqBAIJ",
2826: MatInvertBlockDiagonal_SeqBAIJ);
2827: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2828: "MatStoreValues_SeqBAIJ",
2829: MatStoreValues_SeqBAIJ);
2830: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2831: "MatRetrieveValues_SeqBAIJ",
2832: MatRetrieveValues_SeqBAIJ);
2833: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2834: "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2835: MatSeqBAIJSetColumnIndices_SeqBAIJ);
2836: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2837: "MatConvert_SeqBAIJ_SeqAIJ",
2838: MatConvert_SeqBAIJ_SeqAIJ);
2839: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2840: "MatConvert_SeqBAIJ_SeqSBAIJ",
2841: MatConvert_SeqBAIJ_SeqSBAIJ);
2842: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2843: "MatSeqBAIJSetPreallocation_SeqBAIJ",
2844: MatSeqBAIJSetPreallocation_SeqBAIJ);
2845: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
2846: "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
2847: MatSeqBAIJSetPreallocationCSR_SeqBAIJ);
2848: PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
2849: return(0);
2850: }
2855: PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues)
2856: {
2857: Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
2859: PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2862: if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2864: PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);
2865: for (i=0; i<mbs; i++) {
2866: c->imax[i] = a->imax[i];
2867: c->ilen[i] = a->ilen[i];
2868: }
2870: /* allocate the matrix space */
2871: PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);
2872: c->singlemalloc = PETSC_TRUE;
2873: PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
2874: if (mbs > 0) {
2875: PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
2876: if (cpvalues == MAT_COPY_VALUES) {
2877: PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
2878: } else {
2879: PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
2880: }
2881: }
2882: c->roworiented = a->roworiented;
2883: c->nonew = a->nonew;
2884: PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);
2885: PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);
2886: c->bs2 = a->bs2;
2887: c->mbs = a->mbs;
2888: c->nbs = a->nbs;
2890: if (a->diag) {
2891: PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);
2892: PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));
2893: for (i=0; i<mbs; i++) {
2894: c->diag[i] = a->diag[i];
2895: }
2896: } else c->diag = 0;
2897: c->nz = a->nz;
2898: c->maxnz = a->maxnz;
2899: c->solve_work = 0;
2900: c->mult_work = 0;
2901: c->free_a = PETSC_TRUE;
2902: c->free_ij = PETSC_TRUE;
2903: C->preallocated = PETSC_TRUE;
2904: C->assembled = PETSC_TRUE;
2906: c->compressedrow.use = a->compressedrow.use;
2907: c->compressedrow.nrows = a->compressedrow.nrows;
2908: c->compressedrow.checked = a->compressedrow.checked;
2909: if ( a->compressedrow.checked && a->compressedrow.use){
2910: i = a->compressedrow.nrows;
2911: PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);
2912: c->compressedrow.rindex = c->compressedrow.i + i + 1;
2913: PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));
2914: PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));
2915: } else {
2916: c->compressedrow.use = PETSC_FALSE;
2917: c->compressedrow.i = PETSC_NULL;
2918: c->compressedrow.rindex = PETSC_NULL;
2919: }
2920: C->same_nonzero = A->same_nonzero;
2921: PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
2922: return(0);
2923: }
2927: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2928: {
2932: MatCreate(((PetscObject)A)->comm,B);
2933: MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);
2934: MatSetType(*B,MATSEQBAIJ);
2935: MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues);
2936: return(0);
2937: }
2941: PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
2942: {
2943: Mat_SeqBAIJ *a;
2944: Mat B;
2946: PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1;
2947: PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2948: PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows;
2949: PetscInt *masked,nmask,tmp,bs2,ishift;
2950: PetscMPIInt size;
2951: int fd;
2952: PetscScalar *aa;
2953: MPI_Comm comm = ((PetscObject)viewer)->comm;
2956: PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");
2957: PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);
2958: PetscOptionsEnd();
2959: bs2 = bs*bs;
2961: MPI_Comm_size(comm,&size);
2962: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2963: PetscViewerBinaryGetDescriptor(viewer,&fd);
2964: PetscBinaryRead(fd,header,4,PETSC_INT);
2965: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2966: M = header[1]; N = header[2]; nz = header[3];
2968: if (header[3] < 0) {
2969: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2970: }
2972: if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2974: /*
2975: This code adds extra rows to make sure the number of rows is
2976: divisible by the blocksize
2977: */
2978: mbs = M/bs;
2979: extra_rows = bs - M + bs*(mbs);
2980: if (extra_rows == bs) extra_rows = 0;
2981: else mbs++;
2982: if (extra_rows) {
2983: PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
2984: }
2986: /* read in row lengths */
2987: PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);
2988: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
2989: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2991: /* read in column indices */
2992: PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);
2993: PetscBinaryRead(fd,jj,nz,PETSC_INT);
2994: for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2996: /* loop over row lengths determining block row lengths */
2997: PetscMalloc(mbs*sizeof(PetscInt),&browlengths);
2998: PetscMemzero(browlengths,mbs*sizeof(PetscInt));
2999: PetscMalloc(2*mbs*sizeof(PetscInt),&mask);
3000: PetscMemzero(mask,mbs*sizeof(PetscInt));
3001: masked = mask + mbs;
3002: rowcount = 0; nzcount = 0;
3003: for (i=0; i<mbs; i++) {
3004: nmask = 0;
3005: for (j=0; j<bs; j++) {
3006: kmax = rowlengths[rowcount];
3007: for (k=0; k<kmax; k++) {
3008: tmp = jj[nzcount++]/bs;
3009: if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3010: }
3011: rowcount++;
3012: }
3013: browlengths[i] += nmask;
3014: /* zero out the mask elements we set */
3015: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3016: }
3018: /* create our matrix */
3019: MatCreate(comm,&B);
3020: MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3021: MatSetType(B,type);
3022: MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);
3023: a = (Mat_SeqBAIJ*)B->data;
3025: /* set matrix "i" values */
3026: a->i[0] = 0;
3027: for (i=1; i<= mbs; i++) {
3028: a->i[i] = a->i[i-1] + browlengths[i-1];
3029: a->ilen[i-1] = browlengths[i-1];
3030: }
3031: a->nz = 0;
3032: for (i=0; i<mbs; i++) a->nz += browlengths[i];
3034: /* read in nonzero values */
3035: PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
3036: PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
3037: for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3039: /* set "a" and "j" values into matrix */
3040: nzcount = 0; jcount = 0;
3041: for (i=0; i<mbs; i++) {
3042: nzcountb = nzcount;
3043: nmask = 0;
3044: for (j=0; j<bs; j++) {
3045: kmax = rowlengths[i*bs+j];
3046: for (k=0; k<kmax; k++) {
3047: tmp = jj[nzcount++]/bs;
3048: if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3049: }
3050: }
3051: /* sort the masked values */
3052: PetscSortInt(nmask,masked);
3054: /* set "j" values into matrix */
3055: maskcount = 1;
3056: for (j=0; j<nmask; j++) {
3057: a->j[jcount++] = masked[j];
3058: mask[masked[j]] = maskcount++;
3059: }
3060: /* set "a" values into matrix */
3061: ishift = bs2*a->i[i];
3062: for (j=0; j<bs; j++) {
3063: kmax = rowlengths[i*bs+j];
3064: for (k=0; k<kmax; k++) {
3065: tmp = jj[nzcountb]/bs ;
3066: block = mask[tmp] - 1;
3067: point = jj[nzcountb] - bs*tmp;
3068: idx = ishift + bs2*block + j + bs*point;
3069: a->a[idx] = (MatScalar)aa[nzcountb++];
3070: }
3071: }
3072: /* zero out the mask elements we set */
3073: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3074: }
3075: if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3077: PetscFree(rowlengths);
3078: PetscFree(browlengths);
3079: PetscFree(aa);
3080: PetscFree(jj);
3081: PetscFree(mask);
3083: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3084: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3085: MatView_Private(B);
3087: *A = B;
3088: return(0);
3089: }
3093: /*@C
3094: MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3095: compressed row) format. For good matrix assembly performance the
3096: user should preallocate the matrix storage by setting the parameter nz
3097: (or the array nnz). By setting these parameters accurately, performance
3098: during matrix assembly can be increased by more than a factor of 50.
3100: Collective on MPI_Comm
3102: Input Parameters:
3103: + comm - MPI communicator, set to PETSC_COMM_SELF
3104: . bs - size of block
3105: . m - number of rows
3106: . n - number of columns
3107: . nz - number of nonzero blocks per block row (same for all rows)
3108: - nnz - array containing the number of nonzero blocks in the various block rows
3109: (possibly different for each block row) or PETSC_NULL
3111: Output Parameter:
3112: . A - the matrix
3114: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3115: MatXXXXSetPreallocation() paradgm instead of this routine directly.
3116: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3118: Options Database Keys:
3119: . -mat_no_unroll - uses code that does not unroll the loops in the
3120: block calculations (much slower)
3121: . -mat_block_size - size of the blocks to use
3123: Level: intermediate
3125: Notes:
3126: The number of rows and columns must be divisible by blocksize.
3128: If the nnz parameter is given then the nz parameter is ignored
3130: A nonzero block is any block that as 1 or more nonzeros in it
3132: The block AIJ format is fully compatible with standard Fortran 77
3133: storage. That is, the stored row and column indices can begin at
3134: either one (as in Fortran) or zero. See the users' manual for details.
3136: Specify the preallocated storage with either nz or nnz (not both).
3137: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3138: allocation. For additional details, see the users manual chapter on
3139: matrices.
3141: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3142: @*/
3143: PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3144: {
3146:
3148: MatCreate(comm,A);
3149: MatSetSizes(*A,m,n,m,n);
3150: MatSetType(*A,MATSEQBAIJ);
3151: MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
3152: return(0);
3153: }
3157: /*@C
3158: MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3159: per row in the matrix. For good matrix assembly performance the
3160: user should preallocate the matrix storage by setting the parameter nz
3161: (or the array nnz). By setting these parameters accurately, performance
3162: during matrix assembly can be increased by more than a factor of 50.
3164: Collective on MPI_Comm
3166: Input Parameters:
3167: + A - the matrix
3168: . bs - size of block
3169: . nz - number of block nonzeros per block row (same for all rows)
3170: - nnz - array containing the number of block nonzeros in the various block rows
3171: (possibly different for each block row) or PETSC_NULL
3173: Options Database Keys:
3174: . -mat_no_unroll - uses code that does not unroll the loops in the
3175: block calculations (much slower)
3176: . -mat_block_size - size of the blocks to use
3178: Level: intermediate
3180: Notes:
3181: If the nnz parameter is given then the nz parameter is ignored
3183: You can call MatGetInfo() to get information on how effective the preallocation was;
3184: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3185: You can also run with the option -info and look for messages with the string
3186: malloc in them to see if additional memory allocation was needed.
3188: The block AIJ format is fully compatible with standard Fortran 77
3189: storage. That is, the stored row and column indices can begin at
3190: either one (as in Fortran) or zero. See the users' manual for details.
3192: Specify the preallocated storage with either nz or nnz (not both).
3193: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3194: allocation. For additional details, see the users manual chapter on
3195: matrices.
3197: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3198: @*/
3199: PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3200: {
3201: PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3204: PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);
3205: if (f) {
3206: (*f)(B,bs,nz,nnz);
3207: }
3208: return(0);
3209: }
3213: /*@C
3214: MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3215: (the default sequential PETSc format).
3217: Collective on MPI_Comm
3219: Input Parameters:
3220: + A - the matrix
3221: . i - the indices into j for the start of each local row (starts with zero)
3222: . j - the column indices for each local row (starts with zero) these must be sorted for each row
3223: - v - optional values in the matrix
3225: Level: developer
3227: .keywords: matrix, aij, compressed row, sparse
3229: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3230: @*/
3231: PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3232: {
3233: PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3236: PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);
3237: if (f) {
3238: (*f)(B,bs,i,j,v);
3239: }
3240: return(0);
3241: }
3246: /*@
3247: MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3248: (upper triangular entries in CSR format) provided by the user.
3250: Collective on MPI_Comm
3252: Input Parameters:
3253: + comm - must be an MPI communicator of size 1
3254: . bs - size of block
3255: . m - number of rows
3256: . n - number of columns
3257: . i - row indices
3258: . j - column indices
3259: - a - matrix values
3261: Output Parameter:
3262: . mat - the matrix
3264: Level: intermediate
3266: Notes:
3267: The i, j, and a arrays are not copied by this routine, the user must free these arrays
3268: once the matrix is destroyed
3270: You cannot set new nonzero locations into this matrix, that will generate an error.
3272: The i and j indices are 0 based
3274: .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3276: @*/
3277: PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3278: {
3280: PetscInt ii;
3281: Mat_SeqBAIJ *baij;
3284: if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3285: if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3286:
3287: MatCreate(comm,mat);
3288: MatSetSizes(*mat,m,n,m,n);
3289: MatSetType(*mat,MATSEQBAIJ);
3290: MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);
3291: baij = (Mat_SeqBAIJ*)(*mat)->data;
3292: PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);
3294: baij->i = i;
3295: baij->j = j;
3296: baij->a = a;
3297: baij->singlemalloc = PETSC_FALSE;
3298: baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3299: baij->free_a = PETSC_FALSE;
3300: baij->free_ij = PETSC_FALSE;
3302: for (ii=0; ii<m; ii++) {
3303: baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3304: #if defined(PETSC_USE_DEBUG)
3305: if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3306: #endif
3307: }
3308: #if defined(PETSC_USE_DEBUG)
3309: for (ii=0; ii<baij->i[m]; ii++) {
3310: if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3311: if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3312: }
3313: #endif
3315: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3316: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3317: return(0);
3318: }