Actual source code: sbaij.c
1: #define PETSCMAT_DLL
3: /*
4: Defines the basic matrix operations for the SBAIJ (compressed row)
5: matrix storage format.
6: */
7: #include ../src/mat/impls/baij/seq/baij.h
8: #include ../src/inline/spops.h
9: #include ../src/mat/impls/sbaij/seq/sbaij.h
12: #define CHUNKSIZE 10
14: /*
15: Checks for missing diagonals
16: */
19: PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A,PetscTruth *missing,PetscInt *dd)
20: {
21: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
23: PetscInt *diag,*jj = a->j,i;
26: MatMarkDiagonal_SeqSBAIJ(A);
27: diag = a->diag;
28: *missing = PETSC_FALSE;
29: for (i=0; i<a->mbs; i++) {
30: if (jj[diag[i]] != i) {
31: *missing = PETSC_TRUE;
32: if (dd) *dd = i;
33: break;
34: }
35: }
36: return(0);
37: }
41: PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A)
42: {
43: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
45: PetscInt i;
48: if (!a->diag) {
49: PetscMalloc(a->mbs*sizeof(PetscInt),&a->diag);
50: }
51: for (i=0; i<a->mbs; i++) a->diag[i] = a->i[i];
52: return(0);
53: }
57: static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
58: {
59: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
60: PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs;
64: *nn = n;
65: if (!ia) return(0);
66: if (!blockcompressed) {
67: /* malloc & create the natural set of indices */
68: PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);
69: for (i=0; i<n+1; i++) {
70: for (j=0; j<bs; j++) {
71: *ia[i*bs+j] = a->i[i]*bs+j+oshift;
72: }
73: }
74: for (i=0; i<nz; i++) {
75: for (j=0; j<bs; j++) {
76: *ja[i*bs+j] = a->j[i]*bs+j+oshift;
77: }
78: }
79: } else { /* blockcompressed */
80: if (oshift == 1) {
81: /* temporarily add 1 to i and j indices */
82: for (i=0; i<nz; i++) a->j[i]++;
83: for (i=0; i<n+1; i++) a->i[i]++;
84: }
85: *ia = a->i; *ja = a->j;
86: }
88: return(0);
89: }
93: static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
94: {
95: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
96: PetscInt i,n = a->mbs,nz = a->i[n];
100: if (!ia) return(0);
102: if (!blockcompressed) {
103: PetscFree2(*ia,*ja);
104: } else if (oshift == 1) { /* blockcompressed */
105: for (i=0; i<nz; i++) a->j[i]--;
106: for (i=0; i<n+1; i++) a->i[i]--;
107: }
109: return(0);
110: }
114: PetscErrorCode MatDestroy_SeqSBAIJ(Mat A)
115: {
116: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
120: #if defined(PETSC_USE_LOG)
121: PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->rmap->N,a->nz);
122: #endif
123: MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
124: if (a->row) {ISDestroy(a->row);}
125: if (a->col){ISDestroy(a->col);}
126: if (a->icol) {ISDestroy(a->icol);}
127: PetscFree(a->diag);
128: PetscFree2(a->imax,a->ilen);
129: PetscFree(a->solve_work);
130: PetscFree(a->relax_work);
131: PetscFree(a->solves_work);
132: PetscFree(a->mult_work);
133: PetscFree(a->saved_values);
134: PetscFree(a->xtoy);
136: PetscFree(a->inew);
137: PetscFree(a);
139: PetscObjectChangeTypeName((PetscObject)A,0);
140: PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);
141: PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);
142: PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);
143: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);
144: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);
145: PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);
146: return(0);
147: }
151: PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op,PetscTruth flg)
152: {
153: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
157: switch (op) {
158: case MAT_ROW_ORIENTED:
159: a->roworiented = flg;
160: break;
161: case MAT_KEEP_ZEROED_ROWS:
162: a->keepzeroedrows = flg;
163: break;
164: case MAT_NEW_NONZERO_LOCATIONS:
165: a->nonew = (flg ? 0 : 1);
166: break;
167: case MAT_NEW_NONZERO_LOCATION_ERR:
168: a->nonew = (flg ? -1 : 0);
169: break;
170: case MAT_NEW_NONZERO_ALLOCATION_ERR:
171: a->nonew = (flg ? -2 : 0);
172: break;
173: case MAT_UNUSED_NONZERO_LOCATION_ERR:
174: a->nounused = (flg ? -1 : 0);
175: break;
176: case MAT_NEW_DIAGONALS:
177: case MAT_IGNORE_OFF_PROC_ENTRIES:
178: case MAT_USE_HASH_TABLE:
179: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
180: break;
181: case MAT_HERMITIAN:
182: if (flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
183: case MAT_SYMMETRIC:
184: case MAT_STRUCTURALLY_SYMMETRIC:
185: case MAT_SYMMETRY_ETERNAL:
186: if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
187: PetscInfo1(A,"Option %s not relevent\n",MatOptions[op]);
188: break;
189: case MAT_IGNORE_LOWER_TRIANGULAR:
190: a->ignore_ltriangular = flg;
191: break;
192: case MAT_ERROR_LOWER_TRIANGULAR:
193: a->ignore_ltriangular = flg;
194: break;
195: case MAT_GETROW_UPPERTRIANGULAR:
196: a->getrow_utriangular = flg;
197: break;
198: default:
199: SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
200: }
201: return(0);
202: }
206: PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v)
207: {
208: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
210: PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2;
211: MatScalar *aa,*aa_i;
212: PetscScalar *v_i;
215: if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE) or MatGetRowUpperTriangular()");
216: /* Get the upper triangular part of the row */
217: bs = A->rmap->bs;
218: ai = a->i;
219: aj = a->j;
220: aa = a->a;
221: bs2 = a->bs2;
222:
223: if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row);
224:
225: bn = row/bs; /* Block number */
226: bp = row % bs; /* Block position */
227: M = ai[bn+1] - ai[bn];
228: *ncols = bs*M;
229:
230: if (v) {
231: *v = 0;
232: if (*ncols) {
233: PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);
234: for (i=0; i<M; i++) { /* for each block in the block row */
235: v_i = *v + i*bs;
236: aa_i = aa + bs2*(ai[bn] + i);
237: for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
238: }
239: }
240: }
241:
242: if (cols) {
243: *cols = 0;
244: if (*ncols) {
245: PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);
246: for (i=0; i<M; i++) { /* for each block in the block row */
247: cols_i = *cols + i*bs;
248: itmp = bs*aj[ai[bn] + i];
249: for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
250: }
251: }
252: }
253:
254: /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
255: /* this segment is currently removed, so only entries in the upper triangle are obtained */
256: #ifdef column_search
257: v_i = *v + M*bs;
258: cols_i = *cols + M*bs;
259: for (i=0; i<bn; i++){ /* for each block row */
260: M = ai[i+1] - ai[i];
261: for (j=0; j<M; j++){
262: itmp = aj[ai[i] + j]; /* block column value */
263: if (itmp == bn){
264: aa_i = aa + bs2*(ai[i] + j) + bs*bp;
265: for (k=0; k<bs; k++) {
266: *cols_i++ = i*bs+k;
267: *v_i++ = aa_i[k];
268: }
269: *ncols += bs;
270: break;
271: }
272: }
273: }
274: #endif
275: return(0);
276: }
280: PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
281: {
283:
285: if (idx) {PetscFree(*idx);}
286: if (v) {PetscFree(*v);}
287: return(0);
288: }
292: PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A)
293: {
294: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
297: a->getrow_utriangular = PETSC_TRUE;
298: return(0);
299: }
302: PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A)
303: {
304: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
307: a->getrow_utriangular = PETSC_FALSE;
308: return(0);
309: }
313: PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,MatReuse reuse,Mat *B)
314: {
317: if (reuse == MAT_INITIAL_MATRIX || *B != A) {
318: MatDuplicate(A,MAT_COPY_VALUES,B);
319: }
320: return(0);
321: }
325: static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
326: {
327: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
328: PetscErrorCode ierr;
329: PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
330: const char *name;
331: PetscViewerFormat format;
332:
334: PetscObjectGetName((PetscObject)A,&name);
335: PetscViewerGetFormat(viewer,&format);
336: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
337: PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);
338: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
339: Mat aij;
341: if (A->factor && bs>1){
342: PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored with bs>1. MatView() with PETSC_VIEWER_ASCII_MATLAB is not supported and ignored!\n");
343: return(0);
344: }
345: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
346: MatView(aij,viewer);
347: MatDestroy(aij);
348: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
349: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
350: for (i=0; i<a->mbs; i++) {
351: for (j=0; j<bs; j++) {
352: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
353: for (k=a->i[i]; k<a->i[i+1]; k++) {
354: for (l=0; l<bs; l++) {
355: #if defined(PETSC_USE_COMPLEX)
356: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
357: PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
358: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
359: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
360: PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
361: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
362: } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
363: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
364: }
365: #else
366: if (a->a[bs2*k + l*bs + j] != 0.0) {
367: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
368: }
369: #endif
370: }
371: }
372: PetscViewerASCIIPrintf(viewer,"\n");
373: }
374: }
375: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
376: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
377: return(0);
378: } else {
379: if (A->factor && bs>1){
380: PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored. MatView_SeqSBAIJ_ASCII() may not display complete or logically correct entries!\n");
381: }
382: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
383: for (i=0; i<a->mbs; i++) {
384: for (j=0; j<bs; j++) {
385: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
386: for (k=a->i[i]; k<a->i[i+1]; k++) {
387: for (l=0; l<bs; l++) {
388: #if defined(PETSC_USE_COMPLEX)
389: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
390: PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
391: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
392: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
393: PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
394: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
395: } else {
396: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
397: }
398: #else
399: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
400: #endif
401: }
402: }
403: PetscViewerASCIIPrintf(viewer,"\n");
404: }
405: }
406: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
407: }
408: PetscViewerFlush(viewer);
409: return(0);
410: }
414: static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
415: {
416: Mat A = (Mat) Aa;
417: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
419: PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
420: PetscMPIInt rank;
421: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
422: MatScalar *aa;
423: MPI_Comm comm;
424: PetscViewer viewer;
425:
427: /*
428: This is nasty. If this is called from an originally parallel matrix
429: then all processes call this,but only the first has the matrix so the
430: rest should return immediately.
431: */
432: PetscObjectGetComm((PetscObject)draw,&comm);
433: MPI_Comm_rank(comm,&rank);
434: if (rank) return(0);
435:
436: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
437:
438: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
439: PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
440:
441: /* loop over matrix elements drawing boxes */
442: color = PETSC_DRAW_BLUE;
443: for (i=0,row=0; i<mbs; i++,row+=bs) {
444: for (j=a->i[i]; j<a->i[i+1]; j++) {
445: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
446: x_l = a->j[j]*bs; x_r = x_l + 1.0;
447: aa = a->a + j*bs2;
448: for (k=0; k<bs; k++) {
449: for (l=0; l<bs; l++) {
450: if (PetscRealPart(*aa++) >= 0.) continue;
451: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
452: }
453: }
454: }
455: }
456: color = PETSC_DRAW_CYAN;
457: for (i=0,row=0; i<mbs; i++,row+=bs) {
458: for (j=a->i[i]; j<a->i[i+1]; j++) {
459: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
460: x_l = a->j[j]*bs; x_r = x_l + 1.0;
461: aa = a->a + j*bs2;
462: for (k=0; k<bs; k++) {
463: for (l=0; l<bs; l++) {
464: if (PetscRealPart(*aa++) != 0.) continue;
465: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
466: }
467: }
468: }
469: }
470:
471: color = PETSC_DRAW_RED;
472: for (i=0,row=0; i<mbs; i++,row+=bs) {
473: for (j=a->i[i]; j<a->i[i+1]; j++) {
474: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
475: x_l = a->j[j]*bs; x_r = x_l + 1.0;
476: aa = a->a + j*bs2;
477: for (k=0; k<bs; k++) {
478: for (l=0; l<bs; l++) {
479: if (PetscRealPart(*aa++) <= 0.) continue;
480: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
481: }
482: }
483: }
484: }
485: return(0);
486: }
490: static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
491: {
493: PetscReal xl,yl,xr,yr,w,h;
494: PetscDraw draw;
495: PetscTruth isnull;
496:
498: PetscViewerDrawGetDraw(viewer,0,&draw);
499: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
500:
501: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
502: xr = A->rmap->N; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
503: xr += w; yr += h; xl = -w; yl = -h;
504: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
505: PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);
506: PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
507: return(0);
508: }
512: PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
513: {
515: PetscTruth iascii,isdraw;
516: FILE *file = 0;
519: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
520: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
521: if (iascii){
522: MatView_SeqSBAIJ_ASCII(A,viewer);
523: } else if (isdraw) {
524: MatView_SeqSBAIJ_Draw(A,viewer);
525: } else {
526: Mat B;
527: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
528: MatView(B,viewer);
529: MatDestroy(B);
530: PetscViewerBinaryGetInfoPointer(viewer,&file);
531: if (file) {
532: fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
533: }
534: }
535: return(0);
536: }
541: PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
542: {
543: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
544: PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
545: PetscInt *ai = a->i,*ailen = a->ilen;
546: PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
547: MatScalar *ap,*aa = a->a;
548:
550: for (k=0; k<m; k++) { /* loop over rows */
551: row = im[k]; brow = row/bs;
552: if (row < 0) {v += n; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); */
553: if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
554: rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
555: nrow = ailen[brow];
556: for (l=0; l<n; l++) { /* loop over columns */
557: if (in[l] < 0) {v++; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); */
558: if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
559: col = in[l] ;
560: bcol = col/bs;
561: cidx = col%bs;
562: ridx = row%bs;
563: high = nrow;
564: low = 0; /* assume unsorted */
565: while (high-low > 5) {
566: t = (low+high)/2;
567: if (rp[t] > bcol) high = t;
568: else low = t;
569: }
570: for (i=low; i<high; i++) {
571: if (rp[i] > bcol) break;
572: if (rp[i] == bcol) {
573: *v++ = ap[bs2*i+bs*cidx+ridx];
574: goto finished;
575: }
576: }
577: *v++ = 0.0;
578: finished:;
579: }
580: }
581: return(0);
582: }
587: PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
588: {
589: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
590: PetscErrorCode ierr;
591: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
592: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
593: PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
594: PetscTruth roworiented=a->roworiented;
595: const PetscScalar *value = v;
596: MatScalar *ap,*aa = a->a,*bap;
597:
599: if (roworiented) {
600: stepval = (n-1)*bs;
601: } else {
602: stepval = (m-1)*bs;
603: }
604: for (k=0; k<m; k++) { /* loop over added rows */
605: row = im[k];
606: if (row < 0) continue;
607: #if defined(PETSC_USE_DEBUG)
608: if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
609: #endif
610: rp = aj + ai[row];
611: ap = aa + bs2*ai[row];
612: rmax = imax[row];
613: nrow = ailen[row];
614: low = 0;
615: high = nrow;
616: for (l=0; l<n; l++) { /* loop over added columns */
617: if (in[l] < 0) continue;
618: col = in[l];
619: #if defined(PETSC_USE_DEBUG)
620: if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1);
621: #endif
622: if (col < row) continue; /* ignore lower triangular block */
623: if (roworiented) {
624: value = v + k*(stepval+bs)*bs + l*bs;
625: } else {
626: value = v + l*(stepval+bs)*bs + k*bs;
627: }
628: if (col <= lastcol) low = 0; else high = nrow;
629: lastcol = col;
630: while (high-low > 7) {
631: t = (low+high)/2;
632: if (rp[t] > col) high = t;
633: else low = t;
634: }
635: for (i=low; i<high; i++) {
636: if (rp[i] > col) break;
637: if (rp[i] == col) {
638: bap = ap + bs2*i;
639: if (roworiented) {
640: if (is == ADD_VALUES) {
641: for (ii=0; ii<bs; ii++,value+=stepval) {
642: for (jj=ii; jj<bs2; jj+=bs) {
643: bap[jj] += *value++;
644: }
645: }
646: } else {
647: for (ii=0; ii<bs; ii++,value+=stepval) {
648: for (jj=ii; jj<bs2; jj+=bs) {
649: bap[jj] = *value++;
650: }
651: }
652: }
653: } else {
654: if (is == ADD_VALUES) {
655: for (ii=0; ii<bs; ii++,value+=stepval) {
656: for (jj=0; jj<bs; jj++) {
657: *bap++ += *value++;
658: }
659: }
660: } else {
661: for (ii=0; ii<bs; ii++,value+=stepval) {
662: for (jj=0; jj<bs; jj++) {
663: *bap++ = *value++;
664: }
665: }
666: }
667: }
668: goto noinsert2;
669: }
670: }
671: if (nonew == 1) goto noinsert2;
672: if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
673: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
674: N = nrow++ - 1; high++;
675: /* shift up all the later entries in this row */
676: for (ii=N; ii>=i; ii--) {
677: rp[ii+1] = rp[ii];
678: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
679: }
680: if (N >= i) {
681: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
682: }
683: rp[i] = col;
684: bap = ap + bs2*i;
685: if (roworiented) {
686: for (ii=0; ii<bs; ii++,value+=stepval) {
687: for (jj=ii; jj<bs2; jj+=bs) {
688: bap[jj] = *value++;
689: }
690: }
691: } else {
692: for (ii=0; ii<bs; ii++,value+=stepval) {
693: for (jj=0; jj<bs; jj++) {
694: *bap++ = *value++;
695: }
696: }
697: }
698: noinsert2:;
699: low = i;
700: }
701: ailen[row] = nrow;
702: }
703: return(0);
704: }
708: PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
709: {
710: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
712: PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
713: PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen;
714: PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0;
715: MatScalar *aa = a->a,*ap;
716:
718: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
719:
720: if (m) rmax = ailen[0];
721: for (i=1; i<mbs; i++) {
722: /* move each row back by the amount of empty slots (fshift) before it*/
723: fshift += imax[i-1] - ailen[i-1];
724: rmax = PetscMax(rmax,ailen[i]);
725: if (fshift) {
726: ip = aj + ai[i]; ap = aa + bs2*ai[i];
727: N = ailen[i];
728: for (j=0; j<N; j++) {
729: ip[j-fshift] = ip[j];
730: PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
731: }
732: }
733: ai[i] = ai[i-1] + ailen[i-1];
734: }
735: if (mbs) {
736: fshift += imax[mbs-1] - ailen[mbs-1];
737: ai[mbs] = ai[mbs-1] + ailen[mbs-1];
738: }
739: /* reset ilen and imax for each row */
740: for (i=0; i<mbs; i++) {
741: ailen[i] = imax[i] = ai[i+1] - ai[i];
742: }
743: a->nz = ai[mbs];
744:
745: /* diagonals may have moved, reset it */
746: if (a->diag) {
747: PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));
748: }
749: if (fshift && a->nounused == -1) {
750: SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
751: }
752: PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->rmap->N,A->rmap->bs,fshift*bs2,a->nz*bs2);
753: PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
754: PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);
755: a->reallocs = 0;
756: A->info.nz_unneeded = (PetscReal)fshift*bs2;
757: return(0);
758: }
760: /*
761: This function returns an array of flags which indicate the locations of contiguous
762: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
763: then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
764: Assume: sizes should be long enough to hold all the values.
765: */
768: PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
769: {
770: PetscInt i,j,k,row;
771: PetscTruth flg;
772:
774: for (i=0,j=0; i<n; j++) {
775: row = idx[i];
776: if (row%bs!=0) { /* Not the begining of a block */
777: sizes[j] = 1;
778: i++;
779: } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
780: sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */
781: i++;
782: } else { /* Begining of the block, so check if the complete block exists */
783: flg = PETSC_TRUE;
784: for (k=1; k<bs; k++) {
785: if (row+k != idx[i+k]) { /* break in the block */
786: flg = PETSC_FALSE;
787: break;
788: }
789: }
790: if (flg) { /* No break in the bs */
791: sizes[j] = bs;
792: i+= bs;
793: } else {
794: sizes[j] = 1;
795: i++;
796: }
797: }
798: }
799: *bs_max = j;
800: return(0);
801: }
804: /* Only add/insert a(i,j) with i<=j (blocks).
805: Any a(i,j) with i>j input by user is ingored.
806: */
810: PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
811: {
812: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
814: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
815: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
816: PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
817: PetscInt ridx,cidx,bs2=a->bs2;
818: MatScalar *ap,value,*aa=a->a,*bap;
819:
821: for (k=0; k<m; k++) { /* loop over added rows */
822: row = im[k]; /* row number */
823: brow = row/bs; /* block row number */
824: if (row < 0) continue;
825: #if defined(PETSC_USE_DEBUG)
826: if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
827: #endif
828: rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
829: ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
830: rmax = imax[brow]; /* maximum space allocated for this row */
831: nrow = ailen[brow]; /* actual length of this row */
832: low = 0;
833:
834: for (l=0; l<n; l++) { /* loop over added columns */
835: if (in[l] < 0) continue;
836: #if defined(PETSC_USE_DEBUG)
837: if (in[l] >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->rmap->N-1);
838: #endif
839: col = in[l];
840: bcol = col/bs; /* block col number */
841:
842: if (brow > bcol) {
843: if (a->ignore_ltriangular){
844: continue; /* ignore lower triangular values */
845: } else {
846: SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
847: }
848: }
849:
850: ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
851: if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
852: /* element value a(k,l) */
853: if (roworiented) {
854: value = v[l + k*n];
855: } else {
856: value = v[k + l*m];
857: }
858:
859: /* move pointer bap to a(k,l) quickly and add/insert value */
860: if (col <= lastcol) low = 0; high = nrow;
861: lastcol = col;
862: while (high-low > 7) {
863: t = (low+high)/2;
864: if (rp[t] > bcol) high = t;
865: else low = t;
866: }
867: for (i=low; i<high; i++) {
868: if (rp[i] > bcol) break;
869: if (rp[i] == bcol) {
870: bap = ap + bs2*i + bs*cidx + ridx;
871: if (is == ADD_VALUES) *bap += value;
872: else *bap = value;
873: /* for diag block, add/insert its symmetric element a(cidx,ridx) */
874: if (brow == bcol && ridx < cidx){
875: bap = ap + bs2*i + bs*ridx + cidx;
876: if (is == ADD_VALUES) *bap += value;
877: else *bap = value;
878: }
879: goto noinsert1;
880: }
881: }
882:
883: if (nonew == 1) goto noinsert1;
884: if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
885: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
886:
887: N = nrow++ - 1; high++;
888: /* shift up all the later entries in this row */
889: for (ii=N; ii>=i; ii--) {
890: rp[ii+1] = rp[ii];
891: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
892: }
893: if (N>=i) {
894: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
895: }
896: rp[i] = bcol;
897: ap[bs2*i + bs*cidx + ridx] = value;
898: noinsert1:;
899: low = i;
900: }
901: } /* end of loop over added columns */
902: ailen[brow] = nrow;
903: } /* end of loop over added rows */
904: return(0);
905: }
909: PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,const MatFactorInfo *info)
910: {
911: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
912: Mat outA;
914: PetscTruth row_identity;
915:
917: if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels=0 is supported for in-place icc");
918: ISIdentity(row,&row_identity);
919: if (!row_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported");
920: if (inA->rmap->bs != 1) SETERRQ1(PETSC_ERR_SUP,"Matrix block size %D is not supported",inA->rmap->bs); /* Need to replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR()! */
922: outA = inA;
923: inA->factor = MAT_FACTOR_ICC;
924:
925: MatMarkDiagonal_SeqSBAIJ(inA);
926: MatSeqSBAIJSetNumericFactorization(inA,row_identity);
928: PetscObjectReference((PetscObject)row);
929: if (a->row) { ISDestroy(a->row); }
930: a->row = row;
931: PetscObjectReference((PetscObject)row);
932: if (a->col) { ISDestroy(a->col); }
933: a->col = row;
934:
935: /* Create the invert permutation so that it can be used in MatCholeskyFactorNumeric() */
936: if (a->icol) {ISInvertPermutation(row,PETSC_DECIDE, &a->icol);}
937: PetscLogObjectParent(inA,a->icol);
938:
939: if (!a->solve_work) {
940: PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);
941: PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));
942: }
943:
944: MatCholeskyFactorNumeric(outA,inA,info);
945: return(0);
946: }
951: PetscErrorCode MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices)
952: {
953: Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
954: PetscInt i,nz,n;
955:
957: nz = baij->maxnz;
958: n = mat->cmap->n;
959: for (i=0; i<nz; i++) {
960: baij->j[i] = indices[i];
961: }
962: baij->nz = nz;
963: for (i=0; i<n; i++) {
964: baij->ilen[i] = baij->imax[i];
965: }
966: return(0);
967: }
972: /*@
973: MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
974: in the matrix.
975:
976: Input Parameters:
977: + mat - the SeqSBAIJ matrix
978: - indices - the column indices
979:
980: Level: advanced
981:
982: Notes:
983: This can be called if you have precomputed the nonzero structure of the
984: matrix and want to provide it to the matrix object to improve the performance
985: of the MatSetValues() operation.
986:
987: You MUST have set the correct numbers of nonzeros per row in the call to
988: MatCreateSeqSBAIJ(), and the columns indices MUST be sorted.
989:
990: MUST be called before any calls to MatSetValues()
991:
992: .seealso: MatCreateSeqSBAIJ
993: @*/
994: PetscErrorCode MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices)
995: {
996: PetscErrorCode ierr,(*f)(Mat,PetscInt *);
997:
1001: PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);
1002: if (f) {
1003: (*f)(mat,indices);
1004: } else {
1005: SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices");
1006: }
1007: return(0);
1008: }
1012: PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str)
1013: {
1017: /* If the two matrices have the same copy implementation, use fast copy. */
1018: if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1019: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1020: Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1022: if (a->i[A->rmap->N] != b->i[B->rmap->N]) {
1023: SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1024: }
1025: PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));
1026: } else {
1027: MatGetRowUpperTriangular(A);
1028: MatCopy_Basic(A,B,str);
1029: MatRestoreRowUpperTriangular(A);
1030: }
1031: return(0);
1032: }
1036: PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A)
1037: {
1039:
1041: MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);
1042: return(0);
1043: }
1047: PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1048: {
1049: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1051: *array = a->a;
1052: return(0);
1053: }
1057: PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1058: {
1060: return(0);
1061: }
1063: #include petscblaslapack.h
1066: PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1067: {
1068: Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1070: PetscInt i,bs=Y->rmap->bs,bs2,j;
1071: PetscBLASInt one = 1,bnz = PetscBLASIntCast(x->nz);
1072:
1074: if (str == SAME_NONZERO_PATTERN) {
1075: PetscScalar alpha = a;
1076: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1077: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1078: if (y->xtoy && y->XtoY != X) {
1079: PetscFree(y->xtoy);
1080: MatDestroy(y->XtoY);
1081: }
1082: if (!y->xtoy) { /* get xtoy */
1083: MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);
1084: y->XtoY = X;
1085: }
1086: bs2 = bs*bs;
1087: for (i=0; i<x->nz; i++) {
1088: j = 0;
1089: while (j < bs2){
1090: y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1091: j++;
1092: }
1093: }
1094: PetscInfo3(Y,"ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1095: } else {
1096: MatGetRowUpperTriangular(X);
1097: MatAXPY_Basic(Y,a,X,str);
1098: MatRestoreRowUpperTriangular(X);
1099: }
1100: return(0);
1101: }
1105: PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1106: {
1108: *flg = PETSC_TRUE;
1109: return(0);
1110: }
1114: PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1115: {
1117: *flg = PETSC_TRUE;
1118: return(0);
1119: }
1123: PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1124: {
1126: *flg = PETSC_FALSE;
1127: return(0);
1128: }
1132: PetscErrorCode MatRealPart_SeqSBAIJ(Mat A)
1133: {
1134: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1135: PetscInt i,nz = a->bs2*a->i[a->mbs];
1136: MatScalar *aa = a->a;
1139: for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
1140: return(0);
1141: }
1145: PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A)
1146: {
1147: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1148: PetscInt i,nz = a->bs2*a->i[a->mbs];
1149: MatScalar *aa = a->a;
1152: for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
1153: return(0);
1154: }
1156: /* -------------------------------------------------------------------*/
1157: static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1158: MatGetRow_SeqSBAIJ,
1159: MatRestoreRow_SeqSBAIJ,
1160: MatMult_SeqSBAIJ_N,
1161: /* 4*/ MatMultAdd_SeqSBAIJ_N,
1162: MatMult_SeqSBAIJ_N, /* transpose versions are same as non-transpose versions */
1163: MatMultAdd_SeqSBAIJ_N,
1164: 0,
1165: 0,
1166: 0,
1167: /*10*/ 0,
1168: 0,
1169: MatCholeskyFactor_SeqSBAIJ,
1170: MatRelax_SeqSBAIJ,
1171: MatTranspose_SeqSBAIJ,
1172: /*15*/ MatGetInfo_SeqSBAIJ,
1173: MatEqual_SeqSBAIJ,
1174: MatGetDiagonal_SeqSBAIJ,
1175: MatDiagonalScale_SeqSBAIJ,
1176: MatNorm_SeqSBAIJ,
1177: /*20*/ 0,
1178: MatAssemblyEnd_SeqSBAIJ,
1179: 0,
1180: MatSetOption_SeqSBAIJ,
1181: MatZeroEntries_SeqSBAIJ,
1182: /*25*/ 0,
1183: 0,
1184: 0,
1185: 0,
1186: 0,
1187: /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1188: 0,
1189: 0,
1190: MatGetArray_SeqSBAIJ,
1191: MatRestoreArray_SeqSBAIJ,
1192: /*35*/ MatDuplicate_SeqSBAIJ,
1193: 0,
1194: 0,
1195: 0,
1196: MatICCFactor_SeqSBAIJ,
1197: /*40*/ MatAXPY_SeqSBAIJ,
1198: MatGetSubMatrices_SeqSBAIJ,
1199: MatIncreaseOverlap_SeqSBAIJ,
1200: MatGetValues_SeqSBAIJ,
1201: MatCopy_SeqSBAIJ,
1202: /*45*/ 0,
1203: MatScale_SeqSBAIJ,
1204: 0,
1205: 0,
1206: 0,
1207: /*50*/ 0,
1208: MatGetRowIJ_SeqSBAIJ,
1209: MatRestoreRowIJ_SeqSBAIJ,
1210: 0,
1211: 0,
1212: /*55*/ 0,
1213: 0,
1214: 0,
1215: 0,
1216: MatSetValuesBlocked_SeqSBAIJ,
1217: /*60*/ MatGetSubMatrix_SeqSBAIJ,
1218: 0,
1219: 0,
1220: 0,
1221: 0,
1222: /*65*/ 0,
1223: 0,
1224: 0,
1225: 0,
1226: 0,
1227: /*70*/ MatGetRowMaxAbs_SeqSBAIJ,
1228: 0,
1229: 0,
1230: 0,
1231: 0,
1232: /*75*/ 0,
1233: 0,
1234: 0,
1235: 0,
1236: 0,
1237: /*80*/ 0,
1238: 0,
1239: 0,
1240: #if !defined(PETSC_USE_COMPLEX)
1241: MatGetInertia_SeqSBAIJ,
1242: #else
1243: 0,
1244: #endif
1245: MatLoad_SeqSBAIJ,
1246: /*85*/ MatIsSymmetric_SeqSBAIJ,
1247: MatIsHermitian_SeqSBAIJ,
1248: MatIsStructurallySymmetric_SeqSBAIJ,
1249: 0,
1250: 0,
1251: /*90*/ 0,
1252: 0,
1253: 0,
1254: 0,
1255: 0,
1256: /*95*/ 0,
1257: 0,
1258: 0,
1259: 0,
1260: 0,
1261: /*100*/0,
1262: 0,
1263: 0,
1264: 0,
1265: 0,
1266: /*105*/0,
1267: MatRealPart_SeqSBAIJ,
1268: MatImaginaryPart_SeqSBAIJ,
1269: MatGetRowUpperTriangular_SeqSBAIJ,
1270: MatRestoreRowUpperTriangular_SeqSBAIJ,
1271: /*110*/0,
1272: 0,
1273: 0,
1274: 0,
1275: MatMissingDiagonal_SeqSBAIJ
1276: };
1281: PetscErrorCode MatStoreValues_SeqSBAIJ(Mat mat)
1282: {
1283: Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1284: PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
1286:
1288: if (aij->nonew != 1) {
1289: SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1290: }
1291:
1292: /* allocate space for values if not already there */
1293: if (!aij->saved_values) {
1294: PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
1295: }
1296:
1297: /* copy values over */
1298: PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
1299: return(0);
1300: }
1306: PetscErrorCode MatRetrieveValues_SeqSBAIJ(Mat mat)
1307: {
1308: Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1310: PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
1311:
1313: if (aij->nonew != 1) {
1314: SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1315: }
1316: if (!aij->saved_values) {
1317: SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1318: }
1319:
1320: /* copy values over */
1321: PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
1322: return(0);
1323: }
1329: PetscErrorCode MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
1330: {
1331: Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1333: PetscInt i,mbs,bs2, newbs = PetscAbs(bs);
1334: PetscTruth skipallocation = PETSC_FALSE,flg;
1335:
1337: B->preallocated = PETSC_TRUE;
1338: if (bs < 0) {
1339: PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");
1340: PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqSBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);
1341: PetscOptionsEnd();
1342: bs = PetscAbs(bs);
1343: }
1344: if (nnz && newbs != bs) {
1345: SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
1346: }
1347: bs = newbs;
1349: PetscMapSetBlockSize(B->rmap,bs);
1350: PetscMapSetBlockSize(B->cmap,bs);
1351: PetscMapSetUp(B->rmap);
1352: PetscMapSetUp(B->cmap);
1354: mbs = B->rmap->N/bs;
1355: bs2 = bs*bs;
1356:
1357: if (mbs*bs != B->rmap->N) {
1358: SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1359: }
1360:
1361: if (nz == MAT_SKIP_ALLOCATION) {
1362: skipallocation = PETSC_TRUE;
1363: nz = 0;
1364: }
1366: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1367: if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
1368: if (nnz) {
1369: for (i=0; i<mbs; i++) {
1370: if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
1371: if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs);
1372: }
1373: }
1374:
1375: B->ops->mult = MatMult_SeqSBAIJ_N;
1376: B->ops->multadd = MatMultAdd_SeqSBAIJ_N;
1377: B->ops->multtranspose = MatMult_SeqSBAIJ_N;
1378: B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_N;
1379: PetscOptionsHasName(((PetscObject)B)->prefix,"-mat_no_unroll",&flg);
1380: if (!flg) {
1381: switch (bs) {
1382: case 1:
1383: B->ops->mult = MatMult_SeqSBAIJ_1;
1384: B->ops->multadd = MatMultAdd_SeqSBAIJ_1;
1385: B->ops->multtranspose = MatMult_SeqSBAIJ_1;
1386: B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1;
1387: break;
1388: case 2:
1389: B->ops->mult = MatMult_SeqSBAIJ_2;
1390: B->ops->multadd = MatMultAdd_SeqSBAIJ_2;
1391: B->ops->multtranspose = MatMult_SeqSBAIJ_2;
1392: B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2;
1393: break;
1394: case 3:
1395: B->ops->mult = MatMult_SeqSBAIJ_3;
1396: B->ops->multadd = MatMultAdd_SeqSBAIJ_3;
1397: B->ops->multtranspose = MatMult_SeqSBAIJ_3;
1398: B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3;
1399: break;
1400: case 4:
1401: B->ops->mult = MatMult_SeqSBAIJ_4;
1402: B->ops->multadd = MatMultAdd_SeqSBAIJ_4;
1403: B->ops->multtranspose = MatMult_SeqSBAIJ_4;
1404: B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4;
1405: break;
1406: case 5:
1407: B->ops->mult = MatMult_SeqSBAIJ_5;
1408: B->ops->multadd = MatMultAdd_SeqSBAIJ_5;
1409: B->ops->multtranspose = MatMult_SeqSBAIJ_5;
1410: B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5;
1411: break;
1412: case 6:
1413: B->ops->mult = MatMult_SeqSBAIJ_6;
1414: B->ops->multadd = MatMultAdd_SeqSBAIJ_6;
1415: B->ops->multtranspose = MatMult_SeqSBAIJ_6;
1416: B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6;
1417: break;
1418: case 7:
1419: B->ops->mult = MatMult_SeqSBAIJ_7;
1420: B->ops->multadd = MatMultAdd_SeqSBAIJ_7;
1421: B->ops->multtranspose = MatMult_SeqSBAIJ_7;
1422: B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7;
1423: break;
1424: }
1425: }
1426:
1427: b->mbs = mbs;
1428: b->nbs = mbs;
1429: if (!skipallocation) {
1430: if (!b->imax) {
1431: PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);
1432: PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
1433: }
1434: if (!nnz) {
1435: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1436: else if (nz <= 0) nz = 1;
1437: for (i=0; i<mbs; i++) {
1438: b->imax[i] = nz;
1439: }
1440: nz = nz*mbs; /* total nz */
1441: } else {
1442: nz = 0;
1443: for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1444: }
1445: /* b->ilen will count nonzeros in each block row so far. */
1446: for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1447: /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1448:
1449: /* allocate the matrix space */
1450: MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
1451: PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);
1452: PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
1453: PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
1454: PetscMemzero(b->j,nz*sizeof(PetscInt));
1455: b->singlemalloc = PETSC_TRUE;
1456:
1457: /* pointer to beginning of each row */
1458: b->i[0] = 0;
1459: for (i=1; i<mbs+1; i++) {
1460: b->i[i] = b->i[i-1] + b->imax[i-1];
1461: }
1462: b->free_a = PETSC_TRUE;
1463: b->free_ij = PETSC_TRUE;
1464: } else {
1465: b->free_a = PETSC_FALSE;
1466: b->free_ij = PETSC_FALSE;
1467: }
1468:
1469: B->rmap->bs = bs;
1470: b->bs2 = bs2;
1471: b->nz = 0;
1472: b->maxnz = nz*bs2;
1473:
1474: b->inew = 0;
1475: b->jnew = 0;
1476: b->anew = 0;
1477: b->a2anew = 0;
1478: b->permute = PETSC_FALSE;
1479: return(0);
1480: }
1485: /*
1486: This is used to set the numeric factorization for both Cholesky and ICC symbolic factorization
1487: */
1488: PetscErrorCode MatSeqSBAIJSetNumericFactorization(Mat B,PetscTruth natural)
1489: {
1491: PetscTruth flg;
1492: PetscInt bs = B->rmap->bs;
1495: PetscOptionsHasName(((PetscObject)B)->prefix,"-mat_no_unroll",&flg);
1496: if (flg) bs = 8;
1498: if (!natural) {
1499: switch (bs) {
1500: case 1:
1501: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1502: break;
1503: case 2:
1504: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1505: break;
1506: case 3:
1507: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1508: break;
1509: case 4:
1510: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1511: break;
1512: case 5:
1513: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1514: break;
1515: case 6:
1516: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1517: break;
1518: case 7:
1519: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1520: break;
1521: default:
1522: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N;
1523: break;
1524: }
1525: } else {
1526: switch (bs) {
1527: case 1:
1528: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1529: break;
1530: case 2:
1531: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1532: break;
1533: case 3:
1534: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1535: break;
1536: case 4:
1537: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1538: break;
1539: case 5:
1540: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1541: break;
1542: case 6:
1543: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1544: break;
1545: case 7:
1546: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1547: break;
1548: default:
1549: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
1550: break;
1551: }
1552: }
1553: return(0);
1554: }
1557: EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
1558: EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*);
1561:
1565: PetscErrorCode MatGetFactor_seqsbaij_petsc(Mat A,MatFactorType ftype,Mat *B)
1566: {
1567: PetscInt n = A->rmap->n;
1568: PetscErrorCode ierr;
1571: MatCreate(((PetscObject)A)->comm,B);
1572: MatSetSizes(*B,n,n,n,n);
1573: if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
1574: MatSetType(*B,MATSEQSBAIJ);
1575: MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);
1576: (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqSBAIJ;
1577: (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqSBAIJ;
1578: } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported");
1579: (*B)->factor = ftype;
1580: return(0);
1581: }
1587: PetscErrorCode MatGetFactorAvailable_seqsbaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg)
1588: {
1590: if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
1591: *flg = PETSC_TRUE;
1592: } else {
1593: *flg = PETSC_FALSE;
1594: }
1595: return(0);
1596: }
1600: #if defined(PETSC_HAVE_MUMPS)
1602: #endif
1603: #if defined(PETSC_HAVE_SPOOLES)
1605: #endif
1606: #if defined(PETSC_HAVE_PASTIX)
1608: #endif
1611: /*MC
1612: MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1613: based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored.
1614:
1615: Options Database Keys:
1616: . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1617:
1618: Level: beginner
1619:
1620: .seealso: MatCreateSeqSBAIJ
1621: M*/
1626: PetscErrorCode MatCreate_SeqSBAIJ(Mat B)
1627: {
1628: Mat_SeqSBAIJ *b;
1630: PetscMPIInt size;
1631: PetscTruth flg;
1632:
1634: MPI_Comm_size(((PetscObject)B)->comm,&size);
1635: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1636:
1637: PetscNewLog(B,Mat_SeqSBAIJ,&b);
1638: B->data = (void*)b;
1639: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1640: B->ops->destroy = MatDestroy_SeqSBAIJ;
1641: B->ops->view = MatView_SeqSBAIJ;
1642: B->mapping = 0;
1643: b->row = 0;
1644: b->icol = 0;
1645: b->reallocs = 0;
1646: b->saved_values = 0;
1647:
1648:
1649: b->roworiented = PETSC_TRUE;
1650: b->nonew = 0;
1651: b->diag = 0;
1652: b->solve_work = 0;
1653: b->mult_work = 0;
1654: B->spptr = 0;
1655: b->keepzeroedrows = PETSC_FALSE;
1656: b->xtoy = 0;
1657: b->XtoY = 0;
1658:
1659: b->inew = 0;
1660: b->jnew = 0;
1661: b->anew = 0;
1662: b->a2anew = 0;
1663: b->permute = PETSC_FALSE;
1665: b->ignore_ltriangular = PETSC_FALSE;
1666: PetscOptionsHasName(((PetscObject)B)->prefix,"-mat_ignore_lower_triangular",&flg);
1667: if (flg) b->ignore_ltriangular = PETSC_TRUE;
1669: b->getrow_utriangular = PETSC_FALSE;
1670: PetscOptionsHasName(((PetscObject)B)->prefix,"-mat_getrow_uppertriangular",&flg);
1671: if (flg) b->getrow_utriangular = PETSC_TRUE;
1673: #if defined(PETSC_HAVE_PASTIX)
1674: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_pastix_C",
1675: "MatGetFactor_seqsbaij_pastix",
1676: MatGetFactor_seqsbaij_pastix);
1677: #endif
1678: #if defined(PETSC_HAVE_SPOOLES)
1679: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_spooles_C",
1680: "MatGetFactor_seqsbaij_spooles",
1681: MatGetFactor_seqsbaij_spooles);
1682: #endif
1683: #if defined(PETSC_HAVE_MUMPS)
1684: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_mumps_C",
1685: "MatGetFactor_seqsbaij_mumps",
1686: MatGetFactor_seqsbaij_mumps);
1687: #endif
1688: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqsbaij_petsc_C",
1689: "MatGetFactorAvailable_seqsbaij_petsc",
1690: MatGetFactorAvailable_seqsbaij_petsc);
1691: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_petsc_C",
1692: "MatGetFactor_seqsbaij_petsc",
1693: MatGetFactor_seqsbaij_petsc);
1694: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1695: "MatStoreValues_SeqSBAIJ",
1696: MatStoreValues_SeqSBAIJ);
1697: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1698: "MatRetrieveValues_SeqSBAIJ",
1699: (void*)MatRetrieveValues_SeqSBAIJ);
1700: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1701: "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1702: MatSeqSBAIJSetColumnIndices_SeqSBAIJ);
1703: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C",
1704: "MatConvert_SeqSBAIJ_SeqAIJ",
1705: MatConvert_SeqSBAIJ_SeqAIJ);
1706: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C",
1707: "MatConvert_SeqSBAIJ_SeqBAIJ",
1708: MatConvert_SeqSBAIJ_SeqBAIJ);
1709: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1710: "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1711: MatSeqSBAIJSetPreallocation_SeqSBAIJ);
1713: B->symmetric = PETSC_TRUE;
1714: B->structurally_symmetric = PETSC_TRUE;
1715: B->symmetric_set = PETSC_TRUE;
1716: B->structurally_symmetric_set = PETSC_TRUE;
1717: PetscObjectChangeTypeName((PetscObject)B,MATSEQSBAIJ);
1718: return(0);
1719: }
1724: /*@C
1725: MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1726: compressed row) format. For good matrix assembly performance the
1727: user should preallocate the matrix storage by setting the parameter nz
1728: (or the array nnz). By setting these parameters accurately, performance
1729: during matrix assembly can be increased by more than a factor of 50.
1731: Collective on Mat
1733: Input Parameters:
1734: + A - the symmetric matrix
1735: . bs - size of block
1736: . nz - number of block nonzeros per block row (same for all rows)
1737: - nnz - array containing the number of block nonzeros in the upper triangular plus
1738: diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1740: Options Database Keys:
1741: . -mat_no_unroll - uses code that does not unroll the loops in the
1742: block calculations (much slower)
1743: . -mat_block_size - size of the blocks to use (only works if a negative bs is passed in
1745: Level: intermediate
1747: Notes:
1748: Specify the preallocated storage with either nz or nnz (not both).
1749: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1750: allocation. For additional details, see the users manual chapter on
1751: matrices.
1753: You can call MatGetInfo() to get information on how effective the preallocation was;
1754: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1755: You can also run with the option -info and look for messages with the string
1756: malloc in them to see if additional memory allocation was needed.
1758: If the nnz parameter is given then the nz parameter is ignored
1761: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1762: @*/
1763: PetscErrorCode MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
1764: {
1765: PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
1768: PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);
1769: if (f) {
1770: (*f)(B,bs,nz,nnz);
1771: }
1772: return(0);
1773: }
1777: /*@C
1778: MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1779: compressed row) format. For good matrix assembly performance the
1780: user should preallocate the matrix storage by setting the parameter nz
1781: (or the array nnz). By setting these parameters accurately, performance
1782: during matrix assembly can be increased by more than a factor of 50.
1784: Collective on MPI_Comm
1786: Input Parameters:
1787: + comm - MPI communicator, set to PETSC_COMM_SELF
1788: . bs - size of block
1789: . m - number of rows, or number of columns
1790: . nz - number of block nonzeros per block row (same for all rows)
1791: - nnz - array containing the number of block nonzeros in the upper triangular plus
1792: diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1794: Output Parameter:
1795: . A - the symmetric matrix
1797: Options Database Keys:
1798: . -mat_no_unroll - uses code that does not unroll the loops in the
1799: block calculations (much slower)
1800: . -mat_block_size - size of the blocks to use
1802: Level: intermediate
1804: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1805: MatXXXXSetPreallocation() paradgm instead of this routine directly.
1806: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1808: Notes:
1809: The number of rows and columns must be divisible by blocksize.
1810: This matrix type does not support complex Hermitian operation.
1812: Specify the preallocated storage with either nz or nnz (not both).
1813: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1814: allocation. For additional details, see the users manual chapter on
1815: matrices.
1817: If the nnz parameter is given then the nz parameter is ignored
1819: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1820: @*/
1821: PetscErrorCode MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1822: {
1824:
1826: MatCreate(comm,A);
1827: MatSetSizes(*A,m,n,m,n);
1828: MatSetType(*A,MATSEQSBAIJ);
1829: MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);
1830: return(0);
1831: }
1835: PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1836: {
1837: Mat C;
1838: Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1840: PetscInt i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1843: if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1845: *B = 0;
1846: MatCreate(((PetscObject)A)->comm,&C);
1847: MatSetSizes(C,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);
1848: MatSetType(C,((PetscObject)A)->type_name);
1849: PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
1850: c = (Mat_SeqSBAIJ*)C->data;
1852: C->preallocated = PETSC_TRUE;
1853: C->factor = A->factor;
1854: c->row = 0;
1855: c->icol = 0;
1856: c->saved_values = 0;
1857: c->keepzeroedrows = a->keepzeroedrows;
1858: C->assembled = PETSC_TRUE;
1860: PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);
1861: PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);
1862: c->bs2 = a->bs2;
1863: c->mbs = a->mbs;
1864: c->nbs = a->nbs;
1866: PetscMalloc2((mbs+1),PetscInt,&c->imax,(mbs+1),PetscInt,&c->ilen);
1867: for (i=0; i<mbs; i++) {
1868: c->imax[i] = a->imax[i];
1869: c->ilen[i] = a->ilen[i];
1870: }
1872: /* allocate the matrix space */
1873: PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);
1874: c->singlemalloc = PETSC_TRUE;
1875: PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
1876: PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));
1877: if (mbs > 0) {
1878: PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
1879: if (cpvalues == MAT_COPY_VALUES) {
1880: PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
1881: } else {
1882: PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
1883: }
1884: }
1886: c->roworiented = a->roworiented;
1887: c->nonew = a->nonew;
1889: if (a->diag) {
1890: PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);
1891: PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));
1892: for (i=0; i<mbs; i++) {
1893: c->diag[i] = a->diag[i];
1894: }
1895: } else c->diag = 0;
1896: c->nz = a->nz;
1897: c->maxnz = a->maxnz;
1898: c->solve_work = 0;
1899: c->mult_work = 0;
1900: c->free_a = PETSC_TRUE;
1901: c->free_ij = PETSC_TRUE;
1902: *B = C;
1903: PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
1904: return(0);
1905: }
1909: PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, const MatType type,Mat *A)
1910: {
1911: Mat_SeqSBAIJ *a;
1912: Mat B;
1914: int fd;
1915: PetscMPIInt size;
1916: PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1;
1917: PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1918: PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows;
1919: PetscInt *masked,nmask,tmp,bs2,ishift;
1920: PetscScalar *aa;
1921: MPI_Comm comm = ((PetscObject)viewer)->comm;
1924: PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
1925: bs2 = bs*bs;
1927: MPI_Comm_size(comm,&size);
1928: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1929: PetscViewerBinaryGetDescriptor(viewer,&fd);
1930: PetscBinaryRead(fd,header,4,PETSC_INT);
1931: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1932: M = header[1]; N = header[2]; nz = header[3];
1934: if (header[3] < 0) {
1935: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1936: }
1938: if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1940: /*
1941: This code adds extra rows to make sure the number of rows is
1942: divisible by the blocksize
1943: */
1944: mbs = M/bs;
1945: extra_rows = bs - M + bs*(mbs);
1946: if (extra_rows == bs) extra_rows = 0;
1947: else mbs++;
1948: if (extra_rows) {
1949: PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
1950: }
1952: /* read in row lengths */
1953: PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);
1954: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1955: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1957: /* read in column indices */
1958: PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);
1959: PetscBinaryRead(fd,jj,nz,PETSC_INT);
1960: for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1962: /* loop over row lengths determining block row lengths */
1963: PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);
1964: PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));
1965: PetscMalloc(2*mbs*sizeof(PetscInt),&mask);
1966: PetscMemzero(mask,mbs*sizeof(PetscInt));
1967: masked = mask + mbs;
1968: rowcount = 0; nzcount = 0;
1969: for (i=0; i<mbs; i++) {
1970: nmask = 0;
1971: for (j=0; j<bs; j++) {
1972: kmax = rowlengths[rowcount];
1973: for (k=0; k<kmax; k++) {
1974: tmp = jj[nzcount++]/bs; /* block col. index */
1975: if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1976: }
1977: rowcount++;
1978: }
1979: s_browlengths[i] += nmask;
1980:
1981: /* zero out the mask elements we set */
1982: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1983: }
1985: /* create our matrix */
1986: MatCreate(comm,&B);
1987: MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);
1988: MatSetType(B,type);
1989: MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);
1990: a = (Mat_SeqSBAIJ*)B->data;
1992: /* set matrix "i" values */
1993: a->i[0] = 0;
1994: for (i=1; i<= mbs; i++) {
1995: a->i[i] = a->i[i-1] + s_browlengths[i-1];
1996: a->ilen[i-1] = s_browlengths[i-1];
1997: }
1998: a->nz = a->i[mbs];
2000: /* read in nonzero values */
2001: PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
2002: PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
2003: for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2005: /* set "a" and "j" values into matrix */
2006: nzcount = 0; jcount = 0;
2007: for (i=0; i<mbs; i++) {
2008: nzcountb = nzcount;
2009: nmask = 0;
2010: for (j=0; j<bs; j++) {
2011: kmax = rowlengths[i*bs+j];
2012: for (k=0; k<kmax; k++) {
2013: tmp = jj[nzcount++]/bs; /* block col. index */
2014: if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
2015: }
2016: }
2017: /* sort the masked values */
2018: PetscSortInt(nmask,masked);
2020: /* set "j" values into matrix */
2021: maskcount = 1;
2022: for (j=0; j<nmask; j++) {
2023: a->j[jcount++] = masked[j];
2024: mask[masked[j]] = maskcount++;
2025: }
2027: /* set "a" values into matrix */
2028: ishift = bs2*a->i[i];
2029: for (j=0; j<bs; j++) {
2030: kmax = rowlengths[i*bs+j];
2031: for (k=0; k<kmax; k++) {
2032: tmp = jj[nzcountb]/bs ; /* block col. index */
2033: if (tmp >= i){
2034: block = mask[tmp] - 1;
2035: point = jj[nzcountb] - bs*tmp;
2036: idx = ishift + bs2*block + j + bs*point;
2037: a->a[idx] = aa[nzcountb];
2038: }
2039: nzcountb++;
2040: }
2041: }
2042: /* zero out the mask elements we set */
2043: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2044: }
2045: if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2047: PetscFree(rowlengths);
2048: PetscFree(s_browlengths);
2049: PetscFree(aa);
2050: PetscFree(jj);
2051: PetscFree(mask);
2053: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2054: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2055: MatView_Private(B);
2056: *A = B;
2057: return(0);
2058: }
2062: PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2063: {
2064: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
2065: MatScalar *aa=a->a,*v,*v1;
2066: PetscScalar *x,*b,*t,sum,d;
2068: PetscInt m=a->mbs,bs=A->rmap->bs,*ai=a->i,*aj=a->j;
2069: PetscInt nz,nz1,*vj,*vj1,i;
2072: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
2074: its = its*lits;
2075: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2077: if (bs > 1)
2078: SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2080: VecGetArray(xx,&x);
2081: if (xx != bb) {
2082: VecGetArray(bb,&b);
2083: } else {
2084: b = x;
2085: }
2087: if (!a->relax_work) {
2088: PetscMalloc(m*sizeof(PetscScalar),&a->relax_work);
2089: }
2090: t = a->relax_work;
2091:
2092: if (flag & SOR_ZERO_INITIAL_GUESS) {
2093: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2094: PetscMemcpy(t,b,m*sizeof(PetscScalar));
2096: for (i=0; i<m; i++){
2097: d = *(aa + ai[i]); /* diag[i] */
2098: v = aa + ai[i] + 1;
2099: vj = aj + ai[i] + 1;
2100: nz = ai[i+1] - ai[i] - 1;
2101: PetscLogFlops(2*nz);
2102: x[i] = omega*t[i]/d;
2103: while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
2104: }
2105: }
2107: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2108: if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){
2109: t = b;
2110: }
2111:
2112: for (i=m-1; i>=0; i--){
2113: d = *(aa + ai[i]);
2114: v = aa + ai[i] + 1;
2115: vj = aj + ai[i] + 1;
2116: nz = ai[i+1] - ai[i] - 1;
2117: PetscLogFlops(2*nz);
2118: sum = t[i];
2119: while (nz--) sum -= x[*vj++]*(*v++);
2120: x[i] = (1-omega)*x[i] + omega*sum/d;
2121: }
2122: t = a->relax_work;
2123: }
2124: its--;
2125: }
2127: while (its--) {
2128: /*
2129: forward sweep:
2130: for i=0,...,m-1:
2131: sum[i] = (b[i] - U(i,:)x )/d[i];
2132: x[i] = (1-omega)x[i] + omega*sum[i];
2133: b = b - x[i]*U^T(i,:);
2134:
2135: */
2136: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2137: PetscMemcpy(t,b,m*sizeof(PetscScalar));
2139: for (i=0; i<m; i++){
2140: d = *(aa + ai[i]); /* diag[i] */
2141: v = aa + ai[i] + 1; v1=v;
2142: vj = aj + ai[i] + 1; vj1=vj;
2143: nz = ai[i+1] - ai[i] - 1; nz1=nz;
2144: sum = t[i];
2145: PetscLogFlops(4*nz-2);
2146: while (nz1--) sum -= (*v1++)*x[*vj1++];
2147: x[i] = (1-omega)*x[i] + omega*sum/d;
2148: while (nz--) t[*vj++] -= x[i]*(*v++);
2149: }
2150: }
2151:
2152: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2153: /*
2154: backward sweep:
2155: b = b - x[i]*U^T(i,:), i=0,...,n-2
2156: for i=m-1,...,0:
2157: sum[i] = (b[i] - U(i,:)x )/d[i];
2158: x[i] = (1-omega)x[i] + omega*sum[i];
2159: */
2160: /* if there was a forward sweep done above then I thing the next two for loops are not needed */
2161: PetscMemcpy(t,b,m*sizeof(PetscScalar));
2162:
2163: for (i=0; i<m-1; i++){ /* update rhs */
2164: v = aa + ai[i] + 1;
2165: vj = aj + ai[i] + 1;
2166: nz = ai[i+1] - ai[i] - 1;
2167: PetscLogFlops(2*nz-1);
2168: while (nz--) t[*vj++] -= x[i]*(*v++);
2169: }
2170: for (i=m-1; i>=0; i--){
2171: d = *(aa + ai[i]);
2172: v = aa + ai[i] + 1;
2173: vj = aj + ai[i] + 1;
2174: nz = ai[i+1] - ai[i] - 1;
2175: PetscLogFlops(2*nz-1);
2176: sum = t[i];
2177: while (nz--) sum -= x[*vj++]*(*v++);
2178: x[i] = (1-omega)*x[i] + omega*sum/d;
2179: }
2180: }
2181: }
2183: VecRestoreArray(xx,&x);
2184: if (bb != xx) {
2185: VecRestoreArray(bb,&b);
2186: }
2187: return(0);
2188: }
2192: /*@
2193: MatCreateSeqSBAIJWithArrays - Creates an sequential SBAIJ matrix using matrix elements
2194: (upper triangular entries in CSR format) provided by the user.
2196: Collective on MPI_Comm
2198: Input Parameters:
2199: + comm - must be an MPI communicator of size 1
2200: . bs - size of block
2201: . m - number of rows
2202: . n - number of columns
2203: . i - row indices
2204: . j - column indices
2205: - a - matrix values
2207: Output Parameter:
2208: . mat - the matrix
2210: Level: intermediate
2212: Notes:
2213: The i, j, and a arrays are not copied by this routine, the user must free these arrays
2214: once the matrix is destroyed
2216: You cannot set new nonzero locations into this matrix, that will generate an error.
2218: The i and j indices are 0 based
2220: .seealso: MatCreate(), MatCreateMPISBAIJ(), MatCreateSeqSBAIJ()
2222: @*/
2223: PetscErrorCode MatCreateSeqSBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2224: {
2226: PetscInt ii;
2227: Mat_SeqSBAIJ *sbaij;
2230: if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2231: if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2232:
2233: MatCreate(comm,mat);
2234: MatSetSizes(*mat,m,n,m,n);
2235: MatSetType(*mat,MATSEQSBAIJ);
2236: MatSeqSBAIJSetPreallocation_SeqSBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);
2237: sbaij = (Mat_SeqSBAIJ*)(*mat)->data;
2238: PetscMalloc2(m,PetscInt,&sbaij->imax,m,PetscInt,&sbaij->ilen);
2240: sbaij->i = i;
2241: sbaij->j = j;
2242: sbaij->a = a;
2243: sbaij->singlemalloc = PETSC_FALSE;
2244: sbaij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2245: sbaij->free_a = PETSC_FALSE;
2246: sbaij->free_ij = PETSC_FALSE;
2248: for (ii=0; ii<m; ii++) {
2249: sbaij->ilen[ii] = sbaij->imax[ii] = i[ii+1] - i[ii];
2250: #if defined(PETSC_USE_DEBUG)
2251: if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2252: #endif
2253: }
2254: #if defined(PETSC_USE_DEBUG)
2255: for (ii=0; ii<sbaij->i[m]; ii++) {
2256: if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2257: if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2258: }
2259: #endif
2261: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
2262: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
2263: return(0);
2264: }