Actual source code: mpiaij.c
2: #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
3: #include <petscblaslapack.h>
5: /*MC
6: MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
8: This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9: and MATMPIAIJ otherwise. As a result, for single process communicators,
10: MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
11: for communicators controlling multiple processes. It is recommended that you call both of
12: the above preallocation routines for simplicity.
14: Options Database Keys:
15: . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
17: Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
18: enough exist.
20: Level: beginner
22: .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
23: M*/
25: /*MC
26: MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
28: This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29: and MATMPIAIJCRL otherwise. As a result, for single process communicators,
30: MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31: for communicators controlling multiple processes. It is recommended that you call both of
32: the above preallocation routines for simplicity.
34: Options Database Keys:
35: . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
37: Level: beginner
39: .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
40: M*/
44: PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45: {
46: PetscErrorCode ierr;
47: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;
48: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data;
49: Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data;
50: const PetscInt *ia,*ib;
51: const MatScalar *aa,*bb;
52: PetscInt na,nb,i,j,*rows,cnt=0,n0rows;
53: PetscInt m = M->rmap->n,rstart = M->rmap->rstart;
56: *keptrows = 0;
57: ia = a->i;
58: ib = b->i;
59: for (i=0; i<m; i++) {
60: na = ia[i+1] - ia[i];
61: nb = ib[i+1] - ib[i];
62: if (!na && !nb) {
63: cnt++;
64: goto ok1;
65: }
66: aa = a->a + ia[i];
67: for (j=0; j<na; j++) {
68: if (aa[j] != 0.0) goto ok1;
69: }
70: bb = b->a + ib[i];
71: for (j=0; j <nb; j++) {
72: if (bb[j] != 0.0) goto ok1;
73: }
74: cnt++;
75: ok1:;
76: }
77: MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);
78: if (!n0rows) return(0);
79: PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);
80: cnt = 0;
81: for (i=0; i<m; i++) {
82: na = ia[i+1] - ia[i];
83: nb = ib[i+1] - ib[i];
84: if (!na && !nb) continue;
85: aa = a->a + ia[i];
86: for(j=0; j<na;j++) {
87: if (aa[j] != 0.0) {
88: rows[cnt++] = rstart + i;
89: goto ok2;
90: }
91: }
92: bb = b->a + ib[i];
93: for (j=0; j<nb; j++) {
94: if (bb[j] != 0.0) {
95: rows[cnt++] = rstart + i;
96: goto ok2;
97: }
98: }
99: ok2:;
100: }
101: ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);
102: return(0);
103: }
107: PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108: {
110: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data;
111: PetscInt i,n,*garray = aij->garray;
112: Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data;
113: Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data;
114: PetscReal *work;
117: MatGetSize(A,PETSC_NULL,&n);
118: PetscMalloc(n*sizeof(PetscReal),&work);
119: PetscMemzero(work,n*sizeof(PetscReal));
120: if (type == NORM_2) {
121: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122: work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123: }
124: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125: work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126: }
127: } else if (type == NORM_1) {
128: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129: work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130: }
131: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132: work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133: }
134: } else if (type == NORM_INFINITY) {
135: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136: work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137: }
138: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139: work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140: }
142: } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143: if (type == NORM_INFINITY) {
144: MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);
145: } else {
146: MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);
147: }
148: PetscFree(work);
149: if (type == NORM_2) {
150: for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151: }
152: return(0);
153: }
157: /*
158: Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159: MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
161: Only for square matrices
162: */
163: PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164: {
165: PetscMPIInt rank,size;
166: PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
168: Mat mat;
169: Mat_SeqAIJ *gmata;
170: PetscMPIInt tag;
171: MPI_Status status;
172: PetscBool aij;
173: MatScalar *gmataa,*ao,*ad,*gmataarestore=0;
176: CHKMEMQ;
177: MPI_Comm_rank(comm,&rank);
178: MPI_Comm_size(comm,&size);
179: if (!rank) {
180: PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);
181: if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182: }
183: if (reuse == MAT_INITIAL_MATRIX) {
184: MatCreate(comm,&mat);
185: MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
186: MatSetType(mat,MATAIJ);
187: PetscMalloc((size+1)*sizeof(PetscInt),&rowners);
188: PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);
189: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
190: rowners[0] = 0;
191: for (i=2; i<=size; i++) {
192: rowners[i] += rowners[i-1];
193: }
194: rstart = rowners[rank];
195: rend = rowners[rank+1];
196: PetscObjectGetNewTag((PetscObject)mat,&tag);
197: if (!rank) {
198: gmata = (Mat_SeqAIJ*) gmat->data;
199: /* send row lengths to all processors */
200: for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
201: for (i=1; i<size; i++) {
202: MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
203: }
204: /* determine number diagonal and off-diagonal counts */
205: PetscMemzero(olens,m*sizeof(PetscInt));
206: PetscMalloc(m*sizeof(PetscInt),&ld);
207: PetscMemzero(ld,m*sizeof(PetscInt));
208: jj = 0;
209: for (i=0; i<m; i++) {
210: for (j=0; j<dlens[i]; j++) {
211: if (gmata->j[jj] < rstart) ld[i]++;
212: if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
213: jj++;
214: }
215: }
216: /* send column indices to other processes */
217: for (i=1; i<size; i++) {
218: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
219: MPI_Send(&nz,1,MPIU_INT,i,tag,comm);
220: MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);
221: }
223: /* send numerical values to other processes */
224: for (i=1; i<size; i++) {
225: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
226: MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
227: }
228: gmataa = gmata->a;
229: gmataj = gmata->j;
231: } else {
232: /* receive row lengths */
233: MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);
234: /* receive column indices */
235: MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);
236: PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);
237: MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);
238: /* determine number diagonal and off-diagonal counts */
239: PetscMemzero(olens,m*sizeof(PetscInt));
240: PetscMalloc(m*sizeof(PetscInt),&ld);
241: PetscMemzero(ld,m*sizeof(PetscInt));
242: jj = 0;
243: for (i=0; i<m; i++) {
244: for (j=0; j<dlens[i]; j++) {
245: if (gmataj[jj] < rstart) ld[i]++;
246: if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
247: jj++;
248: }
249: }
250: /* receive numerical values */
251: PetscMemzero(gmataa,nz*sizeof(PetscScalar));
252: MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
253: }
254: /* set preallocation */
255: for (i=0; i<m; i++) {
256: dlens[i] -= olens[i];
257: }
258: MatSeqAIJSetPreallocation(mat,0,dlens);
259: MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);
260:
261: for (i=0; i<m; i++) {
262: dlens[i] += olens[i];
263: }
264: cnt = 0;
265: for (i=0; i<m; i++) {
266: row = rstart + i;
267: MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);
268: cnt += dlens[i];
269: }
270: if (rank) {
271: PetscFree2(gmataa,gmataj);
272: }
273: PetscFree2(dlens,olens);
274: PetscFree(rowners);
275: ((Mat_MPIAIJ*)(mat->data))->ld = ld;
276: *inmat = mat;
277: } else { /* column indices are already set; only need to move over numerical values from process 0 */
278: Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
279: Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
280: mat = *inmat;
281: PetscObjectGetNewTag((PetscObject)mat,&tag);
282: if (!rank) {
283: /* send numerical values to other processes */
284: gmata = (Mat_SeqAIJ*) gmat->data;
285: MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);
286: gmataa = gmata->a;
287: for (i=1; i<size; i++) {
288: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
289: MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
290: }
291: nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
292: } else {
293: /* receive numerical values from process 0*/
294: nz = Ad->nz + Ao->nz;
295: PetscMalloc(nz*sizeof(PetscScalar),&gmataa); gmataarestore = gmataa;
296: MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
297: }
298: /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
299: ld = ((Mat_MPIAIJ*)(mat->data))->ld;
300: ad = Ad->a;
301: ao = Ao->a;
302: if (mat->rmap->n) {
303: i = 0;
304: nz = ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
305: nz = Ad->i[i+1] - Ad->i[i]; PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
306: }
307: for (i=1; i<mat->rmap->n; i++) {
308: nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
309: nz = Ad->i[i+1] - Ad->i[i]; PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
310: }
311: i--;
312: if (mat->rmap->n) {
313: nz = Ao->i[i+1] - Ao->i[i] - ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
314: }
315: if (rank) {
316: PetscFree(gmataarestore);
317: }
318: }
319: MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
320: MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
321: CHKMEMQ;
322: return(0);
323: }
325: /*
326: Local utility routine that creates a mapping from the global column
327: number to the local number in the off-diagonal part of the local
328: storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
329: a slightly higher hash table cost; without it it is not scalable (each processor
330: has an order N integer array but is fast to acess.
331: */
334: PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
335: {
336: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
338: PetscInt n = aij->B->cmap->n,i;
341: #if defined (PETSC_USE_CTABLE)
342: PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);
343: for (i=0; i<n; i++){
344: PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);
345: }
346: #else
347: PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);
348: PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));
349: PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));
350: for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
351: #endif
352: return(0);
353: }
355: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
356: { \
357: if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
358: lastcol1 = col;\
359: while (high1-low1 > 5) { \
360: t = (low1+high1)/2; \
361: if (rp1[t] > col) high1 = t; \
362: else low1 = t; \
363: } \
364: for (_i=low1; _i<high1; _i++) { \
365: if (rp1[_i] > col) break; \
366: if (rp1[_i] == col) { \
367: if (addv == ADD_VALUES) ap1[_i] += value; \
368: else ap1[_i] = value; \
369: goto a_noinsert; \
370: } \
371: } \
372: if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
373: if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
374: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
375: MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
376: N = nrow1++ - 1; a->nz++; high1++; \
377: /* shift up all the later entries in this row */ \
378: for (ii=N; ii>=_i; ii--) { \
379: rp1[ii+1] = rp1[ii]; \
380: ap1[ii+1] = ap1[ii]; \
381: } \
382: rp1[_i] = col; \
383: ap1[_i] = value; \
384: a_noinsert: ; \
385: ailen[row] = nrow1; \
386: }
389: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
390: { \
391: if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
392: lastcol2 = col;\
393: while (high2-low2 > 5) { \
394: t = (low2+high2)/2; \
395: if (rp2[t] > col) high2 = t; \
396: else low2 = t; \
397: } \
398: for (_i=low2; _i<high2; _i++) { \
399: if (rp2[_i] > col) break; \
400: if (rp2[_i] == col) { \
401: if (addv == ADD_VALUES) ap2[_i] += value; \
402: else ap2[_i] = value; \
403: goto b_noinsert; \
404: } \
405: } \
406: if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
407: if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
408: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
409: MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
410: N = nrow2++ - 1; b->nz++; high2++; \
411: /* shift up all the later entries in this row */ \
412: for (ii=N; ii>=_i; ii--) { \
413: rp2[ii+1] = rp2[ii]; \
414: ap2[ii+1] = ap2[ii]; \
415: } \
416: rp2[_i] = col; \
417: ap2[_i] = value; \
418: b_noinsert: ; \
419: bilen[row] = nrow2; \
420: }
424: PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
425: {
426: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
427: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
429: PetscInt l,*garray = mat->garray,diag;
432: /* code only works for square matrices A */
434: /* find size of row to the left of the diagonal part */
435: MatGetOwnershipRange(A,&diag,0);
436: row = row - diag;
437: for (l=0; l<b->i[row+1]-b->i[row]; l++) {
438: if (garray[b->j[b->i[row]+l]] > diag) break;
439: }
440: PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));
442: /* diagonal part */
443: PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));
445: /* right of diagonal part */
446: PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));
447: return(0);
448: }
452: PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
453: {
454: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
455: PetscScalar value;
457: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
458: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
459: PetscBool roworiented = aij->roworiented;
461: /* Some Variables required in the macro */
462: Mat A = aij->A;
463: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
464: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
465: MatScalar *aa = a->a;
466: PetscBool ignorezeroentries = a->ignorezeroentries;
467: Mat B = aij->B;
468: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
469: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
470: MatScalar *ba = b->a;
472: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
473: PetscInt nonew = a->nonew;
474: MatScalar *ap1,*ap2;
478: for (i=0; i<m; i++) {
479: if (im[i] < 0) continue;
480: #if defined(PETSC_USE_DEBUG)
481: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
482: #endif
483: if (im[i] >= rstart && im[i] < rend) {
484: row = im[i] - rstart;
485: lastcol1 = -1;
486: rp1 = aj + ai[row];
487: ap1 = aa + ai[row];
488: rmax1 = aimax[row];
489: nrow1 = ailen[row];
490: low1 = 0;
491: high1 = nrow1;
492: lastcol2 = -1;
493: rp2 = bj + bi[row];
494: ap2 = ba + bi[row];
495: rmax2 = bimax[row];
496: nrow2 = bilen[row];
497: low2 = 0;
498: high2 = nrow2;
500: for (j=0; j<n; j++) {
501: if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
502: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
503: if (in[j] >= cstart && in[j] < cend){
504: col = in[j] - cstart;
505: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
506: } else if (in[j] < 0) continue;
507: #if defined(PETSC_USE_DEBUG)
508: else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
509: #endif
510: else {
511: if (mat->was_assembled) {
512: if (!aij->colmap) {
513: CreateColmap_MPIAIJ_Private(mat);
514: }
515: #if defined (PETSC_USE_CTABLE)
516: PetscTableFind(aij->colmap,in[j]+1,&col);
517: col--;
518: #else
519: col = aij->colmap[in[j]] - 1;
520: #endif
521: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
522: DisAssemble_MPIAIJ(mat);
523: col = in[j];
524: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
525: B = aij->B;
526: b = (Mat_SeqAIJ*)B->data;
527: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
528: rp2 = bj + bi[row];
529: ap2 = ba + bi[row];
530: rmax2 = bimax[row];
531: nrow2 = bilen[row];
532: low2 = 0;
533: high2 = nrow2;
534: bm = aij->B->rmap->n;
535: ba = b->a;
536: }
537: } else col = in[j];
538: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
539: }
540: }
541: } else {
542: if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
543: if (!aij->donotstash) {
544: mat->assembled = PETSC_FALSE;
545: if (roworiented) {
546: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
547: } else {
548: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
549: }
550: }
551: }
552: }
553: return(0);
554: }
558: PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
559: {
560: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
562: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
563: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
566: for (i=0; i<m; i++) {
567: if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
568: if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
569: if (idxm[i] >= rstart && idxm[i] < rend) {
570: row = idxm[i] - rstart;
571: for (j=0; j<n; j++) {
572: if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
573: if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
574: if (idxn[j] >= cstart && idxn[j] < cend){
575: col = idxn[j] - cstart;
576: MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
577: } else {
578: if (!aij->colmap) {
579: CreateColmap_MPIAIJ_Private(mat);
580: }
581: #if defined (PETSC_USE_CTABLE)
582: PetscTableFind(aij->colmap,idxn[j]+1,&col);
583: col --;
584: #else
585: col = aij->colmap[idxn[j]] - 1;
586: #endif
587: if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
588: else {
589: MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
590: }
591: }
592: }
593: } else {
594: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
595: }
596: }
597: return(0);
598: }
600: extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
604: PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
605: {
606: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
608: PetscInt nstash,reallocs;
609: InsertMode addv;
612: if (aij->donotstash || mat->nooffprocentries) {
613: return(0);
614: }
616: /* make sure all processors are either in INSERTMODE or ADDMODE */
617: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);
618: if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
619: mat->insertmode = addv; /* in case this processor had no cache */
621: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
622: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
623: PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
624: return(0);
625: }
629: PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
630: {
631: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
632: Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data;
634: PetscMPIInt n;
635: PetscInt i,j,rstart,ncols,flg;
636: PetscInt *row,*col;
637: PetscBool other_disassembled;
638: PetscScalar *val;
639: InsertMode addv = mat->insertmode;
641: /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
643: if (!aij->donotstash && !mat->nooffprocentries) {
644: while (1) {
645: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
646: if (!flg) break;
648: for (i=0; i<n;) {
649: /* Now identify the consecutive vals belonging to the same row */
650: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
651: if (j < n) ncols = j-i;
652: else ncols = n-i;
653: /* Now assemble all these values with a single function call */
654: MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
655: i = j;
656: }
657: }
658: MatStashScatterEnd_Private(&mat->stash);
659: }
660: MatAssemblyBegin(aij->A,mode);
661: MatAssemblyEnd(aij->A,mode);
663: /* determine if any processor has disassembled, if so we must
664: also disassemble ourselfs, in order that we may reassemble. */
665: /*
666: if nonzero structure of submatrix B cannot change then we know that
667: no processor disassembled thus we can skip this stuff
668: */
669: if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
670: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);
671: if (mat->was_assembled && !other_disassembled) {
672: DisAssemble_MPIAIJ(mat);
673: }
674: }
675: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
676: MatSetUpMultiply_MPIAIJ(mat);
677: }
678: MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);
679: MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);
680: MatAssemblyBegin(aij->B,mode);
681: MatAssemblyEnd(aij->B,mode);
683: PetscFree2(aij->rowvalues,aij->rowindices);
684: aij->rowvalues = 0;
686: /* used by MatAXPY() */
687: a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
688: a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */
690: VecDestroy(&aij->diag);
691: if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
692: return(0);
693: }
697: PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
698: {
699: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
703: MatZeroEntries(l->A);
704: MatZeroEntries(l->B);
705: return(0);
706: }
710: PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
711: {
712: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
713: PetscErrorCode ierr;
714: PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
715: PetscInt i,*owners = A->rmap->range;
716: PetscInt *nprocs,j,idx,nsends,row;
717: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
718: PetscInt *rvalues,count,base,slen,*source;
719: PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart;
720: MPI_Comm comm = ((PetscObject)A)->comm;
721: MPI_Request *send_waits,*recv_waits;
722: MPI_Status recv_status,*send_status;
723: const PetscScalar *xx;
724: PetscScalar *bb;
725: #if defined(PETSC_DEBUG)
726: PetscBool found = PETSC_FALSE;
727: #endif
730: /* first count number of contributors to each processor */
731: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
732: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
733: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
734: j = 0;
735: for (i=0; i<N; i++) {
736: if (lastidx > (idx = rows[i])) j = 0;
737: lastidx = idx;
738: for (; j<size; j++) {
739: if (idx >= owners[j] && idx < owners[j+1]) {
740: nprocs[2*j]++;
741: nprocs[2*j+1] = 1;
742: owner[i] = j;
743: #if defined(PETSC_DEBUG)
744: found = PETSC_TRUE;
745: #endif
746: break;
747: }
748: }
749: #if defined(PETSC_DEBUG)
750: if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
751: found = PETSC_FALSE;
752: #endif
753: }
754: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
756: if (A->nooffproczerorows) {
757: if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
758: nrecvs = nsends;
759: nmax = N;
760: } else {
761: /* inform other processors of number of messages and max length*/
762: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
763: }
765: /* post receives: */
766: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
767: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
768: for (i=0; i<nrecvs; i++) {
769: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
770: }
772: /* do sends:
773: 1) starts[i] gives the starting index in svalues for stuff going to
774: the ith processor
775: */
776: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
777: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
778: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
779: starts[0] = 0;
780: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
781: for (i=0; i<N; i++) {
782: svalues[starts[owner[i]]++] = rows[i];
783: }
785: starts[0] = 0;
786: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
787: count = 0;
788: for (i=0; i<size; i++) {
789: if (nprocs[2*i+1]) {
790: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
791: }
792: }
793: PetscFree(starts);
795: base = owners[rank];
797: /* wait on receives */
798: PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);
799: count = nrecvs; slen = 0;
800: while (count) {
801: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
802: /* unpack receives into our local space */
803: MPI_Get_count(&recv_status,MPIU_INT,&n);
804: source[imdex] = recv_status.MPI_SOURCE;
805: lens[imdex] = n;
806: slen += n;
807: count--;
808: }
809: PetscFree(recv_waits);
810:
811: /* move the data into the send scatter */
812: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
813: count = 0;
814: for (i=0; i<nrecvs; i++) {
815: values = rvalues + i*nmax;
816: for (j=0; j<lens[i]; j++) {
817: lrows[count++] = values[j] - base;
818: }
819: }
820: PetscFree(rvalues);
821: PetscFree2(lens,source);
822: PetscFree(owner);
823: PetscFree(nprocs);
824:
825: /* fix right hand side if needed */
826: if (x && b) {
827: VecGetArrayRead(x,&xx);
828: VecGetArray(b,&bb);
829: for (i=0; i<slen; i++) {
830: bb[lrows[i]] = diag*xx[lrows[i]];
831: }
832: VecRestoreArrayRead(x,&xx);
833: VecRestoreArray(b,&bb);
834: }
835: /*
836: Zero the required rows. If the "diagonal block" of the matrix
837: is square and the user wishes to set the diagonal we use separate
838: code so that MatSetValues() is not called for each diagonal allocating
839: new memory, thus calling lots of mallocs and slowing things down.
841: */
842: /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
843: MatZeroRows(l->B,slen,lrows,0.0,0,0);
844: if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
845: MatZeroRows(l->A,slen,lrows,diag,0,0);
846: } else if (diag != 0.0) {
847: MatZeroRows(l->A,slen,lrows,0.0,0,0);
848: if (((Mat_SeqAIJ*)l->A->data)->nonew) {
849: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
850: MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
851: }
852: for (i = 0; i < slen; i++) {
853: row = lrows[i] + rstart;
854: MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);
855: }
856: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
857: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
858: } else {
859: MatZeroRows(l->A,slen,lrows,0.0,0,0);
860: }
861: PetscFree(lrows);
863: /* wait on sends */
864: if (nsends) {
865: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
866: MPI_Waitall(nsends,send_waits,send_status);
867: PetscFree(send_status);
868: }
869: PetscFree(send_waits);
870: PetscFree(svalues);
871: return(0);
872: }
876: PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
877: {
878: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
879: PetscErrorCode ierr;
880: PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
881: PetscInt i,*owners = A->rmap->range;
882: PetscInt *nprocs,j,idx,nsends;
883: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
884: PetscInt *rvalues,count,base,slen,*source;
885: PetscInt *lens,*lrows,*values,m;
886: MPI_Comm comm = ((PetscObject)A)->comm;
887: MPI_Request *send_waits,*recv_waits;
888: MPI_Status recv_status,*send_status;
889: const PetscScalar *xx;
890: PetscScalar *bb,*mask;
891: Vec xmask,lmask;
892: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data;
893: const PetscInt *aj, *ii,*ridx;
894: PetscScalar *aa;
895: #if defined(PETSC_DEBUG)
896: PetscBool found = PETSC_FALSE;
897: #endif
900: /* first count number of contributors to each processor */
901: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
902: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
903: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
904: j = 0;
905: for (i=0; i<N; i++) {
906: if (lastidx > (idx = rows[i])) j = 0;
907: lastidx = idx;
908: for (; j<size; j++) {
909: if (idx >= owners[j] && idx < owners[j+1]) {
910: nprocs[2*j]++;
911: nprocs[2*j+1] = 1;
912: owner[i] = j;
913: #if defined(PETSC_DEBUG)
914: found = PETSC_TRUE;
915: #endif
916: break;
917: }
918: }
919: #if defined(PETSC_DEBUG)
920: if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
921: found = PETSC_FALSE;
922: #endif
923: }
924: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
926: /* inform other processors of number of messages and max length*/
927: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
929: /* post receives: */
930: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
931: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
932: for (i=0; i<nrecvs; i++) {
933: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
934: }
936: /* do sends:
937: 1) starts[i] gives the starting index in svalues for stuff going to
938: the ith processor
939: */
940: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
941: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
942: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
943: starts[0] = 0;
944: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
945: for (i=0; i<N; i++) {
946: svalues[starts[owner[i]]++] = rows[i];
947: }
949: starts[0] = 0;
950: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
951: count = 0;
952: for (i=0; i<size; i++) {
953: if (nprocs[2*i+1]) {
954: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
955: }
956: }
957: PetscFree(starts);
959: base = owners[rank];
961: /* wait on receives */
962: PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);
963: count = nrecvs; slen = 0;
964: while (count) {
965: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
966: /* unpack receives into our local space */
967: MPI_Get_count(&recv_status,MPIU_INT,&n);
968: source[imdex] = recv_status.MPI_SOURCE;
969: lens[imdex] = n;
970: slen += n;
971: count--;
972: }
973: PetscFree(recv_waits);
974:
975: /* move the data into the send scatter */
976: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
977: count = 0;
978: for (i=0; i<nrecvs; i++) {
979: values = rvalues + i*nmax;
980: for (j=0; j<lens[i]; j++) {
981: lrows[count++] = values[j] - base;
982: }
983: }
984: PetscFree(rvalues);
985: PetscFree2(lens,source);
986: PetscFree(owner);
987: PetscFree(nprocs);
988: /* lrows are the local rows to be zeroed, slen is the number of local rows */
990: /* zero diagonal part of matrix */
991: MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);
992:
993: /* handle off diagonal part of matrix */
994: MatGetVecs(A,&xmask,PETSC_NULL);
995: VecDuplicate(l->lvec,&lmask);
996: VecGetArray(xmask,&bb);
997: for (i=0; i<slen; i++) {
998: bb[lrows[i]] = 1;
999: }
1000: VecRestoreArray(xmask,&bb);
1001: VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
1002: VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
1003: VecDestroy(&xmask);
1004: if (x) {
1005: VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);
1006: VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);
1007: VecGetArrayRead(l->lvec,&xx);
1008: VecGetArray(b,&bb);
1009: }
1010: VecGetArray(lmask,&mask);
1012: /* remove zeroed rows of off diagonal matrix */
1013: ii = aij->i;
1014: for (i=0; i<slen; i++) {
1015: PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));
1016: }
1018: /* loop over all elements of off process part of matrix zeroing removed columns*/
1019: if (aij->compressedrow.use){
1020: m = aij->compressedrow.nrows;
1021: ii = aij->compressedrow.i;
1022: ridx = aij->compressedrow.rindex;
1023: for (i=0; i<m; i++){
1024: n = ii[i+1] - ii[i];
1025: aj = aij->j + ii[i];
1026: aa = aij->a + ii[i];
1028: for (j=0; j<n; j++) {
1029: if (PetscAbsScalar(mask[*aj])) {
1030: if (b) bb[*ridx] -= *aa*xx[*aj];
1031: *aa = 0.0;
1032: }
1033: aa++;
1034: aj++;
1035: }
1036: ridx++;
1037: }
1038: } else { /* do not use compressed row format */
1039: m = l->B->rmap->n;
1040: for (i=0; i<m; i++) {
1041: n = ii[i+1] - ii[i];
1042: aj = aij->j + ii[i];
1043: aa = aij->a + ii[i];
1044: for (j=0; j<n; j++) {
1045: if (PetscAbsScalar(mask[*aj])) {
1046: if (b) bb[i] -= *aa*xx[*aj];
1047: *aa = 0.0;
1048: }
1049: aa++;
1050: aj++;
1051: }
1052: }
1053: }
1054: if (x) {
1055: VecRestoreArray(b,&bb);
1056: VecRestoreArrayRead(l->lvec,&xx);
1057: }
1058: VecRestoreArray(lmask,&mask);
1059: VecDestroy(&lmask);
1060: PetscFree(lrows);
1062: /* wait on sends */
1063: if (nsends) {
1064: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1065: MPI_Waitall(nsends,send_waits,send_status);
1066: PetscFree(send_status);
1067: }
1068: PetscFree(send_waits);
1069: PetscFree(svalues);
1071: return(0);
1072: }
1076: PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1077: {
1078: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1080: PetscInt nt;
1083: VecGetLocalSize(xx,&nt);
1084: if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1085: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1086: (*a->A->ops->mult)(a->A,xx,yy);
1087: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1088: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
1089: return(0);
1090: }
1094: PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1095: {
1096: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1100: MatMultDiagonalBlock(a->A,bb,xx);
1101: return(0);
1102: }
1106: PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1107: {
1108: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1112: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1113: (*a->A->ops->multadd)(a->A,xx,yy,zz);
1114: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1115: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
1116: return(0);
1117: }
1121: PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1122: {
1123: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1125: PetscBool merged;
1128: VecScatterGetMerged(a->Mvctx,&merged);
1129: /* do nondiagonal part */
1130: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1131: if (!merged) {
1132: /* send it on its way */
1133: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1134: /* do local part */
1135: (*a->A->ops->multtranspose)(a->A,xx,yy);
1136: /* receive remote parts: note this assumes the values are not actually */
1137: /* added in yy until the next line, */
1138: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1139: } else {
1140: /* do local part */
1141: (*a->A->ops->multtranspose)(a->A,xx,yy);
1142: /* send it on its way */
1143: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1144: /* values actually were received in the Begin() but we need to call this nop */
1145: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1146: }
1147: return(0);
1148: }
1150: EXTERN_C_BEGIN
1153: PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
1154: {
1155: MPI_Comm comm;
1156: Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1157: Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1158: IS Me,Notme;
1160: PetscInt M,N,first,last,*notme,i;
1161: PetscMPIInt size;
1165: /* Easy test: symmetric diagonal block */
1166: Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1167: MatIsTranspose(Adia,Bdia,tol,f);
1168: if (!*f) return(0);
1169: PetscObjectGetComm((PetscObject)Amat,&comm);
1170: MPI_Comm_size(comm,&size);
1171: if (size == 1) return(0);
1173: /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1174: MatGetSize(Amat,&M,&N);
1175: MatGetOwnershipRange(Amat,&first,&last);
1176: PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);
1177: for (i=0; i<first; i++) notme[i] = i;
1178: for (i=last; i<M; i++) notme[i-last+first] = i;
1179: ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);
1180: ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);
1181: MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);
1182: Aoff = Aoffs[0];
1183: MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);
1184: Boff = Boffs[0];
1185: MatIsTranspose(Aoff,Boff,tol,f);
1186: MatDestroyMatrices(1,&Aoffs);
1187: MatDestroyMatrices(1,&Boffs);
1188: ISDestroy(&Me);
1189: ISDestroy(&Notme);
1190: PetscFree(notme);
1191: return(0);
1192: }
1193: EXTERN_C_END
1197: PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1198: {
1199: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1203: /* do nondiagonal part */
1204: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1205: /* send it on its way */
1206: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1207: /* do local part */
1208: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
1209: /* receive remote parts */
1210: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1211: return(0);
1212: }
1214: /*
1215: This only works correctly for square matrices where the subblock A->A is the
1216: diagonal block
1217: */
1220: PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1221: {
1223: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1226: if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1227: if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1228: MatGetDiagonal(a->A,v);
1229: return(0);
1230: }
1234: PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1235: {
1236: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1240: MatScale(a->A,aa);
1241: MatScale(a->B,aa);
1242: return(0);
1243: }
1247: PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1248: {
1249: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1253: #if defined(PETSC_USE_LOG)
1254: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1255: #endif
1256: MatStashDestroy_Private(&mat->stash);
1257: VecDestroy(&aij->diag);
1258: MatDestroy(&aij->A);
1259: MatDestroy(&aij->B);
1260: #if defined (PETSC_USE_CTABLE)
1261: PetscTableDestroy(&aij->colmap);
1262: #else
1263: PetscFree(aij->colmap);
1264: #endif
1265: PetscFree(aij->garray);
1266: VecDestroy(&aij->lvec);
1267: VecScatterDestroy(&aij->Mvctx);
1268: PetscFree2(aij->rowvalues,aij->rowindices);
1269: PetscFree(aij->ld);
1270: PetscFree(mat->data);
1272: PetscObjectChangeTypeName((PetscObject)mat,0);
1273: PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);
1274: PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);
1275: PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
1276: PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);
1277: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);
1278: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);
1279: PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);
1280: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);
1281: return(0);
1282: }
1286: PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1287: {
1288: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1289: Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data;
1290: Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data;
1291: PetscErrorCode ierr;
1292: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
1293: int fd;
1294: PetscInt nz,header[4],*row_lengths,*range=0,rlen,i;
1295: PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1296: PetscScalar *column_values;
1297: PetscInt message_count,flowcontrolcount;
1300: MPI_Comm_rank(((PetscObject)mat)->comm,&rank);
1301: MPI_Comm_size(((PetscObject)mat)->comm,&size);
1302: nz = A->nz + B->nz;
1303: if (!rank) {
1304: header[0] = MAT_FILE_CLASSID;
1305: header[1] = mat->rmap->N;
1306: header[2] = mat->cmap->N;
1307: MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);
1308: PetscViewerBinaryGetDescriptor(viewer,&fd);
1309: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
1310: /* get largest number of rows any processor has */
1311: rlen = mat->rmap->n;
1312: range = mat->rmap->range;
1313: for (i=1; i<size; i++) {
1314: rlen = PetscMax(rlen,range[i+1] - range[i]);
1315: }
1316: } else {
1317: MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);
1318: rlen = mat->rmap->n;
1319: }
1321: /* load up the local row counts */
1322: PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);
1323: for (i=0; i<mat->rmap->n; i++) {
1324: row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1325: }
1327: /* store the row lengths to the file */
1328: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1329: if (!rank) {
1330: PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);
1331: for (i=1; i<size; i++) {
1332: PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);
1333: rlen = range[i+1] - range[i];
1334: MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);
1335: PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);
1336: }
1337: PetscViewerFlowControlEndMaster(viewer,message_count);
1338: } else {
1339: PetscViewerFlowControlStepWorker(viewer,rank,message_count);
1340: MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);
1341: PetscViewerFlowControlEndWorker(viewer,message_count);
1342: }
1343: PetscFree(row_lengths);
1345: /* load up the local column indices */
1346: nzmax = nz; /* )th processor needs space a largest processor needs */
1347: MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);
1348: PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);
1349: cnt = 0;
1350: for (i=0; i<mat->rmap->n; i++) {
1351: for (j=B->i[i]; j<B->i[i+1]; j++) {
1352: if ( (col = garray[B->j[j]]) > cstart) break;
1353: column_indices[cnt++] = col;
1354: }
1355: for (k=A->i[i]; k<A->i[i+1]; k++) {
1356: column_indices[cnt++] = A->j[k] + cstart;
1357: }
1358: for (; j<B->i[i+1]; j++) {
1359: column_indices[cnt++] = garray[B->j[j]];
1360: }
1361: }
1362: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1364: /* store the column indices to the file */
1365: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1366: if (!rank) {
1367: MPI_Status status;
1368: PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);
1369: for (i=1; i<size; i++) {
1370: PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);
1371: MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);
1372: if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1373: MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);
1374: PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);
1375: }
1376: PetscViewerFlowControlEndMaster(viewer,message_count);
1377: } else {
1378: PetscViewerFlowControlStepWorker(viewer,rank,message_count);
1379: MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);
1380: MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);
1381: PetscViewerFlowControlEndWorker(viewer,message_count);
1382: }
1383: PetscFree(column_indices);
1385: /* load up the local column values */
1386: PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);
1387: cnt = 0;
1388: for (i=0; i<mat->rmap->n; i++) {
1389: for (j=B->i[i]; j<B->i[i+1]; j++) {
1390: if ( garray[B->j[j]] > cstart) break;
1391: column_values[cnt++] = B->a[j];
1392: }
1393: for (k=A->i[i]; k<A->i[i+1]; k++) {
1394: column_values[cnt++] = A->a[k];
1395: }
1396: for (; j<B->i[i+1]; j++) {
1397: column_values[cnt++] = B->a[j];
1398: }
1399: }
1400: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1402: /* store the column values to the file */
1403: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1404: if (!rank) {
1405: MPI_Status status;
1406: PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);
1407: for (i=1; i<size; i++) {
1408: PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);
1409: MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);
1410: if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1411: MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);
1412: PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);
1413: }
1414: PetscViewerFlowControlEndMaster(viewer,message_count);
1415: } else {
1416: PetscViewerFlowControlStepWorker(viewer,rank,message_count);
1417: MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);
1418: MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);
1419: PetscViewerFlowControlEndWorker(viewer,message_count);
1420: }
1421: PetscFree(column_values);
1422: return(0);
1423: }
1427: PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1428: {
1429: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1430: PetscErrorCode ierr;
1431: PetscMPIInt rank = aij->rank,size = aij->size;
1432: PetscBool isdraw,iascii,isbinary;
1433: PetscViewer sviewer;
1434: PetscViewerFormat format;
1437: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1438: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1439: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1440: if (iascii) {
1441: PetscViewerGetFormat(viewer,&format);
1442: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1443: MatInfo info;
1444: PetscBool inodes;
1446: MPI_Comm_rank(((PetscObject)mat)->comm,&rank);
1447: MatGetInfo(mat,MAT_LOCAL,&info);
1448: MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);
1449: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
1450: if (!inodes) {
1451: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1452: rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
1453: } else {
1454: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1455: rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
1456: }
1457: MatGetInfo(aij->A,MAT_LOCAL,&info);
1458: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
1459: MatGetInfo(aij->B,MAT_LOCAL,&info);
1460: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
1461: PetscViewerFlush(viewer);
1462: PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
1463: PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
1464: VecScatterView(aij->Mvctx,viewer);
1465: return(0);
1466: } else if (format == PETSC_VIEWER_ASCII_INFO) {
1467: PetscInt inodecount,inodelimit,*inodes;
1468: MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);
1469: if (inodes) {
1470: PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);
1471: } else {
1472: PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");
1473: }
1474: return(0);
1475: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1476: return(0);
1477: }
1478: } else if (isbinary) {
1479: if (size == 1) {
1480: PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);
1481: MatView(aij->A,viewer);
1482: } else {
1483: MatView_MPIAIJ_Binary(mat,viewer);
1484: }
1485: return(0);
1486: } else if (isdraw) {
1487: PetscDraw draw;
1488: PetscBool isnull;
1489: PetscViewerDrawGetDraw(viewer,0,&draw);
1490: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1491: }
1493: if (size == 1) {
1494: PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);
1495: MatView(aij->A,viewer);
1496: } else {
1497: /* assemble the entire matrix onto first processor. */
1498: Mat A;
1499: Mat_SeqAIJ *Aloc;
1500: PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1501: MatScalar *a;
1503: if (mat->rmap->N > 1024) {
1504: PetscBool flg = PETSC_FALSE;
1506: PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);
1507: if (!flg) {
1508: SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1509: }
1510: }
1512: MatCreate(((PetscObject)mat)->comm,&A);
1513: if (!rank) {
1514: MatSetSizes(A,M,N,M,N);
1515: } else {
1516: MatSetSizes(A,0,0,M,N);
1517: }
1518: /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1519: MatSetType(A,MATMPIAIJ);
1520: MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);
1521: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
1522: PetscLogObjectParent(mat,A);
1524: /* copy over the A part */
1525: Aloc = (Mat_SeqAIJ*)aij->A->data;
1526: m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1527: row = mat->rmap->rstart;
1528: for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1529: for (i=0; i<m; i++) {
1530: MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
1531: row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1532: }
1533: aj = Aloc->j;
1534: for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1536: /* copy over the B part */
1537: Aloc = (Mat_SeqAIJ*)aij->B->data;
1538: m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1539: row = mat->rmap->rstart;
1540: PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);
1541: ct = cols;
1542: for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1543: for (i=0; i<m; i++) {
1544: MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
1545: row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1546: }
1547: PetscFree(ct);
1548: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1549: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1550: /*
1551: Everyone has to call to draw the matrix since the graphics waits are
1552: synchronized across all processors that share the PetscDraw object
1553: */
1554: PetscViewerGetSingleton(viewer,&sviewer);
1555: if (!rank) {
1556: PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);
1557: /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1558: PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1559: MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);
1560: }
1561: PetscViewerRestoreSingleton(viewer,&sviewer);
1562: MatDestroy(&A);
1563: }
1564: return(0);
1565: }
1569: PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1570: {
1572: PetscBool iascii,isdraw,issocket,isbinary;
1573:
1575: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1576: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1577: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1578: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
1579: if (iascii || isdraw || isbinary || issocket) {
1580: MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
1581: } else {
1582: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1583: }
1584: return(0);
1585: }
1589: PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1590: {
1591: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1593: Vec bb1 = 0;
1594: PetscBool hasop;
1597: if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1598: VecDuplicate(bb,&bb1);
1599: }
1601: if (flag == SOR_APPLY_UPPER) {
1602: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1603: return(0);
1604: }
1606: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1607: if (flag & SOR_ZERO_INITIAL_GUESS) {
1608: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1609: its--;
1610: }
1611:
1612: while (its--) {
1613: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1614: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1616: /* update rhs: bb1 = bb - B*x */
1617: VecScale(mat->lvec,-1.0);
1618: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1620: /* local sweep */
1621: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);
1622: }
1623: } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1624: if (flag & SOR_ZERO_INITIAL_GUESS) {
1625: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1626: its--;
1627: }
1628: while (its--) {
1629: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1630: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1632: /* update rhs: bb1 = bb - B*x */
1633: VecScale(mat->lvec,-1.0);
1634: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1636: /* local sweep */
1637: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);
1638: }
1639: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1640: if (flag & SOR_ZERO_INITIAL_GUESS) {
1641: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1642: its--;
1643: }
1644: while (its--) {
1645: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1646: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1648: /* update rhs: bb1 = bb - B*x */
1649: VecScale(mat->lvec,-1.0);
1650: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1652: /* local sweep */
1653: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);
1654: }
1655: } else if (flag & SOR_EISENSTAT) {
1656: Vec xx1;
1658: VecDuplicate(bb,&xx1);
1659: (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);
1661: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1662: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1663: if (!mat->diag) {
1664: MatGetVecs(matin,&mat->diag,PETSC_NULL);
1665: MatGetDiagonal(matin,mat->diag);
1666: }
1667: MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);
1668: if (hasop) {
1669: MatMultDiagonalBlock(matin,xx,bb1);
1670: } else {
1671: VecPointwiseMult(bb1,mat->diag,xx);
1672: }
1673: VecAYPX(bb1,(omega-2.0)/omega,bb);
1675: MatMultAdd(mat->B,mat->lvec,bb1,bb1);
1677: /* local sweep */
1678: (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);
1679: VecAXPY(xx,1.0,xx1);
1680: VecDestroy(&xx1);
1681: } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1683: VecDestroy(&bb1);
1684: return(0);
1685: }
1689: PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1690: {
1691: MPI_Comm comm;
1692: PetscInt first,local_rowsize,local_colsize;
1693: const PetscInt *rows;
1694: IS crowp,growp,irowp,lrowp,lcolp,icolp;
1698: PetscObjectGetComm((PetscObject)A,&comm);
1699: /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1700: ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);
1701: /* collect the global row permutation and invert it */
1702: ISAllGather(crowp,&growp);
1703: ISSetPermutation(growp);
1704: ISDestroy(&crowp);
1705: ISInvertPermutation(growp,PETSC_DECIDE,&irowp);
1706: ISDestroy(&growp);
1707: /* get the local target indices */
1708: MatGetOwnershipRange(A,&first,PETSC_NULL);
1709: MatGetLocalSize(A,&local_rowsize,&local_colsize);
1710: ISGetIndices(irowp,&rows);
1711: ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);
1712: ISRestoreIndices(irowp,&rows);
1713: ISDestroy(&irowp);
1714: /* the column permutation is so much easier;
1715: make a local version of 'colp' and invert it */
1716: ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);
1717: ISSetPermutation(lcolp);
1718: ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);
1719: ISDestroy(&lcolp);
1720: /* now we just get the submatrix */
1721: MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);
1722: /* clean up */
1723: ISDestroy(&lrowp);
1724: ISDestroy(&icolp);
1725: return(0);
1726: }
1730: PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1731: {
1732: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1733: Mat A = mat->A,B = mat->B;
1735: PetscReal isend[5],irecv[5];
1738: info->block_size = 1.0;
1739: MatGetInfo(A,MAT_LOCAL,info);
1740: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1741: isend[3] = info->memory; isend[4] = info->mallocs;
1742: MatGetInfo(B,MAT_LOCAL,info);
1743: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1744: isend[3] += info->memory; isend[4] += info->mallocs;
1745: if (flag == MAT_LOCAL) {
1746: info->nz_used = isend[0];
1747: info->nz_allocated = isend[1];
1748: info->nz_unneeded = isend[2];
1749: info->memory = isend[3];
1750: info->mallocs = isend[4];
1751: } else if (flag == MAT_GLOBAL_MAX) {
1752: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);
1753: info->nz_used = irecv[0];
1754: info->nz_allocated = irecv[1];
1755: info->nz_unneeded = irecv[2];
1756: info->memory = irecv[3];
1757: info->mallocs = irecv[4];
1758: } else if (flag == MAT_GLOBAL_SUM) {
1759: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);
1760: info->nz_used = irecv[0];
1761: info->nz_allocated = irecv[1];
1762: info->nz_unneeded = irecv[2];
1763: info->memory = irecv[3];
1764: info->mallocs = irecv[4];
1765: }
1766: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1767: info->fill_ratio_needed = 0;
1768: info->factor_mallocs = 0;
1770: return(0);
1771: }
1775: PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1776: {
1777: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1781: switch (op) {
1782: case MAT_NEW_NONZERO_LOCATIONS:
1783: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1784: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1785: case MAT_KEEP_NONZERO_PATTERN:
1786: case MAT_NEW_NONZERO_LOCATION_ERR:
1787: case MAT_USE_INODES:
1788: case MAT_IGNORE_ZERO_ENTRIES:
1789: MatSetOption(a->A,op,flg);
1790: MatSetOption(a->B,op,flg);
1791: break;
1792: case MAT_ROW_ORIENTED:
1793: a->roworiented = flg;
1794: MatSetOption(a->A,op,flg);
1795: MatSetOption(a->B,op,flg);
1796: break;
1797: case MAT_NEW_DIAGONALS:
1798: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1799: break;
1800: case MAT_IGNORE_OFF_PROC_ENTRIES:
1801: a->donotstash = flg;
1802: break;
1803: case MAT_SPD:
1804: A->spd_set = PETSC_TRUE;
1805: A->spd = flg;
1806: if (flg) {
1807: A->symmetric = PETSC_TRUE;
1808: A->structurally_symmetric = PETSC_TRUE;
1809: A->symmetric_set = PETSC_TRUE;
1810: A->structurally_symmetric_set = PETSC_TRUE;
1811: }
1812: break;
1813: case MAT_SYMMETRIC:
1814: MatSetOption(a->A,op,flg);
1815: break;
1816: case MAT_STRUCTURALLY_SYMMETRIC:
1817: MatSetOption(a->A,op,flg);
1818: break;
1819: case MAT_HERMITIAN:
1820: MatSetOption(a->A,op,flg);
1821: break;
1822: case MAT_SYMMETRY_ETERNAL:
1823: MatSetOption(a->A,op,flg);
1824: break;
1825: default:
1826: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1827: }
1828: return(0);
1829: }
1833: PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1834: {
1835: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1836: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1838: PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1839: PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1840: PetscInt *cmap,*idx_p;
1843: if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1844: mat->getrowactive = PETSC_TRUE;
1846: if (!mat->rowvalues && (idx || v)) {
1847: /*
1848: allocate enough space to hold information from the longest row.
1849: */
1850: Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1851: PetscInt max = 1,tmp;
1852: for (i=0; i<matin->rmap->n; i++) {
1853: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1854: if (max < tmp) { max = tmp; }
1855: }
1856: PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);
1857: }
1859: if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1860: lrow = row - rstart;
1862: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1863: if (!v) {pvA = 0; pvB = 0;}
1864: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1865: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1866: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1867: nztot = nzA + nzB;
1869: cmap = mat->garray;
1870: if (v || idx) {
1871: if (nztot) {
1872: /* Sort by increasing column numbers, assuming A and B already sorted */
1873: PetscInt imark = -1;
1874: if (v) {
1875: *v = v_p = mat->rowvalues;
1876: for (i=0; i<nzB; i++) {
1877: if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1878: else break;
1879: }
1880: imark = i;
1881: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1882: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1883: }
1884: if (idx) {
1885: *idx = idx_p = mat->rowindices;
1886: if (imark > -1) {
1887: for (i=0; i<imark; i++) {
1888: idx_p[i] = cmap[cworkB[i]];
1889: }
1890: } else {
1891: for (i=0; i<nzB; i++) {
1892: if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1893: else break;
1894: }
1895: imark = i;
1896: }
1897: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i];
1898: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]];
1899: }
1900: } else {
1901: if (idx) *idx = 0;
1902: if (v) *v = 0;
1903: }
1904: }
1905: *nz = nztot;
1906: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1907: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1908: return(0);
1909: }
1913: PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1914: {
1915: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1918: if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1919: aij->getrowactive = PETSC_FALSE;
1920: return(0);
1921: }
1925: PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1926: {
1927: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1928: Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1930: PetscInt i,j,cstart = mat->cmap->rstart;
1931: PetscReal sum = 0.0;
1932: MatScalar *v;
1935: if (aij->size == 1) {
1936: MatNorm(aij->A,type,norm);
1937: } else {
1938: if (type == NORM_FROBENIUS) {
1939: v = amat->a;
1940: for (i=0; i<amat->nz; i++) {
1941: #if defined(PETSC_USE_COMPLEX)
1942: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1943: #else
1944: sum += (*v)*(*v); v++;
1945: #endif
1946: }
1947: v = bmat->a;
1948: for (i=0; i<bmat->nz; i++) {
1949: #if defined(PETSC_USE_COMPLEX)
1950: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1951: #else
1952: sum += (*v)*(*v); v++;
1953: #endif
1954: }
1955: MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);
1956: *norm = PetscSqrtReal(*norm);
1957: } else if (type == NORM_1) { /* max column norm */
1958: PetscReal *tmp,*tmp2;
1959: PetscInt *jj,*garray = aij->garray;
1960: PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);
1961: PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);
1962: PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));
1963: *norm = 0.0;
1964: v = amat->a; jj = amat->j;
1965: for (j=0; j<amat->nz; j++) {
1966: tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++;
1967: }
1968: v = bmat->a; jj = bmat->j;
1969: for (j=0; j<bmat->nz; j++) {
1970: tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1971: }
1972: MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);
1973: for (j=0; j<mat->cmap->N; j++) {
1974: if (tmp2[j] > *norm) *norm = tmp2[j];
1975: }
1976: PetscFree(tmp);
1977: PetscFree(tmp2);
1978: } else if (type == NORM_INFINITY) { /* max row norm */
1979: PetscReal ntemp = 0.0;
1980: for (j=0; j<aij->A->rmap->n; j++) {
1981: v = amat->a + amat->i[j];
1982: sum = 0.0;
1983: for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1984: sum += PetscAbsScalar(*v); v++;
1985: }
1986: v = bmat->a + bmat->i[j];
1987: for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1988: sum += PetscAbsScalar(*v); v++;
1989: }
1990: if (sum > ntemp) ntemp = sum;
1991: }
1992: MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);
1993: } else {
1994: SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1995: }
1996: }
1997: return(0);
1998: }
2002: PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2003: {
2004: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2005: Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2007: PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2008: PetscInt cstart=A->cmap->rstart,ncol;
2009: Mat B;
2010: MatScalar *array;
2013: if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2015: ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2016: ai = Aloc->i; aj = Aloc->j;
2017: bi = Bloc->i; bj = Bloc->j;
2018: if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2019: /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2020: PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);
2021: PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));
2022: for (i=0; i<ai[ma]; i++){
2023: d_nnz[aj[i]] ++;
2024: aj[i] += cstart; /* global col index to be used by MatSetValues() */
2025: }
2027: MatCreate(((PetscObject)A)->comm,&B);
2028: MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
2029: MatSetType(B,((PetscObject)A)->type_name);
2030: MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);
2031: MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
2032: PetscFree(d_nnz);
2033: } else {
2034: B = *matout;
2035: }
2037: /* copy over the A part */
2038: array = Aloc->a;
2039: row = A->rmap->rstart;
2040: for (i=0; i<ma; i++) {
2041: ncol = ai[i+1]-ai[i];
2042: MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);
2043: row++; array += ncol; aj += ncol;
2044: }
2045: aj = Aloc->j;
2046: for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2048: /* copy over the B part */
2049: PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);
2050: PetscMemzero(cols,bi[mb]*sizeof(PetscInt));
2051: array = Bloc->a;
2052: row = A->rmap->rstart;
2053: for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2054: cols_tmp = cols;
2055: for (i=0; i<mb; i++) {
2056: ncol = bi[i+1]-bi[i];
2057: MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);
2058: row++; array += ncol; cols_tmp += ncol;
2059: }
2060: PetscFree(cols);
2061:
2062: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2063: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2064: if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2065: *matout = B;
2066: } else {
2067: MatHeaderMerge(A,B);
2068: }
2069: return(0);
2070: }
2074: PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2075: {
2076: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2077: Mat a = aij->A,b = aij->B;
2079: PetscInt s1,s2,s3;
2082: MatGetLocalSize(mat,&s2,&s3);
2083: if (rr) {
2084: VecGetLocalSize(rr,&s1);
2085: if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2086: /* Overlap communication with computation. */
2087: VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
2088: }
2089: if (ll) {
2090: VecGetLocalSize(ll,&s1);
2091: if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2092: (*b->ops->diagonalscale)(b,ll,0);
2093: }
2094: /* scale the diagonal block */
2095: (*a->ops->diagonalscale)(a,ll,rr);
2097: if (rr) {
2098: /* Do a scatter end and then right scale the off-diagonal block */
2099: VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
2100: (*b->ops->diagonalscale)(b,0,aij->lvec);
2101: }
2102:
2103: return(0);
2104: }
2108: PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2109: {
2110: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2114: MatSetBlockSize(a->A,bs);
2115: MatSetBlockSize(a->B,bs);
2116: PetscLayoutSetBlockSize(A->rmap,bs);
2117: PetscLayoutSetBlockSize(A->cmap,bs);
2118: return(0);
2119: }
2122: PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2123: {
2124: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2128: MatSetUnfactored(a->A);
2129: return(0);
2130: }
2134: PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
2135: {
2136: Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2137: Mat a,b,c,d;
2138: PetscBool flg;
2142: a = matA->A; b = matA->B;
2143: c = matB->A; d = matB->B;
2145: MatEqual(a,c,&flg);
2146: if (flg) {
2147: MatEqual(b,d,&flg);
2148: }
2149: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);
2150: return(0);
2151: }
2155: PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2156: {
2158: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2159: Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2162: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2163: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2164: /* because of the column compression in the off-processor part of the matrix a->B,
2165: the number of columns in a->B and b->B may be different, hence we cannot call
2166: the MatCopy() directly on the two parts. If need be, we can provide a more
2167: efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2168: then copying the submatrices */
2169: MatCopy_Basic(A,B,str);
2170: } else {
2171: MatCopy(a->A,b->A,str);
2172: MatCopy(a->B,b->B,str);
2173: }
2174: return(0);
2175: }
2179: PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2180: {
2184: MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
2185: return(0);
2186: }
2190: PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2191: {
2193: PetscInt i;
2194: Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2195: PetscBLASInt bnz,one=1;
2196: Mat_SeqAIJ *x,*y;
2199: if (str == SAME_NONZERO_PATTERN) {
2200: PetscScalar alpha = a;
2201: x = (Mat_SeqAIJ *)xx->A->data;
2202: y = (Mat_SeqAIJ *)yy->A->data;
2203: bnz = PetscBLASIntCast(x->nz);
2204: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2205: x = (Mat_SeqAIJ *)xx->B->data;
2206: y = (Mat_SeqAIJ *)yy->B->data;
2207: bnz = PetscBLASIntCast(x->nz);
2208: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2209: } else if (str == SUBSET_NONZERO_PATTERN) {
2210: MatAXPY_SeqAIJ(yy->A,a,xx->A,str);
2212: x = (Mat_SeqAIJ *)xx->B->data;
2213: y = (Mat_SeqAIJ *)yy->B->data;
2214: if (y->xtoy && y->XtoY != xx->B) {
2215: PetscFree(y->xtoy);
2216: MatDestroy(&y->XtoY);
2217: }
2218: if (!y->xtoy) { /* get xtoy */
2219: MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);
2220: y->XtoY = xx->B;
2221: PetscObjectReference((PetscObject)xx->B);
2222: }
2223: for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2224: } else {
2225: Mat B;
2226: PetscInt *nnz_d,*nnz_o;
2227: PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);
2228: PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);
2229: MatCreate(((PetscObject)Y)->comm,&B);
2230: PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
2231: MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
2232: MatSetType(B,MATMPIAIJ);
2233: MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);
2234: MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);
2235: MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);
2236: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
2237: MatHeaderReplace(Y,B);
2238: PetscFree(nnz_d);
2239: PetscFree(nnz_o);
2240: }
2241: return(0);
2242: }
2244: extern PetscErrorCode MatConjugate_SeqAIJ(Mat);
2248: PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2249: {
2250: #if defined(PETSC_USE_COMPLEX)
2252: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2255: MatConjugate_SeqAIJ(aij->A);
2256: MatConjugate_SeqAIJ(aij->B);
2257: #else
2259: #endif
2260: return(0);
2261: }
2265: PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2266: {
2267: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2271: MatRealPart(a->A);
2272: MatRealPart(a->B);
2273: return(0);
2274: }
2278: PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2279: {
2280: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2284: MatImaginaryPart(a->A);
2285: MatImaginaryPart(a->B);
2286: return(0);
2287: }
2289: #ifdef PETSC_HAVE_PBGL
2291: #include <boost/parallel/mpi/bsp_process_group.hpp>
2292: #include <boost/graph/distributed/ilu_default_graph.hpp>
2293: #include <boost/graph/distributed/ilu_0_block.hpp>
2294: #include <boost/graph/distributed/ilu_preconditioner.hpp>
2295: #include <boost/graph/distributed/petsc/interface.hpp>
2296: #include <boost/multi_array.hpp>
2297: #include <boost/parallel/distributed_property_map->hpp>
2301: /*
2302: This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2303: */
2304: PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2305: {
2306: namespace petsc = boost::distributed::petsc;
2307:
2308: namespace graph_dist = boost::graph::distributed;
2309: using boost::graph::distributed::ilu_default::process_group_type;
2310: using boost::graph::ilu_permuted;
2312: PetscBool row_identity, col_identity;
2313: PetscContainer c;
2314: PetscInt m, n, M, N;
2315: PetscErrorCode ierr;
2318: if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2319: ISIdentity(isrow, &row_identity);
2320: ISIdentity(iscol, &col_identity);
2321: if (!row_identity || !col_identity) {
2322: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2323: }
2325: process_group_type pg;
2326: typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2327: lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2328: lgraph_type& level_graph = *lgraph_p;
2329: graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2331: petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2332: ilu_permuted(level_graph);
2334: /* put together the new matrix */
2335: MatCreate(((PetscObject)A)->comm, fact);
2336: MatGetLocalSize(A, &m, &n);
2337: MatGetSize(A, &M, &N);
2338: MatSetSizes(fact, m, n, M, N);
2339: MatSetType(fact, ((PetscObject)A)->type_name);
2340: MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);
2341: MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);
2343: PetscContainerCreate(((PetscObject)A)->comm, &c);
2344: PetscContainerSetPointer(c, lgraph_p);
2345: PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2346: PetscContainerDestroy(&c);
2347: return(0);
2348: }
2352: PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2353: {
2355: return(0);
2356: }
2360: /*
2361: This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2362: */
2363: PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2364: {
2365: namespace graph_dist = boost::graph::distributed;
2367: typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2368: lgraph_type* lgraph_p;
2369: PetscContainer c;
2373: PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);
2374: PetscContainerGetPointer(c, (void **) &lgraph_p);
2375: VecCopy(b, x);
2377: PetscScalar* array_x;
2378: VecGetArray(x, &array_x);
2379: PetscInt sx;
2380: VecGetSize(x, &sx);
2381:
2382: PetscScalar* array_b;
2383: VecGetArray(b, &array_b);
2384: PetscInt sb;
2385: VecGetSize(b, &sb);
2387: lgraph_type& level_graph = *lgraph_p;
2388: graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2390: typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2391: array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]),
2392: ref_x(array_x, boost::extents[num_vertices(graph)]);
2394: typedef boost::iterator_property_map<array_ref_type::iterator,
2395: boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
2396: gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2397: vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2398:
2399: ilu_set_solve(*lgraph_p, vector_b, vector_x);
2401: return(0);
2402: }
2403: #endif
2405: typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2406: PetscInt nzlocal,nsends,nrecvs;
2407: PetscMPIInt *send_rank,*recv_rank;
2408: PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2409: PetscScalar *sbuf_a,**rbuf_a;
2410: PetscErrorCode (*Destroy)(Mat);
2411: } Mat_Redundant;
2415: PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2416: {
2417: PetscErrorCode ierr;
2418: Mat_Redundant *redund=(Mat_Redundant*)ptr;
2419: PetscInt i;
2422: PetscFree2(redund->send_rank,redund->recv_rank);
2423: PetscFree(redund->sbuf_j);
2424: PetscFree(redund->sbuf_a);
2425: for (i=0; i<redund->nrecvs; i++){
2426: PetscFree(redund->rbuf_j[i]);
2427: PetscFree(redund->rbuf_a[i]);
2428: }
2429: PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);
2430: PetscFree(redund);
2431: return(0);
2432: }
2436: PetscErrorCode MatDestroy_MatRedundant(Mat A)
2437: {
2438: PetscErrorCode ierr;
2439: PetscContainer container;
2440: Mat_Redundant *redund=PETSC_NULL;
2443: PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);
2444: if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2445: PetscContainerGetPointer(container,(void **)&redund);
2446: A->ops->destroy = redund->Destroy;
2447: PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);
2448: if (A->ops->destroy) {
2449: (*A->ops->destroy)(A);
2450: }
2451: return(0);
2452: }
2456: PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2457: {
2458: PetscMPIInt rank,size;
2459: MPI_Comm comm=((PetscObject)mat)->comm;
2461: PetscInt nsends=0,nrecvs=0,i,rownz_max=0;
2462: PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2463: PetscInt *rowrange=mat->rmap->range;
2464: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2465: Mat A=aij->A,B=aij->B,C=*matredundant;
2466: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2467: PetscScalar *sbuf_a;
2468: PetscInt nzlocal=a->nz+b->nz;
2469: PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2470: PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2471: PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2472: MatScalar *aworkA,*aworkB;
2473: PetscScalar *vals;
2474: PetscMPIInt tag1,tag2,tag3,imdex;
2475: MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2476: *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2477: MPI_Status recv_status,*send_status;
2478: PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2479: PetscInt **rbuf_j=PETSC_NULL;
2480: PetscScalar **rbuf_a=PETSC_NULL;
2481: Mat_Redundant *redund=PETSC_NULL;
2482: PetscContainer container;
2485: MPI_Comm_rank(comm,&rank);
2486: MPI_Comm_size(comm,&size);
2488: if (reuse == MAT_REUSE_MATRIX) {
2489: MatGetSize(C,&M,&N);
2490: if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2491: MatGetLocalSize(C,&M,&N);
2492: if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2493: PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);
2494: if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2495: PetscContainerGetPointer(container,(void **)&redund);
2496: if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2498: nsends = redund->nsends;
2499: nrecvs = redund->nrecvs;
2500: send_rank = redund->send_rank;
2501: recv_rank = redund->recv_rank;
2502: sbuf_nz = redund->sbuf_nz;
2503: rbuf_nz = redund->rbuf_nz;
2504: sbuf_j = redund->sbuf_j;
2505: sbuf_a = redund->sbuf_a;
2506: rbuf_j = redund->rbuf_j;
2507: rbuf_a = redund->rbuf_a;
2508: }
2510: if (reuse == MAT_INITIAL_MATRIX){
2511: PetscMPIInt subrank,subsize;
2512: PetscInt nleftover,np_subcomm;
2513: /* get the destination processors' id send_rank, nsends and nrecvs */
2514: MPI_Comm_rank(subcomm,&subrank);
2515: MPI_Comm_size(subcomm,&subsize);
2516: PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);
2517: np_subcomm = size/nsubcomm;
2518: nleftover = size - nsubcomm*np_subcomm;
2519: nsends = 0; nrecvs = 0;
2520: for (i=0; i<size; i++){ /* i=rank*/
2521: if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2522: send_rank[nsends] = i; nsends++;
2523: recv_rank[nrecvs++] = i;
2524: }
2525: }
2526: if (rank >= size - nleftover){/* this proc is a leftover processor */
2527: i = size-nleftover-1;
2528: j = 0;
2529: while (j < nsubcomm - nleftover){
2530: send_rank[nsends++] = i;
2531: i--; j++;
2532: }
2533: }
2535: if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2536: for (i=0; i<nleftover; i++){
2537: recv_rank[nrecvs++] = size-nleftover+i;
2538: }
2539: }
2541: /* allocate sbuf_j, sbuf_a */
2542: i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2543: PetscMalloc(i*sizeof(PetscInt),&sbuf_j);
2544: PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);
2545: } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2547: /* copy mat's local entries into the buffers */
2548: if (reuse == MAT_INITIAL_MATRIX){
2549: rownz_max = 0;
2550: rptr = sbuf_j;
2551: cols = sbuf_j + rend-rstart + 1;
2552: vals = sbuf_a;
2553: rptr[0] = 0;
2554: for (i=0; i<rend-rstart; i++){
2555: row = i + rstart;
2556: nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2557: ncols = nzA + nzB;
2558: cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2559: aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2560: /* load the column indices for this row into cols */
2561: lwrite = 0;
2562: for (l=0; l<nzB; l++) {
2563: if ((ctmp = bmap[cworkB[l]]) < cstart){
2564: vals[lwrite] = aworkB[l];
2565: cols[lwrite++] = ctmp;
2566: }
2567: }
2568: for (l=0; l<nzA; l++){
2569: vals[lwrite] = aworkA[l];
2570: cols[lwrite++] = cstart + cworkA[l];
2571: }
2572: for (l=0; l<nzB; l++) {
2573: if ((ctmp = bmap[cworkB[l]]) >= cend){
2574: vals[lwrite] = aworkB[l];
2575: cols[lwrite++] = ctmp;
2576: }
2577: }
2578: vals += ncols;
2579: cols += ncols;
2580: rptr[i+1] = rptr[i] + ncols;
2581: if (rownz_max < ncols) rownz_max = ncols;
2582: }
2583: if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2584: } else { /* only copy matrix values into sbuf_a */
2585: rptr = sbuf_j;
2586: vals = sbuf_a;
2587: rptr[0] = 0;
2588: for (i=0; i<rend-rstart; i++){
2589: row = i + rstart;
2590: nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2591: ncols = nzA + nzB;
2592: cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2593: aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2594: lwrite = 0;
2595: for (l=0; l<nzB; l++) {
2596: if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2597: }
2598: for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2599: for (l=0; l<nzB; l++) {
2600: if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2601: }
2602: vals += ncols;
2603: rptr[i+1] = rptr[i] + ncols;
2604: }
2605: } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2607: /* send nzlocal to others, and recv other's nzlocal */
2608: /*--------------------------------------------------*/
2609: if (reuse == MAT_INITIAL_MATRIX){
2610: PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);
2611: s_waits2 = s_waits3 + nsends;
2612: s_waits1 = s_waits2 + nsends;
2613: r_waits1 = s_waits1 + nsends;
2614: r_waits2 = r_waits1 + nrecvs;
2615: r_waits3 = r_waits2 + nrecvs;
2616: } else {
2617: PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);
2618: r_waits3 = s_waits3 + nsends;
2619: }
2621: PetscObjectGetNewTag((PetscObject)mat,&tag3);
2622: if (reuse == MAT_INITIAL_MATRIX){
2623: /* get new tags to keep the communication clean */
2624: PetscObjectGetNewTag((PetscObject)mat,&tag1);
2625: PetscObjectGetNewTag((PetscObject)mat,&tag2);
2626: PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);
2628: /* post receives of other's nzlocal */
2629: for (i=0; i<nrecvs; i++){
2630: MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);
2631: }
2632: /* send nzlocal to others */
2633: for (i=0; i<nsends; i++){
2634: sbuf_nz[i] = nzlocal;
2635: MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);
2636: }
2637: /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2638: count = nrecvs;
2639: while (count) {
2640: MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);
2641: recv_rank[imdex] = recv_status.MPI_SOURCE;
2642: /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2643: PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);
2645: i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2646: rbuf_nz[imdex] += i + 2;
2647: PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);
2648: MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);
2649: count--;
2650: }
2651: /* wait on sends of nzlocal */
2652: if (nsends) {MPI_Waitall(nsends,s_waits1,send_status);}
2653: /* send mat->i,j to others, and recv from other's */
2654: /*------------------------------------------------*/
2655: for (i=0; i<nsends; i++){
2656: j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2657: MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);
2658: }
2659: /* wait on receives of mat->i,j */
2660: /*------------------------------*/
2661: count = nrecvs;
2662: while (count) {
2663: MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);
2664: if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2665: count--;
2666: }
2667: /* wait on sends of mat->i,j */
2668: /*---------------------------*/
2669: if (nsends) {
2670: MPI_Waitall(nsends,s_waits2,send_status);
2671: }
2672: } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2674: /* post receives, send and receive mat->a */
2675: /*----------------------------------------*/
2676: for (imdex=0; imdex<nrecvs; imdex++) {
2677: MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);
2678: }
2679: for (i=0; i<nsends; i++){
2680: MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);
2681: }
2682: count = nrecvs;
2683: while (count) {
2684: MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);
2685: if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2686: count--;
2687: }
2688: if (nsends) {
2689: MPI_Waitall(nsends,s_waits3,send_status);
2690: }
2692: PetscFree2(s_waits3,send_status);
2694: /* create redundant matrix */
2695: /*-------------------------*/
2696: if (reuse == MAT_INITIAL_MATRIX){
2697: /* compute rownz_max for preallocation */
2698: for (imdex=0; imdex<nrecvs; imdex++){
2699: j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2700: rptr = rbuf_j[imdex];
2701: for (i=0; i<j; i++){
2702: ncols = rptr[i+1] - rptr[i];
2703: if (rownz_max < ncols) rownz_max = ncols;
2704: }
2705: }
2707: MatCreate(subcomm,&C);
2708: MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);
2709: MatSetFromOptions(C);
2710: MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);
2711: MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);
2712: } else {
2713: C = *matredundant;
2714: }
2716: /* insert local matrix entries */
2717: rptr = sbuf_j;
2718: cols = sbuf_j + rend-rstart + 1;
2719: vals = sbuf_a;
2720: for (i=0; i<rend-rstart; i++){
2721: row = i + rstart;
2722: ncols = rptr[i+1] - rptr[i];
2723: MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
2724: vals += ncols;
2725: cols += ncols;
2726: }
2727: /* insert received matrix entries */
2728: for (imdex=0; imdex<nrecvs; imdex++){
2729: rstart = rowrange[recv_rank[imdex]];
2730: rend = rowrange[recv_rank[imdex]+1];
2731: rptr = rbuf_j[imdex];
2732: cols = rbuf_j[imdex] + rend-rstart + 1;
2733: vals = rbuf_a[imdex];
2734: for (i=0; i<rend-rstart; i++){
2735: row = i + rstart;
2736: ncols = rptr[i+1] - rptr[i];
2737: MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
2738: vals += ncols;
2739: cols += ncols;
2740: }
2741: }
2742: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2743: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2744: MatGetSize(C,&M,&N);
2745: if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2746: if (reuse == MAT_INITIAL_MATRIX) {
2747: PetscContainer container;
2748: *matredundant = C;
2749: /* create a supporting struct and attach it to C for reuse */
2750: PetscNewLog(C,Mat_Redundant,&redund);
2751: PetscContainerCreate(PETSC_COMM_SELF,&container);
2752: PetscContainerSetPointer(container,redund);
2753: PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);
2754: PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);
2755: PetscContainerDestroy(&container);
2757: redund->nzlocal = nzlocal;
2758: redund->nsends = nsends;
2759: redund->nrecvs = nrecvs;
2760: redund->send_rank = send_rank;
2761: redund->recv_rank = recv_rank;
2762: redund->sbuf_nz = sbuf_nz;
2763: redund->rbuf_nz = rbuf_nz;
2764: redund->sbuf_j = sbuf_j;
2765: redund->sbuf_a = sbuf_a;
2766: redund->rbuf_j = rbuf_j;
2767: redund->rbuf_a = rbuf_a;
2769: redund->Destroy = C->ops->destroy;
2770: C->ops->destroy = MatDestroy_MatRedundant;
2771: }
2772: return(0);
2773: }
2777: PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2778: {
2779: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2781: PetscInt i,*idxb = 0;
2782: PetscScalar *va,*vb;
2783: Vec vtmp;
2786: MatGetRowMaxAbs(a->A,v,idx);
2787: VecGetArray(v,&va);
2788: if (idx) {
2789: for (i=0; i<A->rmap->n; i++) {
2790: if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2791: }
2792: }
2794: VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);
2795: if (idx) {
2796: PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);
2797: }
2798: MatGetRowMaxAbs(a->B,vtmp,idxb);
2799: VecGetArray(vtmp,&vb);
2801: for (i=0; i<A->rmap->n; i++){
2802: if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2803: va[i] = vb[i];
2804: if (idx) idx[i] = a->garray[idxb[i]];
2805: }
2806: }
2808: VecRestoreArray(v,&va);
2809: VecRestoreArray(vtmp,&vb);
2810: PetscFree(idxb);
2811: VecDestroy(&vtmp);
2812: return(0);
2813: }
2817: PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2818: {
2819: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2821: PetscInt i,*idxb = 0;
2822: PetscScalar *va,*vb;
2823: Vec vtmp;
2826: MatGetRowMinAbs(a->A,v,idx);
2827: VecGetArray(v,&va);
2828: if (idx) {
2829: for (i=0; i<A->cmap->n; i++) {
2830: if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2831: }
2832: }
2834: VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);
2835: if (idx) {
2836: PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);
2837: }
2838: MatGetRowMinAbs(a->B,vtmp,idxb);
2839: VecGetArray(vtmp,&vb);
2841: for (i=0; i<A->rmap->n; i++){
2842: if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2843: va[i] = vb[i];
2844: if (idx) idx[i] = a->garray[idxb[i]];
2845: }
2846: }
2848: VecRestoreArray(v,&va);
2849: VecRestoreArray(vtmp,&vb);
2850: PetscFree(idxb);
2851: VecDestroy(&vtmp);
2852: return(0);
2853: }
2857: PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2858: {
2859: Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
2860: PetscInt n = A->rmap->n;
2861: PetscInt cstart = A->cmap->rstart;
2862: PetscInt *cmap = mat->garray;
2863: PetscInt *diagIdx, *offdiagIdx;
2864: Vec diagV, offdiagV;
2865: PetscScalar *a, *diagA, *offdiagA;
2866: PetscInt r;
2870: PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);
2871: VecCreateSeq(((PetscObject)A)->comm, n, &diagV);
2872: VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);
2873: MatGetRowMin(mat->A, diagV, diagIdx);
2874: MatGetRowMin(mat->B, offdiagV, offdiagIdx);
2875: VecGetArray(v, &a);
2876: VecGetArray(diagV, &diagA);
2877: VecGetArray(offdiagV, &offdiagA);
2878: for(r = 0; r < n; ++r) {
2879: if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2880: a[r] = diagA[r];
2881: idx[r] = cstart + diagIdx[r];
2882: } else {
2883: a[r] = offdiagA[r];
2884: idx[r] = cmap[offdiagIdx[r]];
2885: }
2886: }
2887: VecRestoreArray(v, &a);
2888: VecRestoreArray(diagV, &diagA);
2889: VecRestoreArray(offdiagV, &offdiagA);
2890: VecDestroy(&diagV);
2891: VecDestroy(&offdiagV);
2892: PetscFree2(diagIdx, offdiagIdx);
2893: return(0);
2894: }
2898: PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2899: {
2900: Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
2901: PetscInt n = A->rmap->n;
2902: PetscInt cstart = A->cmap->rstart;
2903: PetscInt *cmap = mat->garray;
2904: PetscInt *diagIdx, *offdiagIdx;
2905: Vec diagV, offdiagV;
2906: PetscScalar *a, *diagA, *offdiagA;
2907: PetscInt r;
2911: PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);
2912: VecCreateSeq(((PetscObject)A)->comm, n, &diagV);
2913: VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);
2914: MatGetRowMax(mat->A, diagV, diagIdx);
2915: MatGetRowMax(mat->B, offdiagV, offdiagIdx);
2916: VecGetArray(v, &a);
2917: VecGetArray(diagV, &diagA);
2918: VecGetArray(offdiagV, &offdiagA);
2919: for(r = 0; r < n; ++r) {
2920: if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2921: a[r] = diagA[r];
2922: idx[r] = cstart + diagIdx[r];
2923: } else {
2924: a[r] = offdiagA[r];
2925: idx[r] = cmap[offdiagIdx[r]];
2926: }
2927: }
2928: VecRestoreArray(v, &a);
2929: VecRestoreArray(diagV, &diagA);
2930: VecRestoreArray(offdiagV, &offdiagA);
2931: VecDestroy(&diagV);
2932: VecDestroy(&offdiagV);
2933: PetscFree2(diagIdx, offdiagIdx);
2934: return(0);
2935: }
2939: PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2940: {
2942: Mat *dummy;
2945: MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);
2946: *newmat = *dummy;
2947: PetscFree(dummy);
2948: return(0);
2949: }
2951: extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2955: PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
2956: {
2957: Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
2961: MatInvertBlockDiagonal(a->A,values);
2962: return(0);
2963: }
2966: /* -------------------------------------------------------------------*/
2967: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2968: MatGetRow_MPIAIJ,
2969: MatRestoreRow_MPIAIJ,
2970: MatMult_MPIAIJ,
2971: /* 4*/ MatMultAdd_MPIAIJ,
2972: MatMultTranspose_MPIAIJ,
2973: MatMultTransposeAdd_MPIAIJ,
2974: #ifdef PETSC_HAVE_PBGL
2975: MatSolve_MPIAIJ,
2976: #else
2977: 0,
2978: #endif
2979: 0,
2980: 0,
2981: /*10*/ 0,
2982: 0,
2983: 0,
2984: MatSOR_MPIAIJ,
2985: MatTranspose_MPIAIJ,
2986: /*15*/ MatGetInfo_MPIAIJ,
2987: MatEqual_MPIAIJ,
2988: MatGetDiagonal_MPIAIJ,
2989: MatDiagonalScale_MPIAIJ,
2990: MatNorm_MPIAIJ,
2991: /*20*/ MatAssemblyBegin_MPIAIJ,
2992: MatAssemblyEnd_MPIAIJ,
2993: MatSetOption_MPIAIJ,
2994: MatZeroEntries_MPIAIJ,
2995: /*24*/ MatZeroRows_MPIAIJ,
2996: 0,
2997: #ifdef PETSC_HAVE_PBGL
2998: 0,
2999: #else
3000: 0,
3001: #endif
3002: 0,
3003: 0,
3004: /*29*/ MatSetUp_MPIAIJ,
3005: #ifdef PETSC_HAVE_PBGL
3006: 0,
3007: #else
3008: 0,
3009: #endif
3010: 0,
3011: 0,
3012: 0,
3013: /*34*/ MatDuplicate_MPIAIJ,
3014: 0,
3015: 0,
3016: 0,
3017: 0,
3018: /*39*/ MatAXPY_MPIAIJ,
3019: MatGetSubMatrices_MPIAIJ,
3020: MatIncreaseOverlap_MPIAIJ,
3021: MatGetValues_MPIAIJ,
3022: MatCopy_MPIAIJ,
3023: /*44*/ MatGetRowMax_MPIAIJ,
3024: MatScale_MPIAIJ,
3025: 0,
3026: 0,
3027: MatZeroRowsColumns_MPIAIJ,
3028: /*49*/ MatSetBlockSize_MPIAIJ,
3029: 0,
3030: 0,
3031: 0,
3032: 0,
3033: /*54*/ MatFDColoringCreate_MPIAIJ,
3034: 0,
3035: MatSetUnfactored_MPIAIJ,
3036: MatPermute_MPIAIJ,
3037: 0,
3038: /*59*/ MatGetSubMatrix_MPIAIJ,
3039: MatDestroy_MPIAIJ,
3040: MatView_MPIAIJ,
3041: 0,
3042: 0,
3043: /*64*/ 0,
3044: 0,
3045: 0,
3046: 0,
3047: 0,
3048: /*69*/ MatGetRowMaxAbs_MPIAIJ,
3049: MatGetRowMinAbs_MPIAIJ,
3050: 0,
3051: MatSetColoring_MPIAIJ,
3052: #if defined(PETSC_HAVE_ADIC)
3053: MatSetValuesAdic_MPIAIJ,
3054: #else
3055: 0,
3056: #endif
3057: MatSetValuesAdifor_MPIAIJ,
3058: /*75*/ MatFDColoringApply_AIJ,
3059: 0,
3060: 0,
3061: 0,
3062: 0,
3063: /*80*/ 0,
3064: 0,
3065: 0,
3066: /*83*/ MatLoad_MPIAIJ,
3067: 0,
3068: 0,
3069: 0,
3070: 0,
3071: 0,
3072: /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3073: MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3074: MatMatMultNumeric_MPIAIJ_MPIAIJ,
3075: MatPtAP_Basic,
3076: MatPtAPSymbolic_MPIAIJ,
3077: /*94*/ MatPtAPNumeric_MPIAIJ,
3078: 0,
3079: 0,
3080: 0,
3081: 0,
3082: /*99*/ 0,
3083: MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3084: MatPtAPNumeric_MPIAIJ_MPIAIJ,
3085: MatConjugate_MPIAIJ,
3086: 0,
3087: /*104*/MatSetValuesRow_MPIAIJ,
3088: MatRealPart_MPIAIJ,
3089: MatImaginaryPart_MPIAIJ,
3090: 0,
3091: 0,
3092: /*109*/0,
3093: MatGetRedundantMatrix_MPIAIJ,
3094: MatGetRowMin_MPIAIJ,
3095: 0,
3096: 0,
3097: /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3098: 0,
3099: 0,
3100: 0,
3101: 0,
3102: /*119*/0,
3103: 0,
3104: 0,
3105: 0,
3106: MatGetMultiProcBlock_MPIAIJ,
3107: /*124*/MatFindNonZeroRows_MPIAIJ,
3108: MatGetColumnNorms_MPIAIJ,
3109: MatInvertBlockDiagonal_MPIAIJ,
3110: 0,
3111: MatGetSubMatricesParallel_MPIAIJ,
3112: /*129*/0,
3113: MatTransposeMatMult_MPIAIJ_MPIAIJ,
3114: MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3115: MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3116: 0,
3117: /*134*/0,
3118: 0,
3119: 0,
3120: 0,
3121: 0
3122: };
3124: /* ----------------------------------------------------------------------------------------*/
3126: EXTERN_C_BEGIN
3129: PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
3130: {
3131: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
3135: MatStoreValues(aij->A);
3136: MatStoreValues(aij->B);
3137: return(0);
3138: }
3139: EXTERN_C_END
3141: EXTERN_C_BEGIN
3144: PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
3145: {
3146: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
3150: MatRetrieveValues(aij->A);
3151: MatRetrieveValues(aij->B);
3152: return(0);
3153: }
3154: EXTERN_C_END
3156: EXTERN_C_BEGIN
3159: PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3160: {
3161: Mat_MPIAIJ *b;
3163: PetscInt i;
3164: PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3167: if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3168: if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3169: if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3170: if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3171: if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3172: if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3174: PetscLayoutSetBlockSize(B->rmap,1);
3175: PetscLayoutSetBlockSize(B->cmap,1);
3176: PetscLayoutSetUp(B->rmap);
3177: PetscLayoutSetUp(B->cmap);
3178: if (d_nnz) {
3179: for (i=0; i<B->rmap->n; i++) {
3180: if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3181: }
3182: }
3183: if (o_nnz) {
3184: for (i=0; i<B->rmap->n; i++) {
3185: if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3186: }
3187: }
3188: b = (Mat_MPIAIJ*)B->data;
3190: if (!B->preallocated) {
3191: /* Explicitly create 2 MATSEQAIJ matrices. */
3192: MatCreate(PETSC_COMM_SELF,&b->A);
3193: MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);
3194: MatSetType(b->A,MATSEQAIJ);
3195: PetscLogObjectParent(B,b->A);
3196: MatCreate(PETSC_COMM_SELF,&b->B);
3197: MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);
3198: MatSetType(b->B,MATSEQAIJ);
3199: PetscLogObjectParent(B,b->B);
3200: }
3202: MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);
3203: MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);
3204: /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3205: if (!d_realalloc) {MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);}
3206: if (!o_realalloc) {MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);}
3207: B->preallocated = PETSC_TRUE;
3208: return(0);
3209: }
3210: EXTERN_C_END
3214: PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3215: {
3216: Mat mat;
3217: Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3221: *newmat = 0;
3222: MatCreate(((PetscObject)matin)->comm,&mat);
3223: MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);
3224: MatSetType(mat,((PetscObject)matin)->type_name);
3225: PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));
3226: a = (Mat_MPIAIJ*)mat->data;
3227:
3228: mat->factortype = matin->factortype;
3229: mat->rmap->bs = matin->rmap->bs;
3230: mat->assembled = PETSC_TRUE;
3231: mat->insertmode = NOT_SET_VALUES;
3232: mat->preallocated = PETSC_TRUE;
3234: a->size = oldmat->size;
3235: a->rank = oldmat->rank;
3236: a->donotstash = oldmat->donotstash;
3237: a->roworiented = oldmat->roworiented;
3238: a->rowindices = 0;
3239: a->rowvalues = 0;
3240: a->getrowactive = PETSC_FALSE;
3242: PetscLayoutReference(matin->rmap,&mat->rmap);
3243: PetscLayoutReference(matin->cmap,&mat->cmap);
3245: if (oldmat->colmap) {
3246: #if defined (PETSC_USE_CTABLE)
3247: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
3248: #else
3249: PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);
3250: PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));
3251: PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));
3252: #endif
3253: } else a->colmap = 0;
3254: if (oldmat->garray) {
3255: PetscInt len;
3256: len = oldmat->B->cmap->n;
3257: PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);
3258: PetscLogObjectMemory(mat,len*sizeof(PetscInt));
3259: if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt)); }
3260: } else a->garray = 0;
3261:
3262: VecDuplicate(oldmat->lvec,&a->lvec);
3263: PetscLogObjectParent(mat,a->lvec);
3264: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
3265: PetscLogObjectParent(mat,a->Mvctx);
3266: MatDuplicate(oldmat->A,cpvalues,&a->A);
3267: PetscLogObjectParent(mat,a->A);
3268: MatDuplicate(oldmat->B,cpvalues,&a->B);
3269: PetscLogObjectParent(mat,a->B);
3270: PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);
3271: *newmat = mat;
3272: return(0);
3273: }
3279: PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3280: {
3281: PetscScalar *vals,*svals;
3282: MPI_Comm comm = ((PetscObject)viewer)->comm;
3284: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
3285: PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3286: PetscInt header[4],*rowlengths = 0,M,N,m,*cols;
3287: PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3288: PetscInt cend,cstart,n,*rowners,sizesset=1;
3289: int fd;
3292: MPI_Comm_size(comm,&size);
3293: MPI_Comm_rank(comm,&rank);
3294: if (!rank) {
3295: PetscViewerBinaryGetDescriptor(viewer,&fd);
3296: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
3297: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3298: }
3300: if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3302: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
3303: M = header[1]; N = header[2];
3304: /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3305: if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3306: if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3307:
3308: /* If global sizes are set, check if they are consistent with that given in the file */
3309: if (sizesset) {
3310: MatGetSize(newMat,&grows,&gcols);
3311: }
3312: if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3313: if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3315: /* determine ownership of all rows */
3316: if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3317: else m = newMat->rmap->n; /* Set by user */
3318:
3319: PetscMalloc((size+1)*sizeof(PetscInt),&rowners);
3320: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
3322: /* First process needs enough room for process with most rows */
3323: if (!rank) {
3324: mmax = rowners[1];
3325: for (i=2; i<size; i++) {
3326: mmax = PetscMax(mmax,rowners[i]);
3327: }
3328: } else mmax = m;
3330: rowners[0] = 0;
3331: for (i=2; i<=size; i++) {
3332: rowners[i] += rowners[i-1];
3333: }
3334: rstart = rowners[rank];
3335: rend = rowners[rank+1];
3337: /* distribute row lengths to all processors */
3338: PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);
3339: if (!rank) {
3340: PetscBinaryRead(fd,ourlens,m,PETSC_INT);
3341: PetscMalloc(m*sizeof(PetscInt),&rowlengths);
3342: PetscMalloc(size*sizeof(PetscInt),&procsnz);
3343: PetscMemzero(procsnz,size*sizeof(PetscInt));
3344: for (j=0; j<m; j++) {
3345: procsnz[0] += ourlens[j];
3346: }
3347: for (i=1; i<size; i++) {
3348: PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);
3349: /* calculate the number of nonzeros on each processor */
3350: for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3351: procsnz[i] += rowlengths[j];
3352: }
3353: MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
3354: }
3355: PetscFree(rowlengths);
3356: } else {
3357: MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);
3358: }
3360: if (!rank) {
3361: /* determine max buffer needed and allocate it */
3362: maxnz = 0;
3363: for (i=0; i<size; i++) {
3364: maxnz = PetscMax(maxnz,procsnz[i]);
3365: }
3366: PetscMalloc(maxnz*sizeof(PetscInt),&cols);
3368: /* read in my part of the matrix column indices */
3369: nz = procsnz[0];
3370: PetscMalloc(nz*sizeof(PetscInt),&mycols);
3371: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
3373: /* read in every one elses and ship off */
3374: for (i=1; i<size; i++) {
3375: nz = procsnz[i];
3376: PetscBinaryRead(fd,cols,nz,PETSC_INT);
3377: MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);
3378: }
3379: PetscFree(cols);
3380: } else {
3381: /* determine buffer space needed for message */
3382: nz = 0;
3383: for (i=0; i<m; i++) {
3384: nz += ourlens[i];
3385: }
3386: PetscMalloc(nz*sizeof(PetscInt),&mycols);
3388: /* receive message of column indices*/
3389: MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);
3390: }
3392: /* determine column ownership if matrix is not square */
3393: if (N != M) {
3394: if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3395: else n = newMat->cmap->n;
3396: MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);
3397: cstart = cend - n;
3398: } else {
3399: cstart = rstart;
3400: cend = rend;
3401: n = cend - cstart;
3402: }
3404: /* loop over local rows, determining number of off diagonal entries */
3405: PetscMemzero(offlens,m*sizeof(PetscInt));
3406: jj = 0;
3407: for (i=0; i<m; i++) {
3408: for (j=0; j<ourlens[i]; j++) {
3409: if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3410: jj++;
3411: }
3412: }
3414: for (i=0; i<m; i++) {
3415: ourlens[i] -= offlens[i];
3416: }
3417: if (!sizesset) {
3418: MatSetSizes(newMat,m,n,M,N);
3419: }
3420: MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);
3422: for (i=0; i<m; i++) {
3423: ourlens[i] += offlens[i];
3424: }
3426: if (!rank) {
3427: PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);
3429: /* read in my part of the matrix numerical values */
3430: nz = procsnz[0];
3431: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3432:
3433: /* insert into matrix */
3434: jj = rstart;
3435: smycols = mycols;
3436: svals = vals;
3437: for (i=0; i<m; i++) {
3438: MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
3439: smycols += ourlens[i];
3440: svals += ourlens[i];
3441: jj++;
3442: }
3444: /* read in other processors and ship out */
3445: for (i=1; i<size; i++) {
3446: nz = procsnz[i];
3447: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3448: MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);
3449: }
3450: PetscFree(procsnz);
3451: } else {
3452: /* receive numeric values */
3453: PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);
3455: /* receive message of values*/
3456: MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);
3458: /* insert into matrix */
3459: jj = rstart;
3460: smycols = mycols;
3461: svals = vals;
3462: for (i=0; i<m; i++) {
3463: MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
3464: smycols += ourlens[i];
3465: svals += ourlens[i];
3466: jj++;
3467: }
3468: }
3469: PetscFree2(ourlens,offlens);
3470: PetscFree(vals);
3471: PetscFree(mycols);
3472: PetscFree(rowners);
3474: MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);
3475: MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);
3476: return(0);
3477: }
3481: PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3482: {
3484: IS iscol_local;
3485: PetscInt csize;
3488: ISGetLocalSize(iscol,&csize);
3489: if (call == MAT_REUSE_MATRIX) {
3490: PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);
3491: if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3492: } else {
3493: ISAllGather(iscol,&iscol_local);
3494: }
3495: MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);
3496: if (call == MAT_INITIAL_MATRIX) {
3497: PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);
3498: ISDestroy(&iscol_local);
3499: }
3500: return(0);
3501: }
3505: /*
3506: Not great since it makes two copies of the submatrix, first an SeqAIJ
3507: in local and then by concatenating the local matrices the end result.
3508: Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3510: Note: This requires a sequential iscol with all indices.
3511: */
3512: PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3513: {
3515: PetscMPIInt rank,size;
3516: PetscInt i,m,n,rstart,row,rend,nz,*cwork,j;
3517: PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3518: Mat *local,M,Mreuse;
3519: MatScalar *vwork,*aa;
3520: MPI_Comm comm = ((PetscObject)mat)->comm;
3521: Mat_SeqAIJ *aij;
3525: MPI_Comm_rank(comm,&rank);
3526: MPI_Comm_size(comm,&size);
3528: if (call == MAT_REUSE_MATRIX) {
3529: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);
3530: if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3531: local = &Mreuse;
3532: MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);
3533: } else {
3534: MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);
3535: Mreuse = *local;
3536: PetscFree(local);
3537: }
3539: /*
3540: m - number of local rows
3541: n - number of columns (same on all processors)
3542: rstart - first row in new global matrix generated
3543: */
3544: MatGetSize(Mreuse,&m,&n);
3545: if (call == MAT_INITIAL_MATRIX) {
3546: aij = (Mat_SeqAIJ*)(Mreuse)->data;
3547: ii = aij->i;
3548: jj = aij->j;
3550: /*
3551: Determine the number of non-zeros in the diagonal and off-diagonal
3552: portions of the matrix in order to do correct preallocation
3553: */
3555: /* first get start and end of "diagonal" columns */
3556: if (csize == PETSC_DECIDE) {
3557: ISGetSize(isrow,&mglobal);
3558: if (mglobal == n) { /* square matrix */
3559: nlocal = m;
3560: } else {
3561: nlocal = n/size + ((n % size) > rank);
3562: }
3563: } else {
3564: nlocal = csize;
3565: }
3566: MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);
3567: rstart = rend - nlocal;
3568: if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3570: /* next, compute all the lengths */
3571: PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);
3572: olens = dlens + m;
3573: for (i=0; i<m; i++) {
3574: jend = ii[i+1] - ii[i];
3575: olen = 0;
3576: dlen = 0;
3577: for (j=0; j<jend; j++) {
3578: if (*jj < rstart || *jj >= rend) olen++;
3579: else dlen++;
3580: jj++;
3581: }
3582: olens[i] = olen;
3583: dlens[i] = dlen;
3584: }
3585: MatCreate(comm,&M);
3586: MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);
3587: MatSetType(M,((PetscObject)mat)->type_name);
3588: MatMPIAIJSetPreallocation(M,0,dlens,0,olens);
3589: PetscFree(dlens);
3590: } else {
3591: PetscInt ml,nl;
3593: M = *newmat;
3594: MatGetLocalSize(M,&ml,&nl);
3595: if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3596: MatZeroEntries(M);
3597: /*
3598: The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3599: rather than the slower MatSetValues().
3600: */
3601: M->was_assembled = PETSC_TRUE;
3602: M->assembled = PETSC_FALSE;
3603: }
3604: MatGetOwnershipRange(M,&rstart,&rend);
3605: aij = (Mat_SeqAIJ*)(Mreuse)->data;
3606: ii = aij->i;
3607: jj = aij->j;
3608: aa = aij->a;
3609: for (i=0; i<m; i++) {
3610: row = rstart + i;
3611: nz = ii[i+1] - ii[i];
3612: cwork = jj; jj += nz;
3613: vwork = aa; aa += nz;
3614: MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
3615: }
3617: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
3618: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
3619: *newmat = M;
3621: /* save submatrix used in processor for next request */
3622: if (call == MAT_INITIAL_MATRIX) {
3623: PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
3624: MatDestroy(&Mreuse);
3625: }
3627: return(0);
3628: }
3630: EXTERN_C_BEGIN
3633: PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3634: {
3635: PetscInt m,cstart, cend,j,nnz,i,d;
3636: PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3637: const PetscInt *JJ;
3638: PetscScalar *values;
3642: if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3644: PetscLayoutSetBlockSize(B->rmap,1);
3645: PetscLayoutSetBlockSize(B->cmap,1);
3646: PetscLayoutSetUp(B->rmap);
3647: PetscLayoutSetUp(B->cmap);
3648: m = B->rmap->n;
3649: cstart = B->cmap->rstart;
3650: cend = B->cmap->rend;
3651: rstart = B->rmap->rstart;
3653: PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);
3655: #if defined(PETSC_USE_DEBUGGING)
3656: for (i=0; i<m; i++) {
3657: nnz = Ii[i+1]- Ii[i];
3658: JJ = J + Ii[i];
3659: if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3660: if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3661: if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3662: }
3663: #endif
3665: for (i=0; i<m; i++) {
3666: nnz = Ii[i+1]- Ii[i];
3667: JJ = J + Ii[i];
3668: nnz_max = PetscMax(nnz_max,nnz);
3669: d = 0;
3670: for (j=0; j<nnz; j++) {
3671: if (cstart <= JJ[j] && JJ[j] < cend) d++;
3672: }
3673: d_nnz[i] = d;
3674: o_nnz[i] = nnz - d;
3675: }
3676: MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
3677: PetscFree2(d_nnz,o_nnz);
3679: if (v) values = (PetscScalar*)v;
3680: else {
3681: PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);
3682: PetscMemzero(values,nnz_max*sizeof(PetscScalar));
3683: }
3685: for (i=0; i<m; i++) {
3686: ii = i + rstart;
3687: nnz = Ii[i+1]- Ii[i];
3688: MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);
3689: }
3690: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3691: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3693: if (!v) {
3694: PetscFree(values);
3695: }
3696: MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3697: return(0);
3698: }
3699: EXTERN_C_END
3703: /*@
3704: MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3705: (the default parallel PETSc format).
3707: Collective on MPI_Comm
3709: Input Parameters:
3710: + B - the matrix
3711: . i - the indices into j for the start of each local row (starts with zero)
3712: . j - the column indices for each local row (starts with zero)
3713: - v - optional values in the matrix
3715: Level: developer
3717: Notes:
3718: The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3719: thus you CANNOT change the matrix entries by changing the values of a[] after you have
3720: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3722: The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3724: The format which is used for the sparse matrix input, is equivalent to a
3725: row-major ordering.. i.e for the following matrix, the input data expected is
3726: as shown:
3728: 1 0 0
3729: 2 0 3 P0
3730: -------
3731: 4 5 6 P1
3733: Process0 [P0]: rows_owned=[0,1]
3734: i = {0,1,3} [size = nrow+1 = 2+1]
3735: j = {0,0,2} [size = nz = 6]
3736: v = {1,2,3} [size = nz = 6]
3738: Process1 [P1]: rows_owned=[2]
3739: i = {0,3} [size = nrow+1 = 1+1]
3740: j = {0,1,2} [size = nz = 6]
3741: v = {4,5,6} [size = nz = 6]
3743: .keywords: matrix, aij, compressed row, sparse, parallel
3745: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3746: MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3747: @*/
3748: PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3749: {
3753: PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
3754: return(0);
3755: }
3759: /*@C
3760: MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3761: (the default parallel PETSc format). For good matrix assembly performance
3762: the user should preallocate the matrix storage by setting the parameters
3763: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3764: performance can be increased by more than a factor of 50.
3766: Collective on MPI_Comm
3768: Input Parameters:
3769: + A - the matrix
3770: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3771: (same value is used for all local rows)
3772: . d_nnz - array containing the number of nonzeros in the various rows of the
3773: DIAGONAL portion of the local submatrix (possibly different for each row)
3774: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3775: The size of this array is equal to the number of local rows, i.e 'm'.
3776: For matrices that will be factored, you must leave room for (and set)
3777: the diagonal entry even if it is zero.
3778: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3779: submatrix (same value is used for all local rows).
3780: - o_nnz - array containing the number of nonzeros in the various rows of the
3781: OFF-DIAGONAL portion of the local submatrix (possibly different for
3782: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3783: structure. The size of this array is equal to the number
3784: of local rows, i.e 'm'.
3786: If the *_nnz parameter is given then the *_nz parameter is ignored
3788: The AIJ format (also called the Yale sparse matrix format or
3789: compressed row storage (CSR)), is fully compatible with standard Fortran 77
3790: storage. The stored row and column indices begin with zero.
3791: See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3793: The parallel matrix is partitioned such that the first m0 rows belong to
3794: process 0, the next m1 rows belong to process 1, the next m2 rows belong
3795: to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3797: The DIAGONAL portion of the local submatrix of a processor can be defined
3798: as the submatrix which is obtained by extraction the part corresponding to
3799: the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3800: first row that belongs to the processor, r2 is the last row belonging to
3801: the this processor, and c1-c2 is range of indices of the local part of a
3802: vector suitable for applying the matrix to. This is an mxn matrix. In the
3803: common case of a square matrix, the row and column ranges are the same and
3804: the DIAGONAL part is also square. The remaining portion of the local
3805: submatrix (mxN) constitute the OFF-DIAGONAL portion.
3807: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3809: You can call MatGetInfo() to get information on how effective the preallocation was;
3810: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3811: You can also run with the option -info and look for messages with the string
3812: malloc in them to see if additional memory allocation was needed.
3814: Example usage:
3815:
3816: Consider the following 8x8 matrix with 34 non-zero values, that is
3817: assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3818: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3819: as follows:
3821: .vb
3822: 1 2 0 | 0 3 0 | 0 4
3823: Proc0 0 5 6 | 7 0 0 | 8 0
3824: 9 0 10 | 11 0 0 | 12 0
3825: -------------------------------------
3826: 13 0 14 | 15 16 17 | 0 0
3827: Proc1 0 18 0 | 19 20 21 | 0 0
3828: 0 0 0 | 22 23 0 | 24 0
3829: -------------------------------------
3830: Proc2 25 26 27 | 0 0 28 | 29 0
3831: 30 0 0 | 31 32 33 | 0 34
3832: .ve
3834: This can be represented as a collection of submatrices as:
3836: .vb
3837: A B C
3838: D E F
3839: G H I
3840: .ve
3842: Where the submatrices A,B,C are owned by proc0, D,E,F are
3843: owned by proc1, G,H,I are owned by proc2.
3845: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3846: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3847: The 'M','N' parameters are 8,8, and have the same values on all procs.
3849: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3850: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3851: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3852: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3853: part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3854: matrix, ans [DF] as another SeqAIJ matrix.
3856: When d_nz, o_nz parameters are specified, d_nz storage elements are
3857: allocated for every row of the local diagonal submatrix, and o_nz
3858: storage locations are allocated for every row of the OFF-DIAGONAL submat.
3859: One way to choose d_nz and o_nz is to use the max nonzerors per local
3860: rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3861: In this case, the values of d_nz,o_nz are:
3862: .vb
3863: proc0 : dnz = 2, o_nz = 2
3864: proc1 : dnz = 3, o_nz = 2
3865: proc2 : dnz = 1, o_nz = 4
3866: .ve
3867: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3868: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3869: for proc3. i.e we are using 12+15+10=37 storage locations to store
3870: 34 values.
3872: When d_nnz, o_nnz parameters are specified, the storage is specified
3873: for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3874: In the above case the values for d_nnz,o_nnz are:
3875: .vb
3876: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3877: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3878: proc2: d_nnz = [1,1] and o_nnz = [4,4]
3879: .ve
3880: Here the space allocated is sum of all the above values i.e 34, and
3881: hence pre-allocation is perfect.
3883: Level: intermediate
3885: .keywords: matrix, aij, compressed row, sparse, parallel
3887: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3888: MPIAIJ, MatGetInfo()
3889: @*/
3890: PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3891: {
3897: PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
3898: return(0);
3899: }
3903: /*@
3904: MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3905: CSR format the local rows.
3907: Collective on MPI_Comm
3909: Input Parameters:
3910: + comm - MPI communicator
3911: . m - number of local rows (Cannot be PETSC_DECIDE)
3912: . n - This value should be the same as the local size used in creating the
3913: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3914: calculated if N is given) For square matrices n is almost always m.
3915: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3916: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3917: . i - row indices
3918: . j - column indices
3919: - a - matrix values
3921: Output Parameter:
3922: . mat - the matrix
3924: Level: intermediate
3926: Notes:
3927: The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3928: thus you CANNOT change the matrix entries by changing the values of a[] after you have
3929: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3931: The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3933: The format which is used for the sparse matrix input, is equivalent to a
3934: row-major ordering.. i.e for the following matrix, the input data expected is
3935: as shown:
3937: 1 0 0
3938: 2 0 3 P0
3939: -------
3940: 4 5 6 P1
3942: Process0 [P0]: rows_owned=[0,1]
3943: i = {0,1,3} [size = nrow+1 = 2+1]
3944: j = {0,0,2} [size = nz = 6]
3945: v = {1,2,3} [size = nz = 6]
3947: Process1 [P1]: rows_owned=[2]
3948: i = {0,3} [size = nrow+1 = 1+1]
3949: j = {0,1,2} [size = nz = 6]
3950: v = {4,5,6} [size = nz = 6]
3952: .keywords: matrix, aij, compressed row, sparse, parallel
3954: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3955: MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3956: @*/
3957: PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3958: {
3962: if (i[0]) {
3963: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3964: }
3965: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3966: MatCreate(comm,mat);
3967: MatSetSizes(*mat,m,n,M,N);
3968: MatSetType(*mat,MATMPIAIJ);
3969: MatMPIAIJSetPreallocationCSR(*mat,i,j,a);
3970: return(0);
3971: }
3975: /*@C
3976: MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3977: (the default parallel PETSc format). For good matrix assembly performance
3978: the user should preallocate the matrix storage by setting the parameters
3979: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3980: performance can be increased by more than a factor of 50.
3982: Collective on MPI_Comm
3984: Input Parameters:
3985: + comm - MPI communicator
3986: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3987: This value should be the same as the local size used in creating the
3988: y vector for the matrix-vector product y = Ax.
3989: . n - This value should be the same as the local size used in creating the
3990: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3991: calculated if N is given) For square matrices n is almost always m.
3992: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3993: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3994: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3995: (same value is used for all local rows)
3996: . d_nnz - array containing the number of nonzeros in the various rows of the
3997: DIAGONAL portion of the local submatrix (possibly different for each row)
3998: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3999: The size of this array is equal to the number of local rows, i.e 'm'.
4000: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
4001: submatrix (same value is used for all local rows).
4002: - o_nnz - array containing the number of nonzeros in the various rows of the
4003: OFF-DIAGONAL portion of the local submatrix (possibly different for
4004: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4005: structure. The size of this array is equal to the number
4006: of local rows, i.e 'm'.
4008: Output Parameter:
4009: . A - the matrix
4011: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4012: MatXXXXSetPreallocation() paradgm instead of this routine directly.
4013: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4015: Notes:
4016: If the *_nnz parameter is given then the *_nz parameter is ignored
4018: m,n,M,N parameters specify the size of the matrix, and its partitioning across
4019: processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4020: storage requirements for this matrix.
4022: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4023: processor than it must be used on all processors that share the object for
4024: that argument.
4026: The user MUST specify either the local or global matrix dimensions
4027: (possibly both).
4029: The parallel matrix is partitioned across processors such that the
4030: first m0 rows belong to process 0, the next m1 rows belong to
4031: process 1, the next m2 rows belong to process 2 etc.. where
4032: m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4033: values corresponding to [m x N] submatrix.
4035: The columns are logically partitioned with the n0 columns belonging
4036: to 0th partition, the next n1 columns belonging to the next
4037: partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4039: The DIAGONAL portion of the local submatrix on any given processor
4040: is the submatrix corresponding to the rows and columns m,n
4041: corresponding to the given processor. i.e diagonal matrix on
4042: process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4043: etc. The remaining portion of the local submatrix [m x (N-n)]
4044: constitute the OFF-DIAGONAL portion. The example below better
4045: illustrates this concept.
4047: For a square global matrix we define each processor's diagonal portion
4048: to be its local rows and the corresponding columns (a square submatrix);
4049: each processor's off-diagonal portion encompasses the remainder of the
4050: local matrix (a rectangular submatrix).
4052: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4054: When calling this routine with a single process communicator, a matrix of
4055: type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4056: type of communicator, use the construction mechanism:
4057: MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4058:
4059: By default, this format uses inodes (identical nodes) when possible.
4060: We search for consecutive rows with the same nonzero structure, thereby
4061: reusing matrix information to achieve increased efficiency.
4063: Options Database Keys:
4064: + -mat_no_inode - Do not use inodes
4065: . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4066: - -mat_aij_oneindex - Internally use indexing starting at 1
4067: rather than 0. Note that when calling MatSetValues(),
4068: the user still MUST index entries starting at 0!
4071: Example usage:
4072:
4073: Consider the following 8x8 matrix with 34 non-zero values, that is
4074: assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4075: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4076: as follows:
4078: .vb
4079: 1 2 0 | 0 3 0 | 0 4
4080: Proc0 0 5 6 | 7 0 0 | 8 0
4081: 9 0 10 | 11 0 0 | 12 0
4082: -------------------------------------
4083: 13 0 14 | 15 16 17 | 0 0
4084: Proc1 0 18 0 | 19 20 21 | 0 0
4085: 0 0 0 | 22 23 0 | 24 0
4086: -------------------------------------
4087: Proc2 25 26 27 | 0 0 28 | 29 0
4088: 30 0 0 | 31 32 33 | 0 34
4089: .ve
4091: This can be represented as a collection of submatrices as:
4093: .vb
4094: A B C
4095: D E F
4096: G H I
4097: .ve
4099: Where the submatrices A,B,C are owned by proc0, D,E,F are
4100: owned by proc1, G,H,I are owned by proc2.
4102: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4103: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4104: The 'M','N' parameters are 8,8, and have the same values on all procs.
4106: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4107: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4108: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4109: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4110: part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4111: matrix, ans [DF] as another SeqAIJ matrix.
4113: When d_nz, o_nz parameters are specified, d_nz storage elements are
4114: allocated for every row of the local diagonal submatrix, and o_nz
4115: storage locations are allocated for every row of the OFF-DIAGONAL submat.
4116: One way to choose d_nz and o_nz is to use the max nonzerors per local
4117: rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4118: In this case, the values of d_nz,o_nz are:
4119: .vb
4120: proc0 : dnz = 2, o_nz = 2
4121: proc1 : dnz = 3, o_nz = 2
4122: proc2 : dnz = 1, o_nz = 4
4123: .ve
4124: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4125: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4126: for proc3. i.e we are using 12+15+10=37 storage locations to store
4127: 34 values.
4129: When d_nnz, o_nnz parameters are specified, the storage is specified
4130: for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4131: In the above case the values for d_nnz,o_nnz are:
4132: .vb
4133: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4134: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4135: proc2: d_nnz = [1,1] and o_nnz = [4,4]
4136: .ve
4137: Here the space allocated is sum of all the above values i.e 34, and
4138: hence pre-allocation is perfect.
4140: Level: intermediate
4142: .keywords: matrix, aij, compressed row, sparse, parallel
4144: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4145: MPIAIJ, MatCreateMPIAIJWithArrays()
4146: @*/
4147: PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4148: {
4150: PetscMPIInt size;
4153: MatCreate(comm,A);
4154: MatSetSizes(*A,m,n,M,N);
4155: MPI_Comm_size(comm,&size);
4156: if (size > 1) {
4157: MatSetType(*A,MATMPIAIJ);
4158: MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
4159: } else {
4160: MatSetType(*A,MATSEQAIJ);
4161: MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
4162: }
4163: return(0);
4164: }
4168: PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4169: {
4170: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4173: *Ad = a->A;
4174: *Ao = a->B;
4175: *colmap = a->garray;
4176: return(0);
4177: }
4181: PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4182: {
4184: PetscInt i;
4185: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4188: if (coloring->ctype == IS_COLORING_GLOBAL) {
4189: ISColoringValue *allcolors,*colors;
4190: ISColoring ocoloring;
4192: /* set coloring for diagonal portion */
4193: MatSetColoring_SeqAIJ(a->A,coloring);
4195: /* set coloring for off-diagonal portion */
4196: ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);
4197: PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);
4198: for (i=0; i<a->B->cmap->n; i++) {
4199: colors[i] = allcolors[a->garray[i]];
4200: }
4201: PetscFree(allcolors);
4202: ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);
4203: MatSetColoring_SeqAIJ(a->B,ocoloring);
4204: ISColoringDestroy(&ocoloring);
4205: } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4206: ISColoringValue *colors;
4207: PetscInt *larray;
4208: ISColoring ocoloring;
4210: /* set coloring for diagonal portion */
4211: PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);
4212: for (i=0; i<a->A->cmap->n; i++) {
4213: larray[i] = i + A->cmap->rstart;
4214: }
4215: ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);
4216: PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);
4217: for (i=0; i<a->A->cmap->n; i++) {
4218: colors[i] = coloring->colors[larray[i]];
4219: }
4220: PetscFree(larray);
4221: ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);
4222: MatSetColoring_SeqAIJ(a->A,ocoloring);
4223: ISColoringDestroy(&ocoloring);
4225: /* set coloring for off-diagonal portion */
4226: PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);
4227: ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);
4228: PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);
4229: for (i=0; i<a->B->cmap->n; i++) {
4230: colors[i] = coloring->colors[larray[i]];
4231: }
4232: PetscFree(larray);
4233: ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);
4234: MatSetColoring_SeqAIJ(a->B,ocoloring);
4235: ISColoringDestroy(&ocoloring);
4236: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4238: return(0);
4239: }
4241: #if defined(PETSC_HAVE_ADIC)
4244: PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4245: {
4246: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4250: MatSetValuesAdic_SeqAIJ(a->A,advalues);
4251: MatSetValuesAdic_SeqAIJ(a->B,advalues);
4252: return(0);
4253: }
4254: #endif
4258: PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4259: {
4260: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4264: MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);
4265: MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);
4266: return(0);
4267: }
4271: PetscErrorCode MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4272: {
4274: PetscInt m,N,i,rstart,nnz,*dnz,*onz;
4275: PetscInt *indx;
4278: /* This routine will ONLY return MPIAIJ type matrix */
4279: MatGetSize(inmat,&m,&N);
4280: if (n == PETSC_DECIDE){
4281: PetscSplitOwnership(comm,&n,&N);
4282: }
4283: MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);
4284: rstart -= m;
4286: MatPreallocateInitialize(comm,m,n,dnz,onz);
4287: for (i=0;i<m;i++) {
4288: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);
4289: MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);
4290: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);
4291: }
4292:
4293: MatCreate(comm,outmat);
4294: MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
4295: MatSetType(*outmat,MATMPIAIJ);
4296: MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);
4297: MatPreallocateFinalize(dnz,onz);
4298: return(0);
4299: }
4303: PetscErrorCode MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4304: {
4306: PetscInt m,N,i,rstart,nnz,Ii;
4307: PetscInt *indx;
4308: PetscScalar *values;
4311: MatGetSize(inmat,&m,&N);
4312: MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);
4313: for (i=0;i<m;i++) {
4314: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
4315: Ii = i + rstart;
4316: MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);
4317: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
4318: }
4319: MatDestroy(&inmat);
4320: MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);
4321: MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);
4322: return(0);
4323: }
4327: /*@
4328: MatMerge - Creates a single large PETSc matrix by concatinating sequential
4329: matrices from each processor
4331: Collective on MPI_Comm
4333: Input Parameters:
4334: + comm - the communicators the parallel matrix will live on
4335: . inmat - the input sequential matrices
4336: . n - number of local columns (or PETSC_DECIDE)
4337: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4339: Output Parameter:
4340: . outmat - the parallel matrix generated
4342: Level: advanced
4344: Notes: The number of columns of the matrix in EACH processor MUST be the same.
4346: @*/
4347: PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4348: {
4352: PetscLogEventBegin(MAT_Merge,inmat,0,0,0);
4353: if (scall == MAT_INITIAL_MATRIX){
4354: MatMergeSymbolic(comm,inmat,n,outmat);
4355: }
4356: MatMergeNumeric(comm,inmat,n,*outmat);
4357: PetscLogEventEnd(MAT_Merge,inmat,0,0,0);
4358: return(0);
4359: }
4363: PetscErrorCode MatFileSplit(Mat A,char *outfile)
4364: {
4365: PetscErrorCode ierr;
4366: PetscMPIInt rank;
4367: PetscInt m,N,i,rstart,nnz;
4368: size_t len;
4369: const PetscInt *indx;
4370: PetscViewer out;
4371: char *name;
4372: Mat B;
4373: const PetscScalar *values;
4376: MatGetLocalSize(A,&m,0);
4377: MatGetSize(A,0,&N);
4378: /* Should this be the type of the diagonal block of A? */
4379: MatCreate(PETSC_COMM_SELF,&B);
4380: MatSetSizes(B,m,N,m,N);
4381: MatSetType(B,MATSEQAIJ);
4382: MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
4383: MatGetOwnershipRange(A,&rstart,0);
4384: for (i=0;i<m;i++) {
4385: MatGetRow(A,i+rstart,&nnz,&indx,&values);
4386: MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);
4387: MatRestoreRow(A,i+rstart,&nnz,&indx,&values);
4388: }
4389: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
4390: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
4392: MPI_Comm_rank(((PetscObject)A)->comm,&rank);
4393: PetscStrlen(outfile,&len);
4394: PetscMalloc((len+5)*sizeof(char),&name);
4395: sprintf(name,"%s.%d",outfile,rank);
4396: PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);
4397: PetscFree(name);
4398: MatView(B,out);
4399: PetscViewerDestroy(&out);
4400: MatDestroy(&B);
4401: return(0);
4402: }
4404: extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4407: PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4408: {
4409: PetscErrorCode ierr;
4410: Mat_Merge_SeqsToMPI *merge;
4411: PetscContainer container;
4414: PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);
4415: if (container) {
4416: PetscContainerGetPointer(container,(void **)&merge);
4417: PetscFree(merge->id_r);
4418: PetscFree(merge->len_s);
4419: PetscFree(merge->len_r);
4420: PetscFree(merge->bi);
4421: PetscFree(merge->bj);
4422: PetscFree(merge->buf_ri[0]);
4423: PetscFree(merge->buf_ri);
4424: PetscFree(merge->buf_rj[0]);
4425: PetscFree(merge->buf_rj);
4426: PetscFree(merge->coi);
4427: PetscFree(merge->coj);
4428: PetscFree(merge->owners_co);
4429: PetscLayoutDestroy(&merge->rowmap);
4430: PetscFree(merge);
4431: PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);
4432: }
4433: MatDestroy_MPIAIJ(A);
4434: return(0);
4435: }
4437: #include <../src/mat/utils/freespace.h>
4438: #include <petscbt.h>
4442: /*@C
4443: MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
4444: matrices from each processor
4446: Collective on MPI_Comm
4448: Input Parameters:
4449: + comm - the communicators the parallel matrix will live on
4450: . seqmat - the input sequential matrices
4451: . m - number of local rows (or PETSC_DECIDE)
4452: . n - number of local columns (or PETSC_DECIDE)
4453: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4455: Output Parameter:
4456: . mpimat - the parallel matrix generated
4458: Level: advanced
4460: Notes:
4461: The dimensions of the sequential matrix in each processor MUST be the same.
4462: The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4463: destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4464: @*/
4465: PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4466: {
4467: PetscErrorCode ierr;
4468: MPI_Comm comm=((PetscObject)mpimat)->comm;
4469: Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
4470: PetscMPIInt size,rank,taga,*len_s;
4471: PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4472: PetscInt proc,m;
4473: PetscInt **buf_ri,**buf_rj;
4474: PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4475: PetscInt nrows,**buf_ri_k,**nextrow,**nextai;
4476: MPI_Request *s_waits,*r_waits;
4477: MPI_Status *status;
4478: MatScalar *aa=a->a;
4479: MatScalar **abuf_r,*ba_i;
4480: Mat_Merge_SeqsToMPI *merge;
4481: PetscContainer container;
4484: PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);
4486: MPI_Comm_size(comm,&size);
4487: MPI_Comm_rank(comm,&rank);
4489: PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);
4490: PetscContainerGetPointer(container,(void **)&merge);
4492: bi = merge->bi;
4493: bj = merge->bj;
4494: buf_ri = merge->buf_ri;
4495: buf_rj = merge->buf_rj;
4497: PetscMalloc(size*sizeof(MPI_Status),&status);
4498: owners = merge->rowmap->range;
4499: len_s = merge->len_s;
4501: /* send and recv matrix values */
4502: /*-----------------------------*/
4503: PetscObjectGetNewTag((PetscObject)mpimat,&taga);
4504: PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);
4506: PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);
4507: for (proc=0,k=0; proc<size; proc++){
4508: if (!len_s[proc]) continue;
4509: i = owners[proc];
4510: MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);
4511: k++;
4512: }
4514: if (merge->nrecv) {MPI_Waitall(merge->nrecv,r_waits,status);}
4515: if (merge->nsend) {MPI_Waitall(merge->nsend,s_waits,status);}
4516: PetscFree(status);
4518: PetscFree(s_waits);
4519: PetscFree(r_waits);
4521: /* insert mat values of mpimat */
4522: /*----------------------------*/
4523: PetscMalloc(N*sizeof(PetscScalar),&ba_i);
4524: PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);
4526: for (k=0; k<merge->nrecv; k++){
4527: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4528: nrows = *(buf_ri_k[k]);
4529: nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */
4530: nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */
4531: }
4533: /* set values of ba */
4534: m = merge->rowmap->n;
4535: for (i=0; i<m; i++) {
4536: arow = owners[rank] + i;
4537: bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */
4538: bnzi = bi[i+1] - bi[i];
4539: PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));
4541: /* add local non-zero vals of this proc's seqmat into ba */
4542: anzi = ai[arow+1] - ai[arow];
4543: aj = a->j + ai[arow];
4544: aa = a->a + ai[arow];
4545: nextaj = 0;
4546: for (j=0; nextaj<anzi; j++){
4547: if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4548: ba_i[j] += aa[nextaj++];
4549: }
4550: }
4552: /* add received vals into ba */
4553: for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4554: /* i-th row */
4555: if (i == *nextrow[k]) {
4556: anzi = *(nextai[k]+1) - *nextai[k];
4557: aj = buf_rj[k] + *(nextai[k]);
4558: aa = abuf_r[k] + *(nextai[k]);
4559: nextaj = 0;
4560: for (j=0; nextaj<anzi; j++){
4561: if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4562: ba_i[j] += aa[nextaj++];
4563: }
4564: }
4565: nextrow[k]++; nextai[k]++;
4566: }
4567: }
4568: MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);
4569: }
4570: MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);
4571: MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);
4573: PetscFree(abuf_r[0]);
4574: PetscFree(abuf_r);
4575: PetscFree(ba_i);
4576: PetscFree3(buf_ri_k,nextrow,nextai);
4577: PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);
4578: return(0);
4579: }
4581: extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4585: PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4586: {
4587: PetscErrorCode ierr;
4588: Mat B_mpi;
4589: Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
4590: PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4591: PetscInt **buf_rj,**buf_ri,**buf_ri_k;
4592: PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4593: PetscInt len,proc,*dnz,*onz;
4594: PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4595: PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4596: MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits;
4597: MPI_Status *status;
4598: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
4599: PetscBT lnkbt;
4600: Mat_Merge_SeqsToMPI *merge;
4601: PetscContainer container;
4604: PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);
4606: /* make sure it is a PETSc comm */
4607: PetscCommDuplicate(comm,&comm,PETSC_NULL);
4608: MPI_Comm_size(comm,&size);
4609: MPI_Comm_rank(comm,&rank);
4611: PetscNew(Mat_Merge_SeqsToMPI,&merge);
4612: PetscMalloc(size*sizeof(MPI_Status),&status);
4614: /* determine row ownership */
4615: /*---------------------------------------------------------*/
4616: PetscLayoutCreate(comm,&merge->rowmap);
4617: PetscLayoutSetLocalSize(merge->rowmap,m);
4618: PetscLayoutSetSize(merge->rowmap,M);
4619: PetscLayoutSetBlockSize(merge->rowmap,1);
4620: PetscLayoutSetUp(merge->rowmap);
4621: PetscMalloc(size*sizeof(PetscMPIInt),&len_si);
4622: PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);
4624: m = merge->rowmap->n;
4625: M = merge->rowmap->N;
4626: owners = merge->rowmap->range;
4628: /* determine the number of messages to send, their lengths */
4629: /*---------------------------------------------------------*/
4630: len_s = merge->len_s;
4632: len = 0; /* length of buf_si[] */
4633: merge->nsend = 0;
4634: for (proc=0; proc<size; proc++){
4635: len_si[proc] = 0;
4636: if (proc == rank){
4637: len_s[proc] = 0;
4638: } else {
4639: len_si[proc] = owners[proc+1] - owners[proc] + 1;
4640: len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4641: }
4642: if (len_s[proc]) {
4643: merge->nsend++;
4644: nrows = 0;
4645: for (i=owners[proc]; i<owners[proc+1]; i++){
4646: if (ai[i+1] > ai[i]) nrows++;
4647: }
4648: len_si[proc] = 2*(nrows+1);
4649: len += len_si[proc];
4650: }
4651: }
4653: /* determine the number and length of messages to receive for ij-structure */
4654: /*-------------------------------------------------------------------------*/
4655: PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);
4656: PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);
4658: /* post the Irecv of j-structure */
4659: /*-------------------------------*/
4660: PetscCommGetNewTag(comm,&tagj);
4661: PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);
4663: /* post the Isend of j-structure */
4664: /*--------------------------------*/
4665: PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);
4667: for (proc=0, k=0; proc<size; proc++){
4668: if (!len_s[proc]) continue;
4669: i = owners[proc];
4670: MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);
4671: k++;
4672: }
4674: /* receives and sends of j-structure are complete */
4675: /*------------------------------------------------*/
4676: if (merge->nrecv) {MPI_Waitall(merge->nrecv,rj_waits,status);}
4677: if (merge->nsend) {MPI_Waitall(merge->nsend,sj_waits,status);}
4679: /* send and recv i-structure */
4680: /*---------------------------*/
4681: PetscCommGetNewTag(comm,&tagi);
4682: PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);
4684: PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);
4685: buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4686: for (proc=0,k=0; proc<size; proc++){
4687: if (!len_s[proc]) continue;
4688: /* form outgoing message for i-structure:
4689: buf_si[0]: nrows to be sent
4690: [1:nrows]: row index (global)
4691: [nrows+1:2*nrows+1]: i-structure index
4692: */
4693: /*-------------------------------------------*/
4694: nrows = len_si[proc]/2 - 1;
4695: buf_si_i = buf_si + nrows+1;
4696: buf_si[0] = nrows;
4697: buf_si_i[0] = 0;
4698: nrows = 0;
4699: for (i=owners[proc]; i<owners[proc+1]; i++){
4700: anzi = ai[i+1] - ai[i];
4701: if (anzi) {
4702: buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4703: buf_si[nrows+1] = i-owners[proc]; /* local row index */
4704: nrows++;
4705: }
4706: }
4707: MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);
4708: k++;
4709: buf_si += len_si[proc];
4710: }
4712: if (merge->nrecv) {MPI_Waitall(merge->nrecv,ri_waits,status);}
4713: if (merge->nsend) {MPI_Waitall(merge->nsend,si_waits,status);}
4715: PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);
4716: for (i=0; i<merge->nrecv; i++){
4717: PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);
4718: }
4720: PetscFree(len_si);
4721: PetscFree(len_ri);
4722: PetscFree(rj_waits);
4723: PetscFree2(si_waits,sj_waits);
4724: PetscFree(ri_waits);
4725: PetscFree(buf_s);
4726: PetscFree(status);
4728: /* compute a local seq matrix in each processor */
4729: /*----------------------------------------------*/
4730: /* allocate bi array and free space for accumulating nonzero column info */
4731: PetscMalloc((m+1)*sizeof(PetscInt),&bi);
4732: bi[0] = 0;
4734: /* create and initialize a linked list */
4735: nlnk = N+1;
4736: PetscLLCreate(N,N,nlnk,lnk,lnkbt);
4738: /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4739: len = 0;
4740: len = ai[owners[rank+1]] - ai[owners[rank]];
4741: PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);
4742: current_space = free_space;
4744: /* determine symbolic info for each local row */
4745: PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);
4747: for (k=0; k<merge->nrecv; k++){
4748: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4749: nrows = *buf_ri_k[k];
4750: nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
4751: nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */
4752: }
4754: MatPreallocateInitialize(comm,m,n,dnz,onz);
4755: len = 0;
4756: for (i=0;i<m;i++) {
4757: bnzi = 0;
4758: /* add local non-zero cols of this proc's seqmat into lnk */
4759: arow = owners[rank] + i;
4760: anzi = ai[arow+1] - ai[arow];
4761: aj = a->j + ai[arow];
4762: PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);
4763: bnzi += nlnk;
4764: /* add received col data into lnk */
4765: for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4766: if (i == *nextrow[k]) { /* i-th row */
4767: anzi = *(nextai[k]+1) - *nextai[k];
4768: aj = buf_rj[k] + *nextai[k];
4769: PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);
4770: bnzi += nlnk;
4771: nextrow[k]++; nextai[k]++;
4772: }
4773: }
4774: if (len < bnzi) len = bnzi; /* =max(bnzi) */
4776: /* if free space is not available, make more free space */
4777: if (current_space->local_remaining<bnzi) {
4778: PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);
4779: nspacedouble++;
4780: }
4781: /* copy data into free space, then initialize lnk */
4782: PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);
4783: MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);
4785: current_space->array += bnzi;
4786: current_space->local_used += bnzi;
4787: current_space->local_remaining -= bnzi;
4789: bi[i+1] = bi[i] + bnzi;
4790: }
4792: PetscFree3(buf_ri_k,nextrow,nextai);
4794: PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);
4795: PetscFreeSpaceContiguous(&free_space,bj);
4796: PetscLLDestroy(lnk,lnkbt);
4798: /* create symbolic parallel matrix B_mpi */
4799: /*---------------------------------------*/
4800: MatCreate(comm,&B_mpi);
4801: if (n==PETSC_DECIDE) {
4802: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);
4803: } else {
4804: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
4805: }
4806: MatSetType(B_mpi,MATMPIAIJ);
4807: MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);
4808: MatPreallocateFinalize(dnz,onz);
4810: /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4811: B_mpi->assembled = PETSC_FALSE;
4812: B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4813: merge->bi = bi;
4814: merge->bj = bj;
4815: merge->buf_ri = buf_ri;
4816: merge->buf_rj = buf_rj;
4817: merge->coi = PETSC_NULL;
4818: merge->coj = PETSC_NULL;
4819: merge->owners_co = PETSC_NULL;
4821: PetscCommDestroy(&comm);
4823: /* attach the supporting struct to B_mpi for reuse */
4824: PetscContainerCreate(PETSC_COMM_SELF,&container);
4825: PetscContainerSetPointer(container,merge);
4826: PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);
4827: PetscContainerDestroy(&container);
4828: *mpimat = B_mpi;
4830: PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);
4831: return(0);
4832: }
4836: PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4837: {
4838: PetscErrorCode ierr;
4841: PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);
4842: if (scall == MAT_INITIAL_MATRIX){
4843: MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);
4844: }
4845: MatMerge_SeqsToMPINumeric(seqmat,*mpimat);
4846: PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);
4847: return(0);
4848: }
4852: /*@
4853: MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4854: mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4855: with MatGetSize()
4857: Not Collective
4859: Input Parameters:
4860: + A - the matrix
4861: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4863: Output Parameter:
4864: . A_loc - the local sequential matrix generated
4866: Level: developer
4868: .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4870: @*/
4871: PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4872: {
4873: PetscErrorCode ierr;
4874: Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data;
4875: Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4876: PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4877: MatScalar *aa=a->a,*ba=b->a,*cam;
4878: PetscScalar *ca;
4879: PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4880: PetscInt *ci,*cj,col,ncols_d,ncols_o,jo;
4881: PetscBool match;
4884: PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);
4885: if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4886: PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);
4887: if (scall == MAT_INITIAL_MATRIX){
4888: PetscMalloc((1+am)*sizeof(PetscInt),&ci);
4889: ci[0] = 0;
4890: for (i=0; i<am; i++){
4891: ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4892: }
4893: PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);
4894: PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);
4895: k = 0;
4896: for (i=0; i<am; i++) {
4897: ncols_o = bi[i+1] - bi[i];
4898: ncols_d = ai[i+1] - ai[i];
4899: /* off-diagonal portion of A */
4900: for (jo=0; jo<ncols_o; jo++) {
4901: col = cmap[*bj];
4902: if (col >= cstart) break;
4903: cj[k] = col; bj++;
4904: ca[k++] = *ba++;
4905: }
4906: /* diagonal portion of A */
4907: for (j=0; j<ncols_d; j++) {
4908: cj[k] = cstart + *aj++;
4909: ca[k++] = *aa++;
4910: }
4911: /* off-diagonal portion of A */
4912: for (j=jo; j<ncols_o; j++) {
4913: cj[k] = cmap[*bj++];
4914: ca[k++] = *ba++;
4915: }
4916: }
4917: /* put together the new matrix */
4918: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);
4919: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4920: /* Since these are PETSc arrays, change flags to free them as necessary. */
4921: mat = (Mat_SeqAIJ*)(*A_loc)->data;
4922: mat->free_a = PETSC_TRUE;
4923: mat->free_ij = PETSC_TRUE;
4924: mat->nonew = 0;
4925: } else if (scall == MAT_REUSE_MATRIX){
4926: mat=(Mat_SeqAIJ*)(*A_loc)->data;
4927: ci = mat->i; cj = mat->j; cam = mat->a;
4928: for (i=0; i<am; i++) {
4929: /* off-diagonal portion of A */
4930: ncols_o = bi[i+1] - bi[i];
4931: for (jo=0; jo<ncols_o; jo++) {
4932: col = cmap[*bj];
4933: if (col >= cstart) break;
4934: *cam++ = *ba++; bj++;
4935: }
4936: /* diagonal portion of A */
4937: ncols_d = ai[i+1] - ai[i];
4938: for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4939: /* off-diagonal portion of A */
4940: for (j=jo; j<ncols_o; j++) {
4941: *cam++ = *ba++; bj++;
4942: }
4943: }
4944: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4945: PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);
4946: return(0);
4947: }
4951: /*@C
4952: MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4954: Not Collective
4956: Input Parameters:
4957: + A - the matrix
4958: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4959: - row, col - index sets of rows and columns to extract (or PETSC_NULL)
4961: Output Parameter:
4962: . A_loc - the local sequential matrix generated
4964: Level: developer
4966: .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4968: @*/
4969: PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4970: {
4971: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
4972: PetscErrorCode ierr;
4973: PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4974: IS isrowa,iscola;
4975: Mat *aloc;
4976: PetscBool match;
4979: PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);
4980: if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4981: PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);
4982: if (!row){
4983: start = A->rmap->rstart; end = A->rmap->rend;
4984: ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);
4985: } else {
4986: isrowa = *row;
4987: }
4988: if (!col){
4989: start = A->cmap->rstart;
4990: cmap = a->garray;
4991: nzA = a->A->cmap->n;
4992: nzB = a->B->cmap->n;
4993: PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);
4994: ncols = 0;
4995: for (i=0; i<nzB; i++) {
4996: if (cmap[i] < start