Actual source code: mpiaij.c
petsc-3.3-p2 2012-07-13
2: #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
3: #include <petscblaslapack.h>
5: /*MC
6: MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
8: This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9: and MATMPIAIJ otherwise. As a result, for single process communicators,
10: MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11: for communicators controlling multiple processes. It is recommended that you call both of
12: the above preallocation routines for simplicity.
14: Options Database Keys:
15: . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
17: Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the AIJ format also automatically switches over to using inodes when
18: enough of them exist.
20: Level: beginner
22: .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23: M*/
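/*
   Illustrative usage sketch (an editorial addition, not part of mpiaij.c): the manual page above
   recommends calling both preallocation routines so the same code works on one process and on
   many. A minimal assembly might look like the following, where the global size n and the
   per-row estimates 5 (diagonal part) and 2 (off-diagonal part) are placeholder values.

      Mat            A;
      PetscInt       i,Istart,Iend,n = 100;
      PetscScalar    one = 1.0;
      PetscErrorCode ierr;

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
      for (i=Istart; i<Iend; i++) {
        ierr = MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   The preallocation call that does not match the actual matrix type is simply ignored, which is
   why calling both is harmless.
*/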
25: /*MC
26: MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
28: This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29: and MATMPIAIJCRL otherwise. As a result, for single process communicators,
30: MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31: for communicators controlling multiple processes. It is recommended that you call both of
32: the above preallocation routines for simplicity.
34: Options Database Keys:
35: . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
37: Level: beginner
39: .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40: M*/
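/*
   Illustrative usage sketch (an editorial addition, not part of mpiaij.c): as the option table
   above indicates, the aijcrl format is normally selected at run time. Assuming the application
   calls MatSetFromOptions() before preallocating, e.g.

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   then running the program with -mat_type aijcrl switches to this format without source changes;
   the per-row estimates 5 and 2 are placeholder values.
*/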
44: PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45: {
46: PetscErrorCode ierr;
47: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;
48: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data;
49: Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data;
50: const PetscInt *ia,*ib;
51: const MatScalar *aa,*bb;
52: PetscInt na,nb,i,j,*rows,cnt=0,n0rows;
53: PetscInt m = M->rmap->n,rstart = M->rmap->rstart;
56: *keptrows = 0;
57: ia = a->i;
58: ib = b->i;
59: for (i=0; i<m; i++) {
60: na = ia[i+1] - ia[i];
61: nb = ib[i+1] - ib[i];
62: if (!na && !nb) {
63: cnt++;
64: goto ok1;
65: }
66: aa = a->a + ia[i];
67: for (j=0; j<na; j++) {
68: if (aa[j] != 0.0) goto ok1;
69: }
70: bb = b->a + ib[i];
71: for (j=0; j <nb; j++) {
72: if (bb[j] != 0.0) goto ok1;
73: }
74: cnt++;
75: ok1:;
76: }
77: MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);
78: if (!n0rows) return(0);
79: PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);
80: cnt = 0;
81: for (i=0; i<m; i++) {
82: na = ia[i+1] - ia[i];
83: nb = ib[i+1] - ib[i];
84: if (!na && !nb) continue;
85: aa = a->a + ia[i];
86: for(j=0; j<na;j++) {
87: if (aa[j] != 0.0) {
88: rows[cnt++] = rstart + i;
89: goto ok2;
90: }
91: }
92: bb = b->a + ib[i];
93: for (j=0; j<nb; j++) {
94: if (bb[j] != 0.0) {
95: rows[cnt++] = rstart + i;
96: goto ok2;
97: }
98: }
99: ok2:;
100: }
101: ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);
102: return(0);
103: }
107: PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108: {
110: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data;
111: PetscInt i,n,*garray = aij->garray;
112: Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data;
113: Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data;
114: PetscReal *work;
117: MatGetSize(A,PETSC_NULL,&n);
118: PetscMalloc(n*sizeof(PetscReal),&work);
119: PetscMemzero(work,n*sizeof(PetscReal));
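  /* accumulate each local block's contribution into a work array over all global columns: the
     diagonal block's columns are offset by A->cmap->rstart and the off-diagonal block's columns
     are mapped through garray; the per-process results are then combined with MPI_Allreduce below */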
120: if (type == NORM_2) {
121: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122: work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123: }
124: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125: work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126: }
127: } else if (type == NORM_1) {
128: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129: work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130: }
131: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132: work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133: }
134: } else if (type == NORM_INFINITY) {
135: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136: work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137: }
138: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139: work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140: }
142: } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143: if (type == NORM_INFINITY) {
144: MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);
145: } else {
146: MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);
147: }
148: PetscFree(work);
149: if (type == NORM_2) {
150: for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151: }
152: return(0);
153: }
157: /*
158: Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159: MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
161: Only for square matrices
162: */
163: PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164: {
165: PetscMPIInt rank,size;
166: PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
168: Mat mat;
169: Mat_SeqAIJ *gmata;
170: PetscMPIInt tag;
171: MPI_Status status;
172: PetscBool aij;
173: MatScalar *gmataa,*ao,*ad,*gmataarestore=0;
176: CHKMEMQ;
177: MPI_Comm_rank(comm,&rank);
178: MPI_Comm_size(comm,&size);
179: if (!rank) {
180: PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);
181: if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182: }
183: if (reuse == MAT_INITIAL_MATRIX) {
184: MatCreate(comm,&mat);
185: MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
186: MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);
187: MatSetType(mat,MATAIJ);
188: PetscMalloc((size+1)*sizeof(PetscInt),&rowners);
189: PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);
190: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
191: rowners[0] = 0;
192: for (i=2; i<=size; i++) {
193: rowners[i] += rowners[i-1];
194: }
195: rstart = rowners[rank];
196: rend = rowners[rank+1];
197: PetscObjectGetNewTag((PetscObject)mat,&tag);
198: if (!rank) {
199: gmata = (Mat_SeqAIJ*) gmat->data;
200: /* send row lengths to all processors */
201: for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
202: for (i=1; i<size; i++) {
203: MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
204: }
205: /* determine the diagonal and off-diagonal nonzero counts */
206: PetscMemzero(olens,m*sizeof(PetscInt));
207: PetscMalloc(m*sizeof(PetscInt),&ld);
208: PetscMemzero(ld,m*sizeof(PetscInt));
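        /* ld[i] counts, for local row i, the off-diagonal entries that lie to the left of this
           process's diagonal block; it is stored in mat->ld and used by the MAT_REUSE_MATRIX
           branch below to split incoming values between the diagonal (A) and off-diagonal (B) parts */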
209: jj = 0;
210: for (i=0; i<m; i++) {
211: for (j=0; j<dlens[i]; j++) {
212: if (gmata->j[jj] < rstart) ld[i]++;
213: if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
214: jj++;
215: }
216: }
217: /* send column indices to other processes */
218: for (i=1; i<size; i++) {
219: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
220: MPI_Send(&nz,1,MPIU_INT,i,tag,comm);
221: MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);
222: }
224: /* send numerical values to other processes */
225: for (i=1; i<size; i++) {
226: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
227: MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
228: }
229: gmataa = gmata->a;
230: gmataj = gmata->j;
232: } else {
233: /* receive row lengths */
234: MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);
235: /* receive column indices */
236: MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);
237: PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);
238: MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);
239: /* determine the diagonal and off-diagonal nonzero counts */
240: PetscMemzero(olens,m*sizeof(PetscInt));
241: PetscMalloc(m*sizeof(PetscInt),&ld);
242: PetscMemzero(ld,m*sizeof(PetscInt));
243: jj = 0;
244: for (i=0; i<m; i++) {
245: for (j=0; j<dlens[i]; j++) {
246: if (gmataj[jj] < rstart) ld[i]++;
247: if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
248: jj++;
249: }
250: }
251: /* receive numerical values */
252: PetscMemzero(gmataa,nz*sizeof(PetscScalar));
253: MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
254: }
255: /* set preallocation */
256: for (i=0; i<m; i++) {
257: dlens[i] -= olens[i];
258: }
259: MatSeqAIJSetPreallocation(mat,0,dlens);
260: MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);
261:
262: for (i=0; i<m; i++) {
263: dlens[i] += olens[i];
264: }
265: cnt = 0;
266: for (i=0; i<m; i++) {
267: row = rstart + i;
268: MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);
269: cnt += dlens[i];
270: }
271: if (rank) {
272: PetscFree2(gmataa,gmataj);
273: }
274: PetscFree2(dlens,olens);
275: PetscFree(rowners);
276: ((Mat_MPIAIJ*)(mat->data))->ld = ld;
277: *inmat = mat;
278: } else { /* column indices are already set; only need to move over numerical values from process 0 */
279: Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
280: Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
281: mat = *inmat;
282: PetscObjectGetNewTag((PetscObject)mat,&tag);
283: if (!rank) {
284: /* send numerical values to other processes */
285: gmata = (Mat_SeqAIJ*) gmat->data;
286: MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);
287: gmataa = gmata->a;
288: for (i=1; i<size; i++) {
289: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
290: MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
291: }
292: nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
293: } else {
294: /* receive numerical values from process 0*/
295: nz = Ad->nz + Ao->nz;
296: PetscMalloc(nz*sizeof(PetscScalar),&gmataa); gmataarestore = gmataa;
297: MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
298: }
299: /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
300: ld = ((Mat_MPIAIJ*)(mat->data))->ld;
301: ad = Ad->a;
302: ao = Ao->a;
303: if (mat->rmap->n) {
304: i = 0;
305: nz = ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
306: nz = Ad->i[i+1] - Ad->i[i]; PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
307: }
308: for (i=1; i<mat->rmap->n; i++) {
309: nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
310: nz = Ad->i[i+1] - Ad->i[i]; PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
311: }
312: i--;
313: if (mat->rmap->n) {
314: nz = Ao->i[i+1] - Ao->i[i] - ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
315: }
316: if (rank) {
317: PetscFree(gmataarestore);
318: }
319: }
320: MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
321: MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
322: CHKMEMQ;
323: return(0);
324: }
326: /*
327: Local utility routine that creates a mapping from the global column
328: number to the local number in the off-diagonal part of the local
329: storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable, at
330: a slightly higher hash-table lookup cost; without it, it is not scalable (each process
331: stores an order-N integer array) but access is fast.
332: */
335: PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
336: {
337: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
339: PetscInt n = aij->B->cmap->n,i;
342: if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
343: #if defined (PETSC_USE_CTABLE)
344: PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);
345: for (i=0; i<n; i++){
346: PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);
347: }
348: #else
349: PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);
350: PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));
351: PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));
352: for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
353: #endif
354: return(0);
355: }
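/*
    The two macros below insert (row,col,value) into the local diagonal (A) and off-diagonal (B)
    blocks inside MatSetValues_MPIAIJ(): a short binary search narrows the sorted column list of
    the current row, a linear scan finds or creates the slot (shifting later entries up on
    insertion), and the ignorezeroentries and nonew flags are honored. They assume the caller has
    set up rp1/ap1 (or rp2/ap2), nrow1, low1, high1, etc. for the row being modified.
*/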
357: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
358: { \
359: if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
360: lastcol1 = col;\
361: while (high1-low1 > 5) { \
362: t = (low1+high1)/2; \
363: if (rp1[t] > col) high1 = t; \
364: else low1 = t; \
365: } \
366: for (_i=low1; _i<high1; _i++) { \
367: if (rp1[_i] > col) break; \
368: if (rp1[_i] == col) { \
369: if (addv == ADD_VALUES) ap1[_i] += value; \
370: else ap1[_i] = value; \
371: goto a_noinsert; \
372: } \
373: } \
374: if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
375: if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
376: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
377: MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
378: N = nrow1++ - 1; a->nz++; high1++; \
379: /* shift up all the later entries in this row */ \
380: for (ii=N; ii>=_i; ii--) { \
381: rp1[ii+1] = rp1[ii]; \
382: ap1[ii+1] = ap1[ii]; \
383: } \
384: rp1[_i] = col; \
385: ap1[_i] = value; \
386: a_noinsert: ; \
387: ailen[row] = nrow1; \
388: }
391: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
392: { \
393: if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
394: lastcol2 = col;\
395: while (high2-low2 > 5) { \
396: t = (low2+high2)/2; \
397: if (rp2[t] > col) high2 = t; \
398: else low2 = t; \
399: } \
400: for (_i=low2; _i<high2; _i++) { \
401: if (rp2[_i] > col) break; \
402: if (rp2[_i] == col) { \
403: if (addv == ADD_VALUES) ap2[_i] += value; \
404: else ap2[_i] = value; \
405: goto b_noinsert; \
406: } \
407: } \
408: if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
409: if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
410: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
411: MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
412: N = nrow2++ - 1; b->nz++; high2++; \
413: /* shift up all the later entries in this row */ \
414: for (ii=N; ii>=_i; ii--) { \
415: rp2[ii+1] = rp2[ii]; \
416: ap2[ii+1] = ap2[ii]; \
417: } \
418: rp2[_i] = col; \
419: ap2[_i] = value; \
420: b_noinsert: ; \
421: bilen[row] = nrow2; \
422: }
426: PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
427: {
428: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
429: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
431: PetscInt l,*garray = mat->garray,diag;
434: /* code only works for square matrices A */
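  /* v[] is assumed to hold the entire row in global column order: first the entries to the left
     of this process's diagonal block (stored in B), then the diagonal block itself (stored in A),
     then the remaining entries to the right (also stored in B) */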
436: /* find size of row to the left of the diagonal part */
437: MatGetOwnershipRange(A,&diag,0);
438: row = row - diag;
439: for (l=0; l<b->i[row+1]-b->i[row]; l++) {
440: if (garray[b->j[b->i[row]+l]] > diag) break;
441: }
442: PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));
444: /* diagonal part */
445: PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));
447: /* right of diagonal part */
448: PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));
449: return(0);
450: }
454: PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
455: {
456: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
457: PetscScalar value;
459: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
460: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
461: PetscBool roworiented = aij->roworiented;
463: /* some variables required by the MatSetValues_SeqAIJ_[A|B]_Private() macros */
464: Mat A = aij->A;
465: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
466: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
467: MatScalar *aa = a->a;
468: PetscBool ignorezeroentries = a->ignorezeroentries;
469: Mat B = aij->B;
470: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
471: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
472: MatScalar *ba = b->a;
474: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
475: PetscInt nonew;
476: MatScalar *ap1,*ap2;
480: for (i=0; i<m; i++) {
481: if (im[i] < 0) continue;
482: #if defined(PETSC_USE_DEBUG)
483: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
484: #endif
485: if (im[i] >= rstart && im[i] < rend) {
486: row = im[i] - rstart;
487: lastcol1 = -1;
488: rp1 = aj + ai[row];
489: ap1 = aa + ai[row];
490: rmax1 = aimax[row];
491: nrow1 = ailen[row];
492: low1 = 0;
493: high1 = nrow1;
494: lastcol2 = -1;
495: rp2 = bj + bi[row];
496: ap2 = ba + bi[row];
497: rmax2 = bimax[row];
498: nrow2 = bilen[row];
499: low2 = 0;
500: high2 = nrow2;
502: for (j=0; j<n; j++) {
503: if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
504: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
505: if (in[j] >= cstart && in[j] < cend){
506: col = in[j] - cstart;
507: nonew = a->nonew;
508: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
509: } else if (in[j] < 0) continue;
510: #if defined(PETSC_USE_DEBUG)
511: else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
512: #endif
513: else {
514: if (mat->was_assembled) {
515: if (!aij->colmap) {
516: MatCreateColmap_MPIAIJ_Private(mat);
517: }
518: #if defined (PETSC_USE_CTABLE)
519: PetscTableFind(aij->colmap,in[j]+1,&col);
520: col--;
521: #else
522: col = aij->colmap[in[j]] - 1;
523: #endif
524: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
525: MatDisAssemble_MPIAIJ(mat);
526: col = in[j];
527: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
528: B = aij->B;
529: b = (Mat_SeqAIJ*)B->data;
530: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
531: rp2 = bj + bi[row];
532: ap2 = ba + bi[row];
533: rmax2 = bimax[row];
534: nrow2 = bilen[row];
535: low2 = 0;
536: high2 = nrow2;
537: bm = aij->B->rmap->n;
538: ba = b->a;
539: }
540: } else col = in[j];
541: nonew = b->nonew;
542: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
543: }
544: }
545: } else {
546: if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
547: if (!aij->donotstash) {
548: mat->assembled = PETSC_FALSE;
549: if (roworiented) {
550: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
551: } else {
552: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
553: }
554: }
555: }
556: }
557: return(0);
558: }
562: PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
563: {
564: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
566: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
567: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
570: for (i=0; i<m; i++) {
571: if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
572: if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
573: if (idxm[i] >= rstart && idxm[i] < rend) {
574: row = idxm[i] - rstart;
575: for (j=0; j<n; j++) {
576: if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
577: if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
578: if (idxn[j] >= cstart && idxn[j] < cend){
579: col = idxn[j] - cstart;
580: MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
581: } else {
582: if (!aij->colmap) {
583: MatCreateColmap_MPIAIJ_Private(mat);
584: }
585: #if defined (PETSC_USE_CTABLE)
586: PetscTableFind(aij->colmap,idxn[j]+1,&col);
587: col --;
588: #else
589: col = aij->colmap[idxn[j]] - 1;
590: #endif
591: if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
592: else {
593: MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
594: }
595: }
596: }
597: } else {
598: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
599: }
600: }
601: return(0);
602: }
604: extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
608: PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
609: {
610: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
612: PetscInt nstash,reallocs;
613: InsertMode addv;
616: if (aij->donotstash || mat->nooffprocentries) {
617: return(0);
618: }
620: /* make sure all processors are either in INSERTMODE or ADDMODE */
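     /* INSERT_VALUES and ADD_VALUES have distinct bit values, so the bitwise OR over all ranks
        equals (ADD_VALUES|INSERT_VALUES) exactly when some ranks inserted while others added */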
621: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);
622: if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
623: mat->insertmode = addv; /* in case this processor had no cache */
625: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
626: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
627: PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
628: return(0);
629: }
633: PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
634: {
635: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
636: Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data;
638: PetscMPIInt n;
639: PetscInt i,j,rstart,ncols,flg;
640: PetscInt *row,*col;
641: PetscBool other_disassembled;
642: PetscScalar *val;
643: InsertMode addv = mat->insertmode;
645: /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
647: if (!aij->donotstash && !mat->nooffprocentries) {
648: while (1) {
649: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
650: if (!flg) break;
652: for (i=0; i<n;) {
653: /* Now identify the consecutive vals belonging to the same row */
654: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
655: if (j < n) ncols = j-i;
656: else ncols = n-i;
657: /* Now assemble all these values with a single function call */
658: MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
659: i = j;
660: }
661: }
662: MatStashScatterEnd_Private(&mat->stash);
663: }
664: MatAssemblyBegin(aij->A,mode);
665: MatAssemblyEnd(aij->A,mode);
667: /* determine if any processor has disassembled, if so we must
668: also disassemble ourselves, in order that we may reassemble. */
669: /*
670: if the nonzero structure of the submatrix B cannot change, then we know that
671: no processor has disassembled, so we can skip this step
672: */
673: if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
674: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);
675: if (mat->was_assembled && !other_disassembled) {
676: MatDisAssemble_MPIAIJ(mat);
677: }
678: }
679: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
680: MatSetUpMultiply_MPIAIJ(mat);
681: }
682: MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);
683: MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);
684: MatAssemblyBegin(aij->B,mode);
685: MatAssemblyEnd(aij->B,mode);
687: PetscFree2(aij->rowvalues,aij->rowindices);
688: aij->rowvalues = 0;
690: /* used by MatAXPY() */
691: a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
692: a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */
694: VecDestroy(&aij->diag);
695: if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
696: return(0);
697: }
701: PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
702: {
703: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
707: MatZeroEntries(l->A);
708: MatZeroEntries(l->B);
709: return(0);
710: }
714: PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
715: {
716: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
717: PetscErrorCode ierr;
718: PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
719: PetscInt i,*owners = A->rmap->range;
720: PetscInt *nprocs,j,idx,nsends,row;
721: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
722: PetscInt *rvalues,count,base,slen,*source;
723: PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart;
724: MPI_Comm comm = ((PetscObject)A)->comm;
725: MPI_Request *send_waits,*recv_waits;
726: MPI_Status recv_status,*send_status;
727: const PetscScalar *xx;
728: PetscScalar *bb;
729: #if defined(PETSC_DEBUG)
730: PetscBool found = PETSC_FALSE;
731: #endif
734: /* first count number of contributors to each processor */
735: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
736: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
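  /* nprocs[2*j] counts the rows in this call that are owned by process j; nprocs[2*j+1] is a 0/1
     flag recording that at least one message must be sent to process j */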
737: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
738: j = 0;
739: for (i=0; i<N; i++) {
740: if (lastidx > (idx = rows[i])) j = 0;
741: lastidx = idx;
742: for (; j<size; j++) {
743: if (idx >= owners[j] && idx < owners[j+1]) {
744: nprocs[2*j]++;
745: nprocs[2*j+1] = 1;
746: owner[i] = j;
747: #if defined(PETSC_DEBUG)
748: found = PETSC_TRUE;
749: #endif
750: break;
751: }
752: }
753: #if defined(PETSC_DEBUG)
754: if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
755: found = PETSC_FALSE;
756: #endif
757: }
758: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
760: if (A->nooffproczerorows) {
761: if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
762: nrecvs = nsends;
763: nmax = N;
764: } else {
765: /* inform other processors of number of messages and max length*/
766: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
767: }
769: /* post receives: */
770: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
771: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
772: for (i=0; i<nrecvs; i++) {
773: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
774: }
776: /* do sends:
777: 1) starts[i] gives the starting index in svalues for stuff going to
778: the ith processor
779: */
780: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
781: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
782: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
783: starts[0] = 0;
784: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
785: for (i=0; i<N; i++) {
786: svalues[starts[owner[i]]++] = rows[i];
787: }
789: starts[0] = 0;
790: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
791: count = 0;
792: for (i=0; i<size; i++) {
793: if (nprocs[2*i+1]) {
794: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
795: }
796: }
797: PetscFree(starts);
799: base = owners[rank];
801: /* wait on receives */
802: PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);
803: count = nrecvs; slen = 0;
804: while (count) {
805: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
806: /* unpack receives into our local space */
807: MPI_Get_count(&recv_status,MPIU_INT,&n);
808: source[imdex] = recv_status.MPI_SOURCE;
809: lens[imdex] = n;
810: slen += n;
811: count--;
812: }
813: PetscFree(recv_waits);
814:
815: /* move the received row indices into the list of local rows to zero */
816: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
817: count = 0;
818: for (i=0; i<nrecvs; i++) {
819: values = rvalues + i*nmax;
820: for (j=0; j<lens[i]; j++) {
821: lrows[count++] = values[j] - base;
822: }
823: }
824: PetscFree(rvalues);
825: PetscFree2(lens,source);
826: PetscFree(owner);
827: PetscFree(nprocs);
828:
829: /* fix right hand side if needed */
830: if (x && b) {
831: VecGetArrayRead(x,&xx);
832: VecGetArray(b,&bb);
833: for (i=0; i<slen; i++) {
834: bb[lrows[i]] = diag*xx[lrows[i]];
835: }
836: VecRestoreArrayRead(x,&xx);
837: VecRestoreArray(b,&bb);
838: }
839: /*
840: Zero the required rows. If the "diagonal block" of the matrix
841: is square and the user wishes to set the diagonal, we use separate
842: code so that MatSetValues() is not called for each diagonal entry, which would
843: allocate new memory each time, causing many mallocs and slowing things down.
845: */
846: /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
847: MatZeroRows(l->B,slen,lrows,0.0,0,0);
848: if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
849: MatZeroRows(l->A,slen,lrows,diag,0,0);
850: } else if (diag != 0.0) {
851: MatZeroRows(l->A,slen,lrows,0.0,0,0);
852: if (((Mat_SeqAIJ*)l->A->data)->nonew) {
853: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
854: MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
855: }
856: for (i = 0; i < slen; i++) {
857: row = lrows[i] + rstart;
858: MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);
859: }
860: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
861: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
862: } else {
863: MatZeroRows(l->A,slen,lrows,0.0,0,0);
864: }
865: PetscFree(lrows);
867: /* wait on sends */
868: if (nsends) {
869: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
870: MPI_Waitall(nsends,send_waits,send_status);
871: PetscFree(send_status);
872: }
873: PetscFree(send_waits);
874: PetscFree(svalues);
875: return(0);
876: }
880: PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
881: {
882: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
883: PetscErrorCode ierr;
884: PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
885: PetscInt i,*owners = A->rmap->range;
886: PetscInt *nprocs,j,idx,nsends;
887: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
888: PetscInt *rvalues,count,base,slen,*source;
889: PetscInt *lens,*lrows,*values,m;
890: MPI_Comm comm = ((PetscObject)A)->comm;
891: MPI_Request *send_waits,*recv_waits;
892: MPI_Status recv_status,*send_status;
893: const PetscScalar *xx;
894: PetscScalar *bb,*mask;
895: Vec xmask,lmask;
896: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data;
897: const PetscInt *aj, *ii,*ridx;
898: PetscScalar *aa;
899: #if defined(PETSC_DEBUG)
900: PetscBool found = PETSC_FALSE;
901: #endif
904: /* first count number of contributors to each processor */
905: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
906: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
907: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
908: j = 0;
909: for (i=0; i<N; i++) {
910: if (lastidx > (idx = rows[i])) j = 0;
911: lastidx = idx;
912: for (; j<size; j++) {
913: if (idx >= owners[j] && idx < owners[j+1]) {
914: nprocs[2*j]++;
915: nprocs[2*j+1] = 1;
916: owner[i] = j;
917: #if defined(PETSC_DEBUG)
918: found = PETSC_TRUE;
919: #endif
920: break;
921: }
922: }
923: #if defined(PETSC_DEBUG)
924: if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
925: found = PETSC_FALSE;
926: #endif
927: }
928: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
930: /* inform other processors of number of messages and max length*/
931: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
933: /* post receives: */
934: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
935: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
936: for (i=0; i<nrecvs; i++) {
937: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
938: }
940: /* do sends:
941: 1) starts[i] gives the starting index in svalues for stuff going to
942: the ith processor
943: */
944: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
945: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
946: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
947: starts[0] = 0;
948: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
949: for (i=0; i<N; i++) {
950: svalues[starts[owner[i]]++] = rows[i];
951: }
953: starts[0] = 0;
954: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
955: count = 0;
956: for (i=0; i<size; i++) {
957: if (nprocs[2*i+1]) {
958: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
959: }
960: }
961: PetscFree(starts);
963: base = owners[rank];
965: /* wait on receives */
966: PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);
967: count = nrecvs; slen = 0;
968: while (count) {
969: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
970: /* unpack receives into our local space */
971: MPI_Get_count(&recv_status,MPIU_INT,&n);
972: source[imdex] = recv_status.MPI_SOURCE;
973: lens[imdex] = n;
974: slen += n;
975: count--;
976: }
977: PetscFree(recv_waits);
978:
979: /* move the received row indices into the list of local rows to zero */
980: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
981: count = 0;
982: for (i=0; i<nrecvs; i++) {
983: values = rvalues + i*nmax;
984: for (j=0; j<lens[i]; j++) {
985: lrows[count++] = values[j] - base;
986: }
987: }
988: PetscFree(rvalues);
989: PetscFree2(lens,source);
990: PetscFree(owner);
991: PetscFree(nprocs);
992: /* lrows are the local rows to be zeroed, slen is the number of local rows */
994: /* zero diagonal part of matrix */
995: MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);
996:
997: /* handle off diagonal part of matrix */
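  /* build a mask vector that is 1 in the rows being zeroed, scatter it into the off-process
     (column) ordering through Mvctx, and use it below to zero the matching columns of B and,
     when x and b are provided, to subtract the known contributions a_ij*x_j from the right-hand side */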
998: MatGetVecs(A,&xmask,PETSC_NULL);
999: VecDuplicate(l->lvec,&lmask);
1000: VecGetArray(xmask,&bb);
1001: for (i=0; i<slen; i++) {
1002: bb[lrows[i]] = 1;
1003: }
1004: VecRestoreArray(xmask,&bb);
1005: VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
1006: VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
1007: VecDestroy(&xmask);
1008: if (x) {
1009: VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);
1010: VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);
1011: VecGetArrayRead(l->lvec,&xx);
1012: VecGetArray(b,&bb);
1013: }
1014: VecGetArray(lmask,&mask);
1016: /* remove zeroed rows of off diagonal matrix */
1017: ii = aij->i;
1018: for (i=0; i<slen; i++) {
1019: PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));
1020: }
1022: /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
1023: if (aij->compressedrow.use){
1024: m = aij->compressedrow.nrows;
1025: ii = aij->compressedrow.i;
1026: ridx = aij->compressedrow.rindex;
1027: for (i=0; i<m; i++){
1028: n = ii[i+1] - ii[i];
1029: aj = aij->j + ii[i];
1030: aa = aij->a + ii[i];
1032: for (j=0; j<n; j++) {
1033: if (PetscAbsScalar(mask[*aj])) {
1034: if (b) bb[*ridx] -= *aa*xx[*aj];
1035: *aa = 0.0;
1036: }
1037: aa++;
1038: aj++;
1039: }
1040: ridx++;
1041: }
1042: } else { /* do not use compressed row format */
1043: m = l->B->rmap->n;
1044: for (i=0; i<m; i++) {
1045: n = ii[i+1] - ii[i];
1046: aj = aij->j + ii[i];
1047: aa = aij->a + ii[i];
1048: for (j=0; j<n; j++) {
1049: if (PetscAbsScalar(mask[*aj])) {
1050: if (b) bb[i] -= *aa*xx[*aj];
1051: *aa = 0.0;
1052: }
1053: aa++;
1054: aj++;
1055: }
1056: }
1057: }
1058: if (x) {
1059: VecRestoreArray(b,&bb);
1060: VecRestoreArrayRead(l->lvec,&xx);
1061: }
1062: VecRestoreArray(lmask,&mask);
1063: VecDestroy(&lmask);
1064: PetscFree(lrows);
1066: /* wait on sends */
1067: if (nsends) {
1068: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1069: MPI_Waitall(nsends,send_waits,send_status);
1070: PetscFree(send_status);
1071: }
1072: PetscFree(send_waits);
1073: PetscFree(svalues);
1075: return(0);
1076: }
1080: PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1081: {
1082: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1084: PetscInt nt;
1087: VecGetLocalSize(xx,&nt);
1088: if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
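  /* overlap communication with computation: start scattering the off-process entries of xx into
     a->lvec, multiply by the local diagonal block while the messages are in flight, then complete
     the scatter and add the off-diagonal block's contribution */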
1089: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1090: (*a->A->ops->mult)(a->A,xx,yy);
1091: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1092: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
1093: return(0);
1094: }
1098: PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1099: {
1100: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1104: MatMultDiagonalBlock(a->A,bb,xx);
1105: return(0);
1106: }
1110: PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1111: {
1112: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1116: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1117: (*a->A->ops->multadd)(a->A,xx,yy,zz);
1118: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1119: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
1120: return(0);
1121: }
1125: PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1126: {
1127: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1129: PetscBool merged;
1132: VecScatterGetMerged(a->Mvctx,&merged);
1133: /* do nondiagonal part */
1134: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1135: if (!merged) {
1136: /* send it on its way */
1137: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1138: /* do local part */
1139: (*a->A->ops->multtranspose)(a->A,xx,yy);
1140: /* receive remote parts: note this assumes the values are not actually */
1141: /* added into yy until the VecScatterEnd() below */
1142: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1143: } else {
1144: /* do local part */
1145: (*a->A->ops->multtranspose)(a->A,xx,yy);
1146: /* send it on its way */
1147: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1148: /* values actually were received in the Begin() but we need to call this nop */
1149: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1150: }
1151: return(0);
1152: }
1154: EXTERN_C_BEGIN
1157: PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
1158: {
1159: MPI_Comm comm;
1160: Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1161: Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1162: IS Me,Notme;
1164: PetscInt M,N,first,last,*notme,i;
1165: PetscMPIInt size;
1169: /* Easy test: symmetric diagonal block */
1170: Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1171: MatIsTranspose(Adia,Bdia,tol,f);
1172: if (!*f) return(0);
1173: PetscObjectGetComm((PetscObject)Amat,&comm);
1174: MPI_Comm_size(comm,&size);
1175: if (size == 1) return(0);
1177: /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1178: MatGetSize(Amat,&M,&N);
1179: MatGetOwnershipRange(Amat,&first,&last);
1180: PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);
1181: for (i=0; i<first; i++) notme[i] = i;
1182: for (i=last; i<M; i++) notme[i-last+first] = i;
1183: ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);
1184: ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);
1185: MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);
1186: Aoff = Aoffs[0];
1187: MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);
1188: Boff = Boffs[0];
1189: MatIsTranspose(Aoff,Boff,tol,f);
1190: MatDestroyMatrices(1,&Aoffs);
1191: MatDestroyMatrices(1,&Boffs);
1192: ISDestroy(&Me);
1193: ISDestroy(&Notme);
1194: PetscFree(notme);
1195: return(0);
1196: }
1197: EXTERN_C_END
1201: PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1202: {
1203: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1207: /* do nondiagonal part */
1208: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1209: /* send it on its way */
1210: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1211: /* do local part */
1212: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
1213: /* receive remote parts */
1214: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1215: return(0);
1216: }
1218: /*
1219: This only works correctly for square matrices where the subblock A->A is the
1220: diagonal block
1221: */
1224: PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1225: {
1227: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1230: if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1231: if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1232: MatGetDiagonal(a->A,v);
1233: return(0);
1234: }
1238: PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1239: {
1240: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1244: MatScale(a->A,aa);
1245: MatScale(a->B,aa);
1246: return(0);
1247: }
1251: PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1252: {
1253: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1257: #if defined(PETSC_USE_LOG)
1258: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1259: #endif
1260: MatStashDestroy_Private(&mat->stash);
1261: VecDestroy(&aij->diag);
1262: MatDestroy(&aij->A);
1263: MatDestroy(&aij->B);
1264: #if defined (PETSC_USE_CTABLE)
1265: PetscTableDestroy(&aij->colmap);
1266: #else
1267: PetscFree(aij->colmap);
1268: #endif
1269: PetscFree(aij->garray);
1270: VecDestroy(&aij->lvec);
1271: VecScatterDestroy(&aij->Mvctx);
1272: PetscFree2(aij->rowvalues,aij->rowindices);
1273: PetscFree(aij->ld);
1274: PetscFree(mat->data);
1276: PetscObjectChangeTypeName((PetscObject)mat,0);
1277: PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);
1278: PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);
1279: PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
1280: PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);
1281: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);
1282: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);
1283: PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);
1284: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);
1285: return(0);
1286: }
1290: PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1291: {
1292: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1293: Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data;
1294: Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data;
1295: PetscErrorCode ierr;
1296: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
1297: int fd;
1298: PetscInt nz,header[4],*row_lengths,*range=0,rlen,i;
1299: PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1300: PetscScalar *column_values;
1301: PetscInt message_count,flowcontrolcount;
1304: MPI_Comm_rank(((PetscObject)mat)->comm,&rank);
1305: MPI_Comm_size(((PetscObject)mat)->comm,&size);
1306: nz = A->nz + B->nz;
1307: if (!rank) {
1308: header[0] = MAT_FILE_CLASSID;
1309: header[1] = mat->rmap->N;
1310: header[2] = mat->cmap->N;
1311: MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);
1312: PetscViewerBinaryGetDescriptor(viewer,&fd);
1313: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
1314: /* get largest number of rows any processor has */
1315: rlen = mat->rmap->n;
1316: range = mat->rmap->range;
1317: for (i=1; i<size; i++) {
1318: rlen = PetscMax(rlen,range[i+1] - range[i]);
1319: }
1320: } else {
1321: MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);
1322: rlen = mat->rmap->n;
1323: }
1325: /* load up the local row counts */
1326: PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);
1327: for (i=0; i<mat->rmap->n; i++) {
1328: row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1329: }
1331: /* store the row lengths to the file */
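  /* process 0 writes its own rows first and then receives and writes each other process's data in
     turn; the PetscViewerFlowControl calls throttle the senders so they do not all flood process 0
     at once. The same pattern is repeated below for the column indices and the numerical values. */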
1332: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1333: if (!rank) {
1334: PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);
1335: for (i=1; i<size; i++) {
1336: PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);
1337: rlen = range[i+1] - range[i];
1338: MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);
1339: PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);
1340: }
1341: PetscViewerFlowControlEndMaster(viewer,message_count);
1342: } else {
1343: PetscViewerFlowControlStepWorker(viewer,rank,message_count);
1344: MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);
1345: PetscViewerFlowControlEndWorker(viewer,message_count);
1346: }
1347: PetscFree(row_lengths);
1349: /* load up the local column indices */
1350: nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
1351: MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);
1352: PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);
1353: cnt = 0;
1354: for (i=0; i<mat->rmap->n; i++) {
1355: for (j=B->i[i]; j<B->i[i+1]; j++) {
1356: if ( (col = garray[B->j[j]]) > cstart) break;
1357: column_indices[cnt++] = col;
1358: }
1359: for (k=A->i[i]; k<A->i[i+1]; k++) {
1360: column_indices[cnt++] = A->j[k] + cstart;
1361: }
1362: for (; j<B->i[i+1]; j++) {
1363: column_indices[cnt++] = garray[B->j[j]];
1364: }
1365: }
1366: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1368: /* store the column indices to the file */
1369: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1370: if (!rank) {
1371: MPI_Status status;
1372: PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);
1373: for (i=1; i<size; i++) {
1374: PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);
1375: MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);
1376: if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1377: MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);
1378: PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);
1379: }
1380: PetscViewerFlowControlEndMaster(viewer,message_count);
1381: } else {
1382: PetscViewerFlowControlStepWorker(viewer,rank,message_count);
1383: MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);
1384: MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);
1385: PetscViewerFlowControlEndWorker(viewer,message_count);
1386: }
1387: PetscFree(column_indices);
1389: /* load up the local column values */
1390: PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);
1391: cnt = 0;
1392: for (i=0; i<mat->rmap->n; i++) {
1393: for (j=B->i[i]; j<B->i[i+1]; j++) {
1394: if ( garray[B->j[j]] > cstart) break;
1395: column_values[cnt++] = B->a[j];
1396: }
1397: for (k=A->i[i]; k<A->i[i+1]; k++) {
1398: column_values[cnt++] = A->a[k];
1399: }
1400: for (; j<B->i[i+1]; j++) {
1401: column_values[cnt++] = B->a[j];
1402: }
1403: }
1404: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1406: /* store the column values to the file */
1407: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1408: if (!rank) {
1409: MPI_Status status;
1410: PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);
1411: for (i=1; i<size; i++) {
1412: PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);
1413: MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);
1414: if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1415: MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);
1416: PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);
1417: }
1418: PetscViewerFlowControlEndMaster(viewer,message_count);
1419: } else {
1420: PetscViewerFlowControlStepWorker(viewer,rank,message_count);
1421: MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);
1422: MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);
1423: PetscViewerFlowControlEndWorker(viewer,message_count);
1424: }
1425: PetscFree(column_values);
1426: return(0);
1427: }
1431: PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1432: {
1433: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1434: PetscErrorCode ierr;
1435: PetscMPIInt rank = aij->rank,size = aij->size;
1436: PetscBool isdraw,iascii,isbinary;
1437: PetscViewer sviewer;
1438: PetscViewerFormat format;
1441: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1442: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1443: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1444: if (iascii) {
1445: PetscViewerGetFormat(viewer,&format);
1446: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1447: MatInfo info;
1448: PetscBool inodes;
1450: MPI_Comm_rank(((PetscObject)mat)->comm,&rank);
1451: MatGetInfo(mat,MAT_LOCAL,&info);
1452: MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);
1453: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
1454: if (!inodes) {
1455: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1456: rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
1457: } else {
1458: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1459: rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
1460: }
1461: MatGetInfo(aij->A,MAT_LOCAL,&info);
1462: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
1463: MatGetInfo(aij->B,MAT_LOCAL,&info);
1464: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
1465: PetscViewerFlush(viewer);
1466: PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
1467: PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
1468: VecScatterView(aij->Mvctx,viewer);
1469: return(0);
1470: } else if (format == PETSC_VIEWER_ASCII_INFO) {
1471: PetscInt inodecount,inodelimit,*inodes;
1472: MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);
1473: if (inodes) {
1474: PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);
1475: } else {
1476: PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");
1477: }
1478: return(0);
1479: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1480: return(0);
1481: }
1482: } else if (isbinary) {
1483: if (size == 1) {
1484: PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);
1485: MatView(aij->A,viewer);
1486: } else {
1487: MatView_MPIAIJ_Binary(mat,viewer);
1488: }
1489: return(0);
1490: } else if (isdraw) {
1491: PetscDraw draw;
1492: PetscBool isnull;
1493: PetscViewerDrawGetDraw(viewer,0,&draw);
1494: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1495: }
1497: if (size == 1) {
1498: PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);
1499: MatView(aij->A,viewer);
1500: } else {
1501: /* assemble the entire matrix onto first processor. */
1502: Mat A;
1503: Mat_SeqAIJ *Aloc;
1504: PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1505: MatScalar *a;
1507: if (mat->rmap->N > 1024) {
1508: PetscBool flg = PETSC_FALSE;
1510: PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);
1511: if (!flg) {
1512: SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1513: }
1514: }
1516: MatCreate(((PetscObject)mat)->comm,&A);
1517: if (!rank) {
1518: MatSetSizes(A,M,N,M,N);
1519: } else {
1520: MatSetSizes(A,0,0,M,N);
1521: }
1522: /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1523: MatSetType(A,MATMPIAIJ);
1524: MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);
1525: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
1526: PetscLogObjectParent(mat,A);
1528: /* copy over the A part */
1529: Aloc = (Mat_SeqAIJ*)aij->A->data;
1530: m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1531: row = mat->rmap->rstart;
1532: for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1533: for (i=0; i<m; i++) {
1534: MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
1535: row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1536: }
1537: aj = Aloc->j;
1538: for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1540: /* copy over the B part */
1541: Aloc = (Mat_SeqAIJ*)aij->B->data;
1542: m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1543: row = mat->rmap->rstart;
1544: PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);
1545: ct = cols;
1546: for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1547: for (i=0; i<m; i++) {
1548: MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
1549: row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1550: }
1551: PetscFree(ct);
1552: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1553: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1554: /*
1555: Everyone has to call to draw the matrix since the graphics waits are
1556: synchronized across all processors that share the PetscDraw object
1557: */
1558: PetscViewerGetSingleton(viewer,&sviewer);
1559: if (!rank) {
1560: PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);
1561: /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1562: PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1563: MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);
1564: }
1565: PetscViewerRestoreSingleton(viewer,&sviewer);
1566: MatDestroy(&A);
1567: }
1568: return(0);
1569: }
1573: PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1574: {
1576: PetscBool iascii,isdraw,issocket,isbinary;
1577:
1579: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1580: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1581: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1582: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
1583: if (iascii || isdraw || isbinary || issocket) {
1584: MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
1585: } else {
1586: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1587: }
1588: return(0);
1589: }
1593: PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1594: {
1595: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1597: Vec bb1 = 0;
1598: PetscBool hasop;
1601: if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1602: VecDuplicate(bb,&bb1);
1603: }
1605: if (flag == SOR_APPLY_UPPER) {
1606: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1607: return(0);
1608: }
1610: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1611: if (flag & SOR_ZERO_INITIAL_GUESS) {
1612: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1613: its--;
1614: }
1615:
1616: while (its--) {
1617: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1618: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1620: /* update rhs: bb1 = bb - B*x */
1621: VecScale(mat->lvec,-1.0);
1622: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1624: /* local sweep */
1625: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);
1626: }
1627: } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1628: if (flag & SOR_ZERO_INITIAL_GUESS) {
1629: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1630: its--;
1631: }
1632: while (its--) {
1633: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1634: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1636: /* update rhs: bb1 = bb - B*x */
1637: VecScale(mat->lvec,-1.0);
1638: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1640: /* local sweep */
1641: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);
1642: }
1643: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1644: if (flag & SOR_ZERO_INITIAL_GUESS) {
1645: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1646: its--;
1647: }
1648: while (its--) {
1649: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1650: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1652: /* update rhs: bb1 = bb - B*x */
1653: VecScale(mat->lvec,-1.0);
1654: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1656: /* local sweep */
1657: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);
1658: }
1659: } else if (flag & SOR_EISENSTAT) {
1660: Vec xx1;
1662: VecDuplicate(bb,&xx1);
1663: (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);
1665: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1666: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1667: if (!mat->diag) {
1668: MatGetVecs(matin,&mat->diag,PETSC_NULL);
1669: MatGetDiagonal(matin,mat->diag);
1670: }
1671: MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);
1672: if (hasop) {
1673: MatMultDiagonalBlock(matin,xx,bb1);
1674: } else {
1675: VecPointwiseMult(bb1,mat->diag,xx);
1676: }
1677: VecAYPX(bb1,(omega-2.0)/omega,bb);
1679: MatMultAdd(mat->B,mat->lvec,bb1,bb1);
1681: /* local sweep */
1682: (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);
1683: VecAXPY(xx,1.0,xx1);
1684: VecDestroy(&xx1);
1685: } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1687: VecDestroy(&bb1);
1688: return(0);
1689: }
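/*
   Illustrative usage (editor's sketch): how the local relaxation kernels above
   are reached through the public interface. Assumes an assembled MPIAIJ matrix
   A and conforming vectors b and x already exist; error checking is omitted to
   match this listing.

      PetscReal omega = 1.0, fshift = 0.0;
      PetscInt  its = 1, lits = 1;

      MatSOR(A,b,omega,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),fshift,its,lits,x);

   Each outer iteration refreshes the ghost values of x through Mvctx and forms
   bb1 = bb - B*x before the on-process sweep, exactly as coded above; this is
   also the kernel used when SOR is selected at run time, e.g. with -pc_type sor.
*/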
1693: PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1694: {
1695: MPI_Comm comm;
1696: PetscInt first,local_rowsize,local_colsize;
1697: const PetscInt *rows;
1698: IS crowp,growp,irowp,lrowp,lcolp,icolp;
1702: PetscObjectGetComm((PetscObject)A,&comm);
1703: /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1704: ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);
1705: /* collect the global row permutation and invert it */
1706: ISAllGather(crowp,&growp);
1707: ISSetPermutation(growp);
1708: ISDestroy(&crowp);
1709: ISInvertPermutation(growp,PETSC_DECIDE,&irowp);
1710: ISDestroy(&growp);
1711: /* get the local target indices */
1712: MatGetOwnershipRange(A,&first,PETSC_NULL);
1713: MatGetLocalSize(A,&local_rowsize,&local_colsize);
1714: ISGetIndices(irowp,&rows);
1715: ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);
1716: ISRestoreIndices(irowp,&rows);
1717: ISDestroy(&irowp);
1718: /* the column permutation is so much easier;
1719: make a local version of 'colp' and invert it */
1720: ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);
1721: ISSetPermutation(lcolp);
1722: ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);
1723: ISDestroy(&lcolp);
1724: /* now we just get the submatrix */
1725: MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);
1726: /* clean up */
1727: ISDestroy(&lrowp);
1728: ISDestroy(&icolp);
1729: return(0);
1730: }
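/*
   Illustrative usage (editor's sketch): the public entry point for the routine
   above. The identity permutations below are purely illustrative; each process
   supplies its owned portion of the row and column permutations. Note that the
   function table at the end of this file currently leaves this slot empty
   ("MatPermute_MPIAIJ, impl currently broken"). Assumes an assembled MPIAIJ
   matrix A; error checking omitted.

      IS       rowperm,colperm;
      Mat      Aperm;
      PetscInt rstart,rend,cstart,cend;

      MatGetOwnershipRange(A,&rstart,&rend);
      MatGetOwnershipRangeColumn(A,&cstart,&cend);
      ISCreateStride(((PetscObject)A)->comm,rend-rstart,rstart,1,&rowperm);
      ISCreateStride(((PetscObject)A)->comm,cend-cstart,cstart,1,&colperm);
      MatPermute(A,rowperm,colperm,&Aperm);
      ISDestroy(&rowperm);
      ISDestroy(&colperm);
*/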
1734: PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1735: {
1736: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1737: Mat A = mat->A,B = mat->B;
1739: PetscReal isend[5],irecv[5];
1742: info->block_size = 1.0;
1743: MatGetInfo(A,MAT_LOCAL,info);
1744: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1745: isend[3] = info->memory; isend[4] = info->mallocs;
1746: MatGetInfo(B,MAT_LOCAL,info);
1747: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1748: isend[3] += info->memory; isend[4] += info->mallocs;
1749: if (flag == MAT_LOCAL) {
1750: info->nz_used = isend[0];
1751: info->nz_allocated = isend[1];
1752: info->nz_unneeded = isend[2];
1753: info->memory = isend[3];
1754: info->mallocs = isend[4];
1755: } else if (flag == MAT_GLOBAL_MAX) {
1756: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);
1757: info->nz_used = irecv[0];
1758: info->nz_allocated = irecv[1];
1759: info->nz_unneeded = irecv[2];
1760: info->memory = irecv[3];
1761: info->mallocs = irecv[4];
1762: } else if (flag == MAT_GLOBAL_SUM) {
1763: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);
1764: info->nz_used = irecv[0];
1765: info->nz_allocated = irecv[1];
1766: info->nz_unneeded = irecv[2];
1767: info->memory = irecv[3];
1768: info->mallocs = irecv[4];
1769: }
1770: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1771: info->fill_ratio_needed = 0;
1772: info->factor_mallocs = 0;
1774: return(0);
1775: }
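/*
   Illustrative usage (editor's sketch): querying the statistics assembled
   above. Assumes an assembled MPIAIJ matrix A; error checking omitted.

      MatInfo   info;
      PetscReal nz_used;

      MatGetInfo(A,MAT_GLOBAL_SUM,&info);
      nz_used = (PetscReal)info.nz_used;

   MAT_LOCAL returns only this process's counts (diagonal plus off-diagonal
   block) and MAT_GLOBAL_MAX the per-process maximum, matching the three
   branches above.
*/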
1779: PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1780: {
1781: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1785: switch (op) {
1786: case MAT_NEW_NONZERO_LOCATIONS:
1787: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1788: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1789: case MAT_KEEP_NONZERO_PATTERN:
1790: case MAT_NEW_NONZERO_LOCATION_ERR:
1791: case MAT_USE_INODES:
1792: case MAT_IGNORE_ZERO_ENTRIES:
1793: MatCheckPreallocated(A,1);
1794: MatSetOption(a->A,op,flg);
1795: MatSetOption(a->B,op,flg);
1796: break;
1797: case MAT_ROW_ORIENTED:
1798: a->roworiented = flg;
1799: MatSetOption(a->A,op,flg);
1800: MatSetOption(a->B,op,flg);
1801: break;
1802: case MAT_NEW_DIAGONALS:
1803: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1804: break;
1805: case MAT_IGNORE_OFF_PROC_ENTRIES:
1806: a->donotstash = flg;
1807: break;
1808: case MAT_SPD:
1809: A->spd_set = PETSC_TRUE;
1810: A->spd = flg;
1811: if (flg) {
1812: A->symmetric = PETSC_TRUE;
1813: A->structurally_symmetric = PETSC_TRUE;
1814: A->symmetric_set = PETSC_TRUE;
1815: A->structurally_symmetric_set = PETSC_TRUE;
1816: }
1817: break;
1818: case MAT_SYMMETRIC:
1819: MatSetOption(a->A,op,flg);
1820: break;
1821: case MAT_STRUCTURALLY_SYMMETRIC:
1822: MatSetOption(a->A,op,flg);
1823: break;
1824: case MAT_HERMITIAN:
1825: MatSetOption(a->A,op,flg);
1826: break;
1827: case MAT_SYMMETRY_ETERNAL:
1828: MatSetOption(a->A,op,flg);
1829: break;
1830: default:
1831: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1832: }
1833: return(0);
1834: }
1838: PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1839: {
1840: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1841: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1843: PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1844: PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1845: PetscInt *cmap,*idx_p;
1848: if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1849: mat->getrowactive = PETSC_TRUE;
1851: if (!mat->rowvalues && (idx || v)) {
1852: /*
1853: allocate enough space to hold information from the longest row.
1854: */
1855: Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1856: PetscInt max = 1,tmp;
1857: for (i=0; i<matin->rmap->n; i++) {
1858: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1859: if (max < tmp) { max = tmp; }
1860: }
1861: PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);
1862: }
1864: if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1865: lrow = row - rstart;
1867: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1868: if (!v) {pvA = 0; pvB = 0;}
1869: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1870: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1871: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1872: nztot = nzA + nzB;
1874: cmap = mat->garray;
1875: if (v || idx) {
1876: if (nztot) {
1877: /* Sort by increasing column numbers, assuming A and B already sorted */
1878: PetscInt imark = -1;
1879: if (v) {
1880: *v = v_p = mat->rowvalues;
1881: for (i=0; i<nzB; i++) {
1882: if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1883: else break;
1884: }
1885: imark = i;
1886: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1887: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1888: }
1889: if (idx) {
1890: *idx = idx_p = mat->rowindices;
1891: if (imark > -1) {
1892: for (i=0; i<imark; i++) {
1893: idx_p[i] = cmap[cworkB[i]];
1894: }
1895: } else {
1896: for (i=0; i<nzB; i++) {
1897: if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1898: else break;
1899: }
1900: imark = i;
1901: }
1902: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i];
1903: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]];
1904: }
1905: } else {
1906: if (idx) *idx = 0;
1907: if (v) *v = 0;
1908: }
1909: }
1910: *nz = nztot;
1911: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1912: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1913: return(0);
1914: }
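/*
   Illustrative usage (editor's sketch): the standard access pattern for the
   merged rows returned above (off-diagonal columns below cstart, then the
   diagonal block, then the remaining off-diagonal columns). Only locally owned
   rows may be requested, as the range check above enforces. Assumes an
   assembled MPIAIJ matrix A; error checking omitted.

      PetscInt          row,rstart,rend,ncols,j;
      const PetscInt    *cols;
      const PetscScalar *vals;
      PetscScalar       rowsum;

      MatGetOwnershipRange(A,&rstart,&rend);
      for (row=rstart; row<rend; row++) {
        MatGetRow(A,row,&ncols,&cols,&vals);
        rowsum = 0.0;
        for (j=0; j<ncols; j++) rowsum += vals[j];
        MatRestoreRow(A,row,&ncols,&cols,&vals);
      }
*/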
1918: PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1919: {
1920: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1923: if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1924: aij->getrowactive = PETSC_FALSE;
1925: return(0);
1926: }
1930: PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1931: {
1932: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1933: Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1935: PetscInt i,j,cstart = mat->cmap->rstart;
1936: PetscReal sum = 0.0;
1937: MatScalar *v;
1940: if (aij->size == 1) {
1941: MatNorm(aij->A,type,norm);
1942: } else {
1943: if (type == NORM_FROBENIUS) {
1944: v = amat->a;
1945: for (i=0; i<amat->nz; i++) {
1946: #if defined(PETSC_USE_COMPLEX)
1947: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1948: #else
1949: sum += (*v)*(*v); v++;
1950: #endif
1951: }
1952: v = bmat->a;
1953: for (i=0; i<bmat->nz; i++) {
1954: #if defined(PETSC_USE_COMPLEX)
1955: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1956: #else
1957: sum += (*v)*(*v); v++;
1958: #endif
1959: }
1960: MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);
1961: *norm = PetscSqrtReal(*norm);
1962: } else if (type == NORM_1) { /* max column norm */
1963: PetscReal *tmp,*tmp2;
1964: PetscInt *jj,*garray = aij->garray;
1965: PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);
1966: PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);
1967: PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));
1968: *norm = 0.0;
1969: v = amat->a; jj = amat->j;
1970: for (j=0; j<amat->nz; j++) {
1971: tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++;
1972: }
1973: v = bmat->a; jj = bmat->j;
1974: for (j=0; j<bmat->nz; j++) {
1975: tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1976: }
1977: MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);
1978: for (j=0; j<mat->cmap->N; j++) {
1979: if (tmp2[j] > *norm) *norm = tmp2[j];
1980: }
1981: PetscFree(tmp);
1982: PetscFree(tmp2);
1983: } else if (type == NORM_INFINITY) { /* max row norm */
1984: PetscReal ntemp = 0.0;
1985: for (j=0; j<aij->A->rmap->n; j++) {
1986: v = amat->a + amat->i[j];
1987: sum = 0.0;
1988: for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1989: sum += PetscAbsScalar(*v); v++;
1990: }
1991: v = bmat->a + bmat->i[j];
1992: for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1993: sum += PetscAbsScalar(*v); v++;
1994: }
1995: if (sum > ntemp) ntemp = sum;
1996: }
1997: MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);
1998: } else {
1999: SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2000: }
2001: }
2002: return(0);
2003: }
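/*
   Illustrative usage (editor's sketch): the three norms computed above.
   Assumes an assembled MPIAIJ matrix A; error checking omitted.

      PetscReal nrm1,nrmf,nrminf;

      MatNorm(A,NORM_1,&nrm1);
      MatNorm(A,NORM_FROBENIUS,&nrmf);
      MatNorm(A,NORM_INFINITY,&nrminf);

   NORM_1 is the largest column sum of absolute values, NORM_FROBENIUS the
   square root of the global sum of squares, and NORM_INFINITY the largest row
   sum, matching the three branches above; NORM_2 is rejected.
*/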
2007: PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2008: {
2009: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2010: Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2012: PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2013: PetscInt cstart=A->cmap->rstart,ncol;
2014: Mat B;
2015: MatScalar *array;
2018: if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2020: ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2021: ai = Aloc->i; aj = Aloc->j;
2022: bi = Bloc->i; bj = Bloc->j;
2023: if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2024: /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2025: PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);
2026: PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));
2027: for (i=0; i<ai[ma]; i++){
2028: d_nnz[aj[i]] ++;
2029: aj[i] += cstart; /* global col index to be used by MatSetValues() */
2030: }
2032: MatCreate(((PetscObject)A)->comm,&B);
2033: MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
2034: MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);
2035: MatSetType(B,((PetscObject)A)->type_name);
2036: MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);
2037: MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
2038: PetscFree(d_nnz);
2039: } else {
2040: B = *matout;
2041: MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
2042: for (i=0; i<ai[ma]; i++){
2043: aj[i] += cstart; /* global col index to be used by MatSetValues() */
2044: }
2045: }
2047: /* copy over the A part */
2048: array = Aloc->a;
2049: row = A->rmap->rstart;
2050: for (i=0; i<ma; i++) {
2051: ncol = ai[i+1]-ai[i];
2052: MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);
2053: row++; array += ncol; aj += ncol;
2054: }
2055: aj = Aloc->j;
2056: aj = Aloc->j;
2056: for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
2058: /* copy over the B part */
2059: PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);
2060: PetscMemzero(cols,bi[mb]*sizeof(PetscInt));
2061: array = Bloc->a;
2062: row = A->rmap->rstart;
2063: for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2064: cols_tmp = cols;
2065: for (i=0; i<mb; i++) {
2066: ncol = bi[i+1]-bi[i];
2067: MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);
2068: row++; array += ncol; cols_tmp += ncol;
2069: }
2070: PetscFree(cols);
2071:
2072: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2073: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2074: if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2075: *matout = B;
2076: } else {
2077: MatHeaderMerge(A,B);
2078: }
2079: return(0);
2080: }
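/*
   Illustrative usage (editor's sketch): the two modes handled above. Assumes
   an assembled MPIAIJ matrix A, square in the in-place case; error checking
   omitted.

      Mat At;

      MatTranspose(A,MAT_INITIAL_MATRIX,&At);
      MatTranspose(A,MAT_REUSE_MATRIX,&A);

   The first call creates a new matrix using the d_nnz preallocation computed
   above (with o_nnz approximated by d_nnz); the second transposes A in place,
   which the size check above restricts to square matrices.
*/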
2084: PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2085: {
2086: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2087: Mat a = aij->A,b = aij->B;
2089: PetscInt s1,s2,s3;
2092: MatGetLocalSize(mat,&s2,&s3);
2093: if (rr) {
2094: VecGetLocalSize(rr,&s1);
2095: if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2096: /* Overlap communication with computation. */
2097: VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
2098: }
2099: if (ll) {
2100: VecGetLocalSize(ll,&s1);
2101: if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2102: (*b->ops->diagonalscale)(b,ll,0);
2103: }
2104: /* scale the diagonal block */
2105: (*a->ops->diagonalscale)(a,ll,rr);
2107: if (rr) {
2108: /* Do a scatter end and then right scale the off-diagonal block */
2109: VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
2110: (*b->ops->diagonalscale)(b,0,aij->lvec);
2111: }
2112:
2113: return(0);
2114: }
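/*
   Illustrative usage (editor's sketch): the public call that reaches the
   routine above, which replaces A by diag(l)*A*diag(r). Assumes an assembled
   MPIAIJ matrix A; error checking omitted.

      Vec l,r;

      MatGetVecs(A,&r,&l);
      VecSet(l,2.0);
      VecSet(r,0.5);
      MatDiagonalScale(A,l,r);
      VecDestroy(&l);
      VecDestroy(&r);

   MatGetVecs() returns r with the column layout and l with the row layout,
   which is what the conformity checks above require; either argument may be
   PETSC_NULL to scale on one side only.
*/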
2118: PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2119: {
2120: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2124: MatSetUnfactored(a->A);
2125: return(0);
2126: }
2130: PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
2131: {
2132: Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2133: Mat a,b,c,d;
2134: PetscBool flg;
2138: a = matA->A; b = matA->B;
2139: c = matB->A; d = matB->B;
2141: MatEqual(a,c,&flg);
2142: if (flg) {
2143: MatEqual(b,d,&flg);
2144: }
2145: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);
2146: return(0);
2147: }
2151: PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2152: {
2154: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2155: Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2158: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2159: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2160: /* because of the column compression in the off-processor part of the matrix a->B,
2161: the number of columns in a->B and b->B may differ, so we cannot call
2162: MatCopy() directly on the two parts. If need be, a copy more efficient than
2163: MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2164: and then copying the submatrices */
2165: MatCopy_Basic(A,B,str);
2166: } else {
2167: MatCopy(a->A,b->A,str);
2168: MatCopy(a->B,b->B,str);
2169: }
2170: return(0);
2171: }
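/*
   Illustrative usage (editor's sketch): when the fast path above applies.
   Assumes assembled MPIAIJ matrices A and B of the same sizes; error checking
   omitted.

      MatCopy(A,B,SAME_NONZERO_PATTERN);
      MatCopy(A,B,DIFFERENT_NONZERO_PATTERN);

   The first call copies the diagonal and off-diagonal blocks directly; the
   second falls back to MatCopy_Basic(), for the reason given in the comment
   above. When B does not yet exist, MatDuplicate(A,MAT_COPY_VALUES,&B)
   sidesteps the pattern question altogether.
*/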
2175: PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2176: {
2180: MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
2181: return(0);
2182: }
2186: /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2187: static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2188: {
2189: PetscInt i,m=Y->rmap->N;
2190: Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data;
2191: Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data;
2192: const PetscInt *xi = x->i,*yi = y->i;
2195: /* Set the number of nonzeros in the new matrix */
2196: for(i=0; i<m; i++) {
2197: PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2198: const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2199: nnz[i] = 0;
2200: for (j=0,k=0; j<nzx; j++) { /* Point in X */
2201: for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2202: if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */
2203: nnz[i]++;
2204: }
2205: for (; k<nzy; k++) nnz[i]++;
2206: }
2207: return(0);
2208: }
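/*
   Worked example (editor's note): the loop above counts, for each row, the
   size of the union of the two column sets after both are mapped to global
   indices through yltog and xltog. If row i of Y has global columns {0,2,5}
   and row i of X has global columns {2,3}, the merge visits 0, 2, 3 and 5 and
   sets nnz[i] = 4; column 2 is counted only once because of the
   "Skip duplicate" branch.
*/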
2212: PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2213: {
2215: PetscInt i;
2216: Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2217: PetscBLASInt bnz,one=1;
2218: Mat_SeqAIJ *x,*y;
2221: if (str == SAME_NONZERO_PATTERN) {
2222: PetscScalar alpha = a;
2223: x = (Mat_SeqAIJ *)xx->A->data;
2224: y = (Mat_SeqAIJ *)yy->A->data;
2225: bnz = PetscBLASIntCast(x->nz);
2226: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2227: x = (Mat_SeqAIJ *)xx->B->data;
2228: y = (Mat_SeqAIJ *)yy->B->data;
2229: bnz = PetscBLASIntCast(x->nz);
2230: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2231: } else if (str == SUBSET_NONZERO_PATTERN) {
2232: MatAXPY_SeqAIJ(yy->A,a,xx->A,str);
2234: x = (Mat_SeqAIJ *)xx->B->data;
2235: y = (Mat_SeqAIJ *)yy->B->data;
2236: if (y->xtoy && y->XtoY != xx->B) {
2237: PetscFree(y->xtoy);
2238: MatDestroy(&y->XtoY);
2239: }
2240: if (!y->xtoy) { /* get xtoy */
2241: MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);
2242: y->XtoY = xx->B;
2243: PetscObjectReference((PetscObject)xx->B);
2244: }
2245: for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2246: } else {
2247: Mat B;
2248: PetscInt *nnz_d,*nnz_o;
2249: PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);
2250: PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);
2251: MatCreate(((PetscObject)Y)->comm,&B);
2252: PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
2253: MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
2254: MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);
2255: MatSetType(B,MATMPIAIJ);
2256: MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);
2257: MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);
2258: MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);
2259: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
2260: MatHeaderReplace(Y,B);
2261: PetscFree(nnz_d);
2262: PetscFree(nnz_o);
2263: }
2264: return(0);
2265: }
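/*
   Illustrative usage (editor's sketch): the three cases handled above.
   Assumes assembled MPIAIJ matrices X and Y with identical sizes; error
   checking omitted.

      PetscScalar a = 2.0;

      MatAXPY(Y,a,X,SAME_NONZERO_PATTERN);
      MatAXPY(Y,a,X,SUBSET_NONZERO_PATTERN);
      MatAXPY(Y,a,X,DIFFERENT_NONZERO_PATTERN);

   With identical patterns both blocks are updated by a single BLAS axpy; with
   a subset pattern the cached xtoy map built above scatters X's entries into
   Y; otherwise Y is rebuilt with the merged preallocation computed by
   MatAXPYGetPreallocation_SeqAIJ()/_MPIAIJ().
*/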
2267: extern PetscErrorCode MatConjugate_SeqAIJ(Mat);
2271: PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2272: {
2273: #if defined(PETSC_USE_COMPLEX)
2275: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2278: MatConjugate_SeqAIJ(aij->A);
2279: MatConjugate_SeqAIJ(aij->B);
2280: #else
2282: #endif
2283: return(0);
2284: }
2288: PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2289: {
2290: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2294: MatRealPart(a->A);
2295: MatRealPart(a->B);
2296: return(0);
2297: }
2301: PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2302: {
2303: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2307: MatImaginaryPart(a->A);
2308: MatImaginaryPart(a->B);
2309: return(0);
2310: }
2312: #ifdef PETSC_HAVE_PBGL
2314: #include <boost/parallel/mpi/bsp_process_group.hpp>
2315: #include <boost/graph/distributed/ilu_default_graph.hpp>
2316: #include <boost/graph/distributed/ilu_0_block.hpp>
2317: #include <boost/graph/distributed/ilu_preconditioner.hpp>
2318: #include <boost/graph/distributed/petsc/interface.hpp>
2319: #include <boost/multi_array.hpp>
2320: #include <boost/parallel/distributed_property_map.hpp>
2324: /*
2325: This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2326: */
2327: PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2328: {
2329: namespace petsc = boost::distributed::petsc;
2330:
2331: namespace graph_dist = boost::graph::distributed;
2332: using boost::graph::distributed::ilu_default::process_group_type;
2333: using boost::graph::ilu_permuted;
2335: PetscBool row_identity, col_identity;
2336: PetscContainer c;
2337: PetscInt m, n, M, N;
2338: PetscErrorCode ierr;
2341: if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2342: ISIdentity(isrow, &row_identity);
2343: ISIdentity(iscol, &col_identity);
2344: if (!row_identity || !col_identity) {
2345: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2346: }
2348: process_group_type pg;
2349: typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2350: lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2351: lgraph_type& level_graph = *lgraph_p;
2352: graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2354: petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2355: ilu_permuted(level_graph);
2357: /* put together the new matrix */
2358: MatCreate(((PetscObject)A)->comm, fact);
2359: MatGetLocalSize(A, &m, &n);
2360: MatGetSize(A, &M, &N);
2361: MatSetSizes(fact, m, n, M, N);
2362: MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);
2363: MatSetType(fact, ((PetscObject)A)->type_name);
2364: MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);
2365: MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);
2367: PetscContainerCreate(((PetscObject)A)->comm, &c);
2368: PetscContainerSetPointer(c, lgraph_p);
2369: PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2370: PetscContainerDestroy(&c);
2371: return(0);
2372: }
2376: PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2377: {
2379: return(0);
2380: }
2384: /*
2385: This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2386: */
2387: PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2388: {
2389: namespace graph_dist = boost::graph::distributed;
2391: typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2392: lgraph_type* lgraph_p;
2393: PetscContainer c;
2397: PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);
2398: PetscContainerGetPointer(c, (void **) &lgraph_p);
2399: VecCopy(b, x);
2401: PetscScalar* array_x;
2402: VecGetArray(x, &array_x);
2403: PetscInt sx;
2404: VecGetSize(x, &sx);
2405:
2406: PetscScalar* array_b;
2407: VecGetArray(b, &array_b);
2408: PetscInt sb;
2409: VecGetSize(b, &sb);
2411: lgraph_type& level_graph = *lgraph_p;
2412: graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2414: typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2415: array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]),
2416: ref_x(array_x, boost::extents[num_vertices(graph)]);
2418: typedef boost::iterator_property_map<array_ref_type::iterator,
2419: boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
2420: gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2421: vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2422:
2423: ilu_set_solve(*lgraph_p, vector_b, vector_x);
2425: return(0);
2426: }
2427: #endif
2429: typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2430: PetscInt nzlocal,nsends,nrecvs;
2431: PetscMPIInt *send_rank,*recv_rank;
2432: PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2433: PetscScalar *sbuf_a,**rbuf_a;
2434: PetscErrorCode (*Destroy)(Mat);
2435: } Mat_Redundant;
2439: PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2440: {
2441: PetscErrorCode ierr;
2442: Mat_Redundant *redund=(Mat_Redundant*)ptr;
2443: PetscInt i;
2446: PetscFree2(redund->send_rank,redund->recv_rank);
2447: PetscFree(redund->sbuf_j);
2448: PetscFree(redund->sbuf_a);
2449: for (i=0; i<redund->nrecvs; i++){
2450: PetscFree(redund->rbuf_j[i]);
2451: PetscFree(redund->rbuf_a[i]);
2452: }
2453: PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);
2454: PetscFree(redund);
2455: return(0);
2456: }
2460: PetscErrorCode MatDestroy_MatRedundant(Mat A)
2461: {
2462: PetscErrorCode ierr;
2463: PetscContainer container;
2464: Mat_Redundant *redund=PETSC_NULL;
2467: PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);
2468: if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2469: PetscContainerGetPointer(container,(void **)&redund);
2470: A->ops->destroy = redund->Destroy;
2471: PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);
2472: if (A->ops->destroy) {
2473: (*A->ops->destroy)(A);
2474: }
2475: return(0);
2476: }
2480: PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2481: {
2482: PetscMPIInt rank,size;
2483: MPI_Comm comm=((PetscObject)mat)->comm;
2485: PetscInt nsends=0,nrecvs=0,i,rownz_max=0;
2486: PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2487: PetscInt *rowrange=mat->rmap->range;
2488: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2489: Mat A=aij->A,B=aij->B,C=*matredundant;
2490: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2491: PetscScalar *sbuf_a;
2492: PetscInt nzlocal=a->nz+b->nz;
2493: PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2494: PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2495: PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2496: MatScalar *aworkA,*aworkB;
2497: PetscScalar *vals;
2498: PetscMPIInt tag1,tag2,tag3,imdex;
2499: MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2500: *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2501: MPI_Status recv_status,*send_status;
2502: PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2503: PetscInt **rbuf_j=PETSC_NULL;
2504: PetscScalar **rbuf_a=PETSC_NULL;
2505: Mat_Redundant *redund=PETSC_NULL;
2506: PetscContainer container;
2509: MPI_Comm_rank(comm,&rank);
2510: MPI_Comm_size(comm,&size);
2512: if (reuse == MAT_REUSE_MATRIX) {
2513: MatGetSize(C,&M,&N);
2514: if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2515: MatGetLocalSize(C,&M,&N);
2516: if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2517: PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);
2518: if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2519: PetscContainerGetPointer(container,(void **)&redund);
2520: if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2522: nsends = redund->nsends;
2523: nrecvs = redund->nrecvs;
2524: send_rank = redund->send_rank;
2525: recv_rank = redund->recv_rank;
2526: sbuf_nz = redund->sbuf_nz;
2527: rbuf_nz = redund->rbuf_nz;
2528: sbuf_j = redund->sbuf_j;
2529: sbuf_a = redund->sbuf_a;
2530: rbuf_j = redund->rbuf_j;
2531: rbuf_a = redund->rbuf_a;
2532: }
2534: if (reuse == MAT_INITIAL_MATRIX){
2535: PetscMPIInt subrank,subsize;
2536: PetscInt nleftover,np_subcomm;
2537: /* get the destination processors' id send_rank, nsends and nrecvs */
2538: MPI_Comm_rank(subcomm,&subrank);
2539: MPI_Comm_size(subcomm,&subsize);
2540: PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);
2541: np_subcomm = size/nsubcomm;
2542: nleftover = size - nsubcomm*np_subcomm;
2543: nsends = 0; nrecvs = 0;
2544: for (i=0; i<size; i++){ /* i=rank*/
2545: if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2546: send_rank[nsends] = i; nsends++;
2547: recv_rank[nrecvs++] = i;
2548: }
2549: }
2550: if (rank >= size - nleftover){/* this proc is a leftover processor */
2551: i = size-nleftover-1;
2552: j = 0;
2553: while (j < nsubcomm - nleftover){
2554: send_rank[nsends++] = i;
2555: i--; j++;
2556: }
2557: }
2559: if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2560: for (i=0; i<nleftover; i++){
2561: recv_rank[nrecvs++] = size-nleftover+i;
2562: }
2563: }
2565: /* allocate sbuf_j, sbuf_a */
2566: i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2567: PetscMalloc(i*sizeof(PetscInt),&sbuf_j);
2568: PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);
2569: } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2571: /* copy mat's local entries into the buffers */
2572: if (reuse == MAT_INITIAL_MATRIX){
2573: rownz_max = 0;
2574: rptr = sbuf_j;
2575: cols = sbuf_j + rend-rstart + 1;
2576: vals = sbuf_a;
2577: rptr[0] = 0;
2578: for (i=0; i<rend-rstart; i++){
2579: row = i + rstart;
2580: nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2581: ncols = nzA + nzB;
2582: cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2583: aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2584: /* load the column indices for this row into cols */
2585: lwrite = 0;
2586: for (l=0; l<nzB; l++) {
2587: if ((ctmp = bmap[cworkB[l]]) < cstart){
2588: vals[lwrite] = aworkB[l];
2589: cols[lwrite++] = ctmp;
2590: }
2591: }
2592: for (l=0; l<nzA; l++){
2593: vals[lwrite] = aworkA[l];
2594: cols[lwrite++] = cstart + cworkA[l];
2595: }
2596: for (l=0; l<nzB; l++) {
2597: if ((ctmp = bmap[cworkB[l]]) >= cend){
2598: vals[lwrite] = aworkB[l];
2599: cols[lwrite++] = ctmp;
2600: }
2601: }
2602: vals += ncols;
2603: cols += ncols;
2604: rptr[i+1] = rptr[i] + ncols;
2605: if (rownz_max < ncols) rownz_max = ncols;
2606: }
2607: if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2608: } else { /* only copy matrix values into sbuf_a */
2609: rptr = sbuf_j;
2610: vals = sbuf_a;
2611: rptr[0] = 0;
2612: for (i=0; i<rend-rstart; i++){
2613: row = i + rstart;
2614: nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2615: ncols = nzA + nzB;
2616: cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2617: aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2618: lwrite = 0;
2619: for (l=0; l<nzB; l++) {
2620: if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2621: }
2622: for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2623: for (l=0; l<nzB; l++) {
2624: if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2625: }
2626: vals += ncols;
2627: rptr[i+1] = rptr[i] + ncols;
2628: }
2629: } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2631: /* send nzlocal to others, and recv other's nzlocal */
2632: /*--------------------------------------------------*/
2633: if (reuse == MAT_INITIAL_MATRIX){
2634: PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);
2635: s_waits2 = s_waits3 + nsends;
2636: s_waits1 = s_waits2 + nsends;
2637: r_waits1 = s_waits1 + nsends;
2638: r_waits2 = r_waits1 + nrecvs;
2639: r_waits3 = r_waits2 + nrecvs;
2640: } else {
2641: PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);
2642: r_waits3 = s_waits3 + nsends;
2643: }
2645: PetscObjectGetNewTag((PetscObject)mat,&tag3);
2646: if (reuse == MAT_INITIAL_MATRIX){
2647: /* get new tags to keep the communication clean */
2648: PetscObjectGetNewTag((PetscObject)mat,&tag1);
2649: PetscObjectGetNewTag((PetscObject)mat,&tag2);
2650: PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);
2652: /* post receives of other's nzlocal */
2653: for (i=0; i<nrecvs; i++){
2654: MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);
2655: }
2656: /* send nzlocal to others */
2657: for (i=0; i<nsends; i++){
2658: sbuf_nz[i] = nzlocal;
2659: MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);
2660: }
2661: /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2662: count = nrecvs;
2663: while (count) {
2664: MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);
2665: recv_rank[imdex] = recv_status.MPI_SOURCE;
2666: /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2667: PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);
2669: i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2670: rbuf_nz[imdex] += i + 2;
2671: PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);
2672: MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);
2673: count--;
2674: }
2675: /* wait on sends of nzlocal */
2676: if (nsends) {MPI_Waitall(nsends,s_waits1,send_status);}
2677: /* send mat->i,j to others, and recv from other's */
2678: /*------------------------------------------------*/
2679: for (i=0; i<nsends; i++){
2680: j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2681: MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);
2682: }
2683: /* wait on receives of mat->i,j */
2684: /*------------------------------*/
2685: count = nrecvs;
2686: while (count) {
2687: MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);
2688: if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2689: count--;
2690: }
2691: /* wait on sends of mat->i,j */
2692: /*---------------------------*/
2693: if (nsends) {
2694: MPI_Waitall(nsends,s_waits2,send_status);
2695: }
2696: } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2698: /* post receives, send and receive mat->a */
2699: /*----------------------------------------*/
2700: for (imdex=0; imdex<nrecvs; imdex++) {
2701: MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);
2702: }
2703: for (i=0; i<nsends; i++){
2704: MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);
2705: }
2706: count = nrecvs;
2707: while (count) {
2708: MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);
2709: if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2710: count--;
2711: }
2712: if (nsends) {
2713: MPI_Waitall(nsends,s_waits3,send_status);
2714: }
2716: PetscFree2(s_waits3,send_status);
2718: /* create redundant matrix */
2719: /*-------------------------*/
2720: if (reuse == MAT_INITIAL_MATRIX){
2721: /* compute rownz_max for preallocation */
2722: for (imdex=0; imdex<nrecvs; imdex++){
2723: j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2724: rptr = rbuf_j[imdex];
2725: for (i=0; i<j; i++){
2726: ncols = rptr[i+1] - rptr[i];
2727: if (rownz_max < ncols) rownz_max = ncols;
2728: }
2729: }
2731: MatCreate(subcomm,&C);
2732: MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);
2733: MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);
2734: MatSetFromOptions(C);
2735: MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);
2736: MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);
2737: } else {
2738: C = *matredundant;
2739: }
2741: /* insert local matrix entries */
2742: rptr = sbuf_j;
2743: cols = sbuf_j + rend-rstart + 1;
2744: vals = sbuf_a;
2745: for (i=0; i<rend-rstart; i++){
2746: row = i + rstart;
2747: ncols = rptr[i+1] - rptr[i];
2748: MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
2749: vals += ncols;
2750: cols += ncols;
2751: }
2752: /* insert received matrix entries */
2753: for (imdex=0; imdex<nrecvs; imdex++){
2754: rstart = rowrange[recv_rank[imdex]];
2755: rend = rowrange[recv_rank[imdex]+1];
2756: rptr = rbuf_j[imdex];
2757: cols = rbuf_j[imdex] + rend-rstart + 1;
2758: vals = rbuf_a[imdex];
2759: for (i=0; i<rend-rstart; i++){
2760: row = i + rstart;
2761: ncols = rptr[i+1] - rptr[i];
2762: MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
2763: vals += ncols;
2764: cols += ncols;
2765: }
2766: }
2767: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2768: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2769: MatGetSize(C,&M,&N);
2770: if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2771: if (reuse == MAT_INITIAL_MATRIX) {
2772: PetscContainer container;
2773: *matredundant = C;
2774: /* create a supporting struct and attach it to C for reuse */
2775: PetscNewLog(C,Mat_Redundant,&redund);
2776: PetscContainerCreate(PETSC_COMM_SELF,&container);
2777: PetscContainerSetPointer(container,redund);
2778: PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);
2779: PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);
2780: PetscContainerDestroy(&container);
2782: redund->nzlocal = nzlocal;
2783: redund->nsends = nsends;
2784: redund->nrecvs = nrecvs;
2785: redund->send_rank = send_rank;
2786: redund->recv_rank = recv_rank;
2787: redund->sbuf_nz = sbuf_nz;
2788: redund->rbuf_nz = rbuf_nz;
2789: redund->sbuf_j = sbuf_j;
2790: redund->sbuf_a = sbuf_a;
2791: redund->rbuf_j = rbuf_j;
2792: redund->rbuf_a = rbuf_a;
2794: redund->Destroy = C->ops->destroy;
2795: C->ops->destroy = MatDestroy_MatRedundant;
2796: }
2797: return(0);
2798: }
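/*
   Illustrative usage (editor's sketch): a typical call sequence for the
   routine above, which assembles the entire matrix redundantly on each of
   nsubcomm subcommunicators (PCREDUNDANT is the main user). Here subcomm is
   assumed to be one of those subcommunicators of the matrix's communicator
   (e.g. built with the PetscSubcomm utilities) and mlocal the number of rows
   the redundant copy should own on this process of subcomm; both are taken as
   given. Error checking omitted.

      Mat Ared;

      MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal,MAT_INITIAL_MATRIX,&Ared);

   Once the communication pattern has been cached in the composed Mat_Redundant
   container, the values can be refreshed in place after A changes:

      MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal,MAT_REUSE_MATRIX,&Ared);
*/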
2802: PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2803: {
2804: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2806: PetscInt i,*idxb = 0;
2807: PetscScalar *va,*vb;
2808: Vec vtmp;
2811: MatGetRowMaxAbs(a->A,v,idx);
2812: VecGetArray(v,&va);
2813: if (idx) {
2814: for (i=0; i<A->rmap->n; i++) {
2815: if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2816: }
2817: }
2819: VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);
2820: if (idx) {
2821: PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);
2822: }
2823: MatGetRowMaxAbs(a->B,vtmp,idxb);
2824: VecGetArray(vtmp,&vb);
2826: for (i=0; i<A->rmap->n; i++){
2827: if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2828: va[i] = vb[i];
2829: if (idx) idx[i] = a->garray[idxb[i]];
2830: }
2831: }
2833: VecRestoreArray(v,&va);
2834: VecRestoreArray(vtmp,&vb);
2835: PetscFree(idxb);
2836: VecDestroy(&vtmp);
2837: return(0);
2838: }
2842: PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2843: {
2844: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2846: PetscInt i,*idxb = 0;
2847: PetscScalar *va,*vb;
2848: Vec vtmp;
2851: MatGetRowMinAbs(a->A,v,idx);
2852: VecGetArray(v,&va);
2853: if (idx) {
2854: for (i=0; i<A->rmap->n; i++) { /* loop over local rows, as in MatGetRowMaxAbs_MPIAIJ() */
2855: if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2856: }
2857: }
2859: VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);
2860: if (idx) {
2861: PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);
2862: }
2863: MatGetRowMinAbs(a->B,vtmp,idxb);
2864: VecGetArray(vtmp,&vb);
2866: for (i=0; i<A->rmap->n; i++){
2867: if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2868: va[i] = vb[i];
2869: if (idx) idx[i] = a->garray[idxb[i]];
2870: }
2871: }
2873: VecRestoreArray(v,&va);
2874: VecRestoreArray(vtmp,&vb);
2875: PetscFree(idxb);
2876: VecDestroy(&vtmp);
2877: return(0);
2878: }
2882: PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2883: {
2884: Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
2885: PetscInt n = A->rmap->n;
2886: PetscInt cstart = A->cmap->rstart;
2887: PetscInt *cmap = mat->garray;
2888: PetscInt *diagIdx, *offdiagIdx;
2889: Vec diagV, offdiagV;
2890: PetscScalar *a, *diagA, *offdiagA;
2891: PetscInt r;
2895: PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);
2896: VecCreateSeq(((PetscObject)A)->comm, n, &diagV);
2897: VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);
2898: MatGetRowMin(mat->A, diagV, diagIdx);
2899: MatGetRowMin(mat->B, offdiagV, offdiagIdx);
2900: VecGetArray(v, &a);
2901: VecGetArray(diagV, &diagA);
2902: VecGetArray(offdiagV, &offdiagA);
2903: for(r = 0; r < n; ++r) {
2904: if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2905: a[r] = diagA[r];
2906: idx[r] = cstart + diagIdx[r];
2907: } else {
2908: a[r] = offdiagA[r];
2909: idx[r] = cmap[offdiagIdx[r]];
2910: }
2911: }
2912: VecRestoreArray(v, &a);
2913: VecRestoreArray(diagV, &diagA);
2914: VecRestoreArray(offdiagV, &offdiagA);
2915: VecDestroy(&diagV);
2916: VecDestroy(&offdiagV);
2917: PetscFree2(diagIdx, offdiagIdx);
2918: return(0);
2919: }
2923: PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2924: {
2925: Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
2926: PetscInt n = A->rmap->n;
2927: PetscInt cstart = A->cmap->rstart;
2928: PetscInt *cmap = mat->garray;
2929: PetscInt *diagIdx, *offdiagIdx;
2930: Vec diagV, offdiagV;
2931: PetscScalar *a, *diagA, *offdiagA;
2932: PetscInt r;
2936: PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);
2937: VecCreateSeq(((PetscObject)A)->comm, n, &diagV);
2938: VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);
2939: MatGetRowMax(mat->A, diagV, diagIdx);
2940: MatGetRowMax(mat->B, offdiagV, offdiagIdx);
2941: VecGetArray(v, &a);
2942: VecGetArray(diagV, &diagA);
2943: VecGetArray(offdiagV, &offdiagA);
2944: for(r = 0; r < n; ++r) {
2945: if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2946: a[r] = diagA[r];
2947: idx[r] = cstart + diagIdx[r];
2948: } else {
2949: a[r] = offdiagA[r];
2950: idx[r] = cmap[offdiagIdx[r]];
2951: }
2952: }
2953: VecRestoreArray(v, &a);
2954: VecRestoreArray(diagV, &diagA);
2955: VecRestoreArray(offdiagV, &offdiagA);
2956: VecDestroy(&diagV);
2957: VecDestroy(&offdiagV);
2958: PetscFree2(diagIdx, offdiagIdx);
2959: return(0);
2960: }
2964: PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2965: {
2967: Mat *dummy;
2970: MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);
2971: *newmat = *dummy;
2972: PetscFree(dummy);
2973: return(0);
2974: }
2976: extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2980: PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2981: {
2982: Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
2986: MatInvertBlockDiagonal(a->A,values);
2987: return(0);
2988: }
2991: /* -------------------------------------------------------------------*/
2992: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2993: MatGetRow_MPIAIJ,
2994: MatRestoreRow_MPIAIJ,
2995: MatMult_MPIAIJ,
2996: /* 4*/ MatMultAdd_MPIAIJ,
2997: MatMultTranspose_MPIAIJ,
2998: MatMultTransposeAdd_MPIAIJ,
2999: #ifdef PETSC_HAVE_PBGL
3000: MatSolve_MPIAIJ,
3001: #else
3002: 0,
3003: #endif
3004: 0,
3005: 0,
3006: /*10*/ 0,
3007: 0,
3008: 0,
3009: MatSOR_MPIAIJ,
3010: MatTranspose_MPIAIJ,
3011: /*15*/ MatGetInfo_MPIAIJ,
3012: MatEqual_MPIAIJ,
3013: MatGetDiagonal_MPIAIJ,
3014: MatDiagonalScale_MPIAIJ,
3015: MatNorm_MPIAIJ,
3016: /*20*/ MatAssemblyBegin_MPIAIJ,
3017: MatAssemblyEnd_MPIAIJ,
3018: MatSetOption_MPIAIJ,
3019: MatZeroEntries_MPIAIJ,
3020: /*24*/ MatZeroRows_MPIAIJ,
3021: 0,
3022: #ifdef PETSC_HAVE_PBGL
3023: 0,
3024: #else
3025: 0,
3026: #endif
3027: 0,
3028: 0,
3029: /*29*/ MatSetUp_MPIAIJ,
3030: #ifdef PETSC_HAVE_PBGL
3031: 0,
3032: #else
3033: 0,
3034: #endif
3035: 0,
3036: 0,
3037: 0,
3038: /*34*/ MatDuplicate_MPIAIJ,
3039: 0,
3040: 0,
3041: 0,
3042: 0,
3043: /*39*/ MatAXPY_MPIAIJ,
3044: MatGetSubMatrices_MPIAIJ,
3045: MatIncreaseOverlap_MPIAIJ,
3046: MatGetValues_MPIAIJ,
3047: MatCopy_MPIAIJ,
3048: /*44*/ MatGetRowMax_MPIAIJ,
3049: MatScale_MPIAIJ,
3050: 0,
3051: 0,
3052: MatZeroRowsColumns_MPIAIJ,
3053: /*49*/ 0,
3054: 0,
3055: 0,
3056: 0,
3057: 0,
3058: /*54*/ MatFDColoringCreate_MPIAIJ,
3059: 0,
3060: MatSetUnfactored_MPIAIJ,
3061: 0, /* MatPermute_MPIAIJ, impl currently broken */
3062: 0,
3063: /*59*/ MatGetSubMatrix_MPIAIJ,
3064: MatDestroy_MPIAIJ,
3065: MatView_MPIAIJ,
3066: 0,
3067: 0,
3068: /*64*/ 0,
3069: 0,
3070: 0,
3071: 0,
3072: 0,
3073: /*69*/ MatGetRowMaxAbs_MPIAIJ,
3074: MatGetRowMinAbs_MPIAIJ,
3075: 0,
3076: MatSetColoring_MPIAIJ,
3077: #if defined(PETSC_HAVE_ADIC)
3078: MatSetValuesAdic_MPIAIJ,
3079: #else
3080: 0,
3081: #endif
3082: MatSetValuesAdifor_MPIAIJ,
3083: /*75*/ MatFDColoringApply_AIJ,
3084: 0,
3085: 0,
3086: 0,
3087: 0,
3088: /*80*/ 0,
3089: 0,
3090: 0,
3091: /*83*/ MatLoad_MPIAIJ,
3092: 0,
3093: 0,
3094: 0,
3095: 0,
3096: 0,
3097: /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3098: MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3099: MatMatMultNumeric_MPIAIJ_MPIAIJ,
3100: MatPtAP_Basic,
3101: MatPtAPSymbolic_MPIAIJ,
3102: /*94*/ MatPtAPNumeric_MPIAIJ,
3103: 0,
3104: 0,
3105: 0,
3106: 0,
3107: /*99*/ 0,
3108: MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3109: MatPtAPNumeric_MPIAIJ_MPIAIJ,
3110: MatConjugate_MPIAIJ,
3111: 0,
3112: /*104*/MatSetValuesRow_MPIAIJ,
3113: MatRealPart_MPIAIJ,
3114: MatImaginaryPart_MPIAIJ,
3115: 0,
3116: 0,
3117: /*109*/0,
3118: MatGetRedundantMatrix_MPIAIJ,
3119: MatGetRowMin_MPIAIJ,
3120: 0,
3121: 0,
3122: /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3123: 0,
3124: 0,
3125: 0,
3126: 0,
3127: /*119*/0,
3128: 0,
3129: 0,
3130: 0,
3131: MatGetMultiProcBlock_MPIAIJ,
3132: /*124*/MatFindNonZeroRows_MPIAIJ,
3133: MatGetColumnNorms_MPIAIJ,
3134: MatInvertBlockDiagonal_MPIAIJ,
3135: 0,
3136: MatGetSubMatricesParallel_MPIAIJ,
3137: /*129*/0,
3138: MatTransposeMatMult_MPIAIJ_MPIAIJ,
3139: MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3140: MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3141: 0,
3142: /*134*/0,
3143: 0,
3144: 0,
3145: 0,
3146: 0
3147: };
3149: /* ----------------------------------------------------------------------------------------*/
3151: EXTERN_C_BEGIN
3154: PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
3155: {
3156: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
3160: MatStoreValues(aij->A);
3161: MatStoreValues(aij->B);
3162: return(0);
3163: }
3164: EXTERN_C_END
3166: EXTERN_C_BEGIN
3169: PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
3170: {
3171: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
3175: MatRetrieveValues(aij->A);
3176: MatRetrieveValues(aij->B);
3177: return(0);
3178: }
3179: EXTERN_C_END
3181: EXTERN_C_BEGIN
3184: PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3185: {
3186: Mat_MPIAIJ *b;
3188: PetscInt i;
3189: PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3192: if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3193: if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3194: if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3195: if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3196: if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3197: if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3199: PetscLayoutSetUp(B->rmap);
3200: PetscLayoutSetUp(B->cmap);
3201: if (d_nnz) {
3202: for (i=0; i<B->rmap->n; i++) {
3203: if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3204: }
3205: }
3206: if (o_nnz) {
3207: for (i=0; i<B->rmap->n; i++) {
3208: if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3209: }
3210: }
3211: b = (Mat_MPIAIJ*)B->data;
3213: if (!B->preallocated) {
3214: /* Explicitly create 2 MATSEQAIJ matrices. */
3215: MatCreate(PETSC_COMM_SELF,&b->A);
3216: MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);
3217: MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);
3218: MatSetType(b->A,MATSEQAIJ);
3219: PetscLogObjectParent(B,b->A);
3220: MatCreate(PETSC_COMM_SELF,&b->B);
3221: MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);
3222: MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);
3223: MatSetType(b->B,MATSEQAIJ);
3224: PetscLogObjectParent(B,b->B);
3225: }
3227: MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);
3228: MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);
3229: /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3230: if (!d_realalloc) {MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);}
3231: if (!o_realalloc) {MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);}
3232: B->preallocated = PETSC_TRUE;
3233: return(0);
3234: }
3235: EXTERN_C_END
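/*
   Illustrative usage (editor's sketch): the preallocation interface the
   routine above implements, split into the diagonal block (columns owned by
   this process) and the off-diagonal block (all other columns). The fragment
   below reserves at most 5 nonzeros per row in the diagonal block and 2
   elsewhere; M and N are assumed given. Error checking omitted.

      Mat A;

      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);

   Exact per-row counts can be supplied through the d_nnz/o_nnz arrays instead
   of the scalar d_nz/o_nz; PETSC_DEFAULT for the scalars falls back to the
   defaults of 5 and 2 chosen above.
*/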
3239: PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3240: {
3241: Mat mat;
3242: Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3246: *newmat = 0;
3247: MatCreate(((PetscObject)matin)->comm,&mat);
3248: MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);
3249: MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);
3250: MatSetType(mat,((PetscObject)matin)->type_name);
3251: PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));
3252: a = (Mat_MPIAIJ*)mat->data;
3253:
3254: mat->factortype = matin->factortype;
3255: mat->rmap->bs = matin->rmap->bs;
3256: mat->cmap->bs = matin->cmap->bs;
3257: mat->assembled = PETSC_TRUE;
3258: mat->insertmode = NOT_SET_VALUES;
3259: mat->preallocated = PETSC_TRUE;
3261: a->size = oldmat->size;
3262: a->rank = oldmat->rank;
3263: a->donotstash = oldmat->donotstash;
3264: a->roworiented = oldmat->roworiented;
3265: a->rowindices = 0;
3266: a->rowvalues = 0;
3267: a->getrowactive = PETSC_FALSE;
3269: PetscLayoutReference(matin->rmap,&mat->rmap);
3270: PetscLayoutReference(matin->cmap,&mat->cmap);
3272: if (oldmat->colmap) {
3273: #if defined (PETSC_USE_CTABLE)
3274: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
3275: #else
3276: PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);
3277: PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));
3278: PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));
3279: #endif
3280: } else a->colmap = 0;
3281: if (oldmat->garray) {
3282: PetscInt len;
3283: len = oldmat->B->cmap->n;
3284: PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);
3285: PetscLogObjectMemory(mat,len*sizeof(PetscInt));
3286: if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt)); }
3287: } else a->garray = 0;
3288:
3289: VecDuplicate(oldmat->lvec,&a->lvec);
3290: PetscLogObjectParent(mat,a->lvec);
3291: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
3292: PetscLogObjectParent(mat,a->Mvctx);
3293: MatDuplicate(oldmat->A,cpvalues,&a->A);
3294: PetscLogObjectParent(mat,a->A);
3295: MatDuplicate(oldmat->B,cpvalues,&a->B);
3296: PetscLogObjectParent(mat,a->B);
3297: PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);
3298: *newmat = mat;
3299: return(0);
3300: }
3306: PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3307: {
3308: PetscScalar *vals,*svals;
3309: MPI_Comm comm = ((PetscObject)viewer)->comm;
3311: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
3312: PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3313: PetscInt header[4],*rowlengths = 0,M,N,m,*cols;
3314: PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3315: PetscInt cend,cstart,n,*rowners,sizesset=1;
3316: int fd;
3319: MPI_Comm_size(comm,&size);
3320: MPI_Comm_rank(comm,&rank);
3321: if (!rank) {
3322: PetscViewerBinaryGetDescriptor(viewer,&fd);
3323: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
3324: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3325: }
3327: if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3329: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
3330: M = header[1]; N = header[2];
3331: /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3332: if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3333: if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3334:
3335: /* If global sizes are set, check if they are consistent with that given in the file */
3336: if (sizesset) {
3337: MatGetSize(newMat,&grows,&gcols);
3338: }
3339: if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3340: if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3342: /* determine ownership of all rows */
3343: if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3344: else m = newMat->rmap->n; /* Set by user */
3345:
3346: PetscMalloc((size+1)*sizeof(PetscInt),&rowners);
3347: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
3349: /* First process needs enough room for process with most rows */
3350: if (!rank) {
3351: mmax = rowners[1];
3352: for (i=2; i<=size; i++) {
3353: mmax = PetscMax(mmax,rowners[i]);
3354: }
3355: } else mmax = m;
3357: rowners[0] = 0;
3358: for (i=2; i<=size; i++) {
3359: rowners[i] += rowners[i-1];
3360: }
3361: rstart = rowners[rank];
3362: rend = rowners[rank+1];
3364: /* distribute row lengths to all processors */
3365: PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);
3366: if (!rank) {
3367: PetscBinaryRead(fd,ourlens,m,PETSC_INT);
3368: PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);
3369: PetscMalloc(size*sizeof(PetscInt),&procsnz);
3370: PetscMemzero(procsnz,size*sizeof(PetscInt));
3371: for (j=0; j<m; j++) {
3372: procsnz[0] += ourlens[j];
3373: }
3374: for (i=1; i<size; i++) {
3375: PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);
3376: /* calculate the number of nonzeros on each processor */
3377: for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3378: procsnz[i] += rowlengths[j];
3379: }
3380: MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
3381: }
3382: PetscFree(rowlengths);
3383: } else {
3384: MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);
3385: }
3387: if (!rank) {
3388: /* determine max buffer needed and allocate it */
3389: maxnz = 0;
3390: for (i=0; i<size; i++) {
3391: maxnz = PetscMax(maxnz,procsnz[i]);
3392: }
3393: PetscMalloc(maxnz*sizeof(PetscInt),&cols);
3395: /* read in my part of the matrix column indices */
3396: nz = procsnz[0];
3397: PetscMalloc(nz*sizeof(PetscInt),&mycols);
3398: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
3400: /* read in everyone else's and ship it off */
3401: for (i=1; i<size; i++) {
3402: nz = procsnz[i];
3403: PetscBinaryRead(fd,cols,nz,PETSC_INT);
3404: MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);
3405: }
3406: PetscFree(cols);
3407: } else {
3408: /* determine buffer space needed for message */
3409: nz = 0;
3410: for (i=0; i<m; i++) {
3411: nz += ourlens[i];
3412: }
3413: PetscMalloc(nz*sizeof(PetscInt),&mycols);
3415: /* receive message of column indices*/
3416: MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);
3417: }
3419: /* determine column ownership if matrix is not square */
3420: if (N != M) {
3421: if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3422: else n = newMat->cmap->n;
3423: MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);
3424: cstart = cend - n;
3425: } else {
3426: cstart = rstart;
3427: cend = rend;
3428: n = cend - cstart;
3429: }
3431: /* loop over local rows, determining number of off diagonal entries */
3432: PetscMemzero(offlens,m*sizeof(PetscInt));
3433: jj = 0;
3434: for (i=0; i<m; i++) {
3435: for (j=0; j<ourlens[i]; j++) {
3436: if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3437: jj++;
3438: }
3439: }
3441: for (i=0; i<m; i++) {
3442: ourlens[i] -= offlens[i];
3443: }
3444: if (!sizesset) {
3445: MatSetSizes(newMat,m,n,M,N);
3446: }
3447: MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);
3449: for (i=0; i<m; i++) {
3450: ourlens[i] += offlens[i];
3451: }
3453: if (!rank) {
3454: PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);
3456: /* read in my part of the matrix numerical values */
3457: nz = procsnz[0];
3458: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3459:
3460: /* insert into matrix */
3461: jj = rstart;
3462: smycols = mycols;
3463: svals = vals;
3464: for (i=0; i<m; i++) {
3465: MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
3466: smycols += ourlens[i];
3467: svals += ourlens[i];
3468: jj++;
3469: }
3471: /* read in other processors and ship out */
3472: for (i=1; i<size; i++) {
3473: nz = procsnz[i];
3474: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3475: MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);
3476: }
3477: PetscFree(procsnz);
3478: } else {
3479: /* receive numeric values */
3480: PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);
3482: /* receive message of values*/
3483: MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);
3485: /* insert into matrix */
3486: jj = rstart;
3487: smycols = mycols;
3488: svals = vals;
3489: for (i=0; i<m; i++) {
3490: MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
3491: smycols += ourlens[i];
3492: svals += ourlens[i];
3493: jj++;
3494: }
3495: }
3496: PetscFree2(ourlens,offlens);
3497: PetscFree(vals);
3498: PetscFree(mycols);
3499: PetscFree(rowners);
3501: MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);
3502: MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);
3503: return(0);
3504: }
3508: PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3509: {
3511: IS iscol_local;
3512: PetscInt csize;
3515: ISGetLocalSize(iscol,&csize);
3516: if (call == MAT_REUSE_MATRIX) {
3517: PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);
3518: if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3519: } else {
3520: PetscInt cbs;
3521: ISGetBlockSize(iscol,&cbs);
3522: ISAllGather(iscol,&iscol_local);
3523: ISSetBlockSize(iscol_local,cbs);
3524: }
3525: MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);
3526: if (call == MAT_INITIAL_MATRIX) {
3527: PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);
3528: ISDestroy(&iscol_local);
3529: }
3530: return(0);
3531: }
3533: extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3536: /*
3537:     Not ideal, since it makes two copies of the submatrix: first a SeqAIJ on each
3538:   process, and then the final result formed by concatenating those local matrices.
3539:   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3541:     Note: This requires a sequential iscol containing all indices.
3542: */
3543: PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3544: {
3546: PetscMPIInt rank,size;
3547: PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3548: PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3549: PetscBool allcolumns, colflag;
3550: Mat M,Mreuse;
3551: MatScalar *vwork,*aa;
3552: MPI_Comm comm = ((PetscObject)mat)->comm;
3553: Mat_SeqAIJ *aij;
3557: MPI_Comm_rank(comm,&rank);
3558: MPI_Comm_size(comm,&size);
3560: ISIdentity(iscol,&colflag);
3561: ISGetLocalSize(iscol,&ncol);
3562: if (colflag && ncol == mat->cmap->N){
3563: allcolumns = PETSC_TRUE;
3564: } else {
3565: allcolumns = PETSC_FALSE;
3566: }
3567: if (call == MAT_REUSE_MATRIX) {
3568: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);
3569: if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3570: MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);
3571: } else {
3572: MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);
3573: }
3575: /*
3576: m - number of local rows
3577: n - number of columns (same on all processors)
3578: rstart - first row in new global matrix generated
3579: */
3580: MatGetSize(Mreuse,&m,&n);
3581: MatGetBlockSizes(Mreuse,&bs,&cbs);
3582: if (call == MAT_INITIAL_MATRIX) {
3583: aij = (Mat_SeqAIJ*)(Mreuse)->data;
3584: ii = aij->i;
3585: jj = aij->j;
3587: /*
3588: Determine the number of non-zeros in the diagonal and off-diagonal
3589: portions of the matrix in order to do correct preallocation
3590: */
3592: /* first get start and end of "diagonal" columns */
3593: if (csize == PETSC_DECIDE) {
3594: ISGetSize(isrow,&mglobal);
3595: if (mglobal == n) { /* square matrix */
3596: nlocal = m;
3597: } else {
3598: nlocal = n/size + ((n % size) > rank);
3599: }
3600: } else {
3601: nlocal = csize;
3602: }
3603: MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);
3604: rstart = rend - nlocal;
3605: if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3607: /* next, compute all the lengths */
3608: PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);
3609: olens = dlens + m;
3610: for (i=0; i<m; i++) {
3611: jend = ii[i+1] - ii[i];
3612: olen = 0;
3613: dlen = 0;
3614: for (j=0; j<jend; j++) {
3615: if (*jj < rstart || *jj >= rend) olen++;
3616: else dlen++;
3617: jj++;
3618: }
3619: olens[i] = olen;
3620: dlens[i] = dlen;
3621: }
3622: MatCreate(comm,&M);
3623: MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);
3624: MatSetBlockSizes(M,bs,cbs);
3625: MatSetType(M,((PetscObject)mat)->type_name);
3626: MatMPIAIJSetPreallocation(M,0,dlens,0,olens);
3627: PetscFree(dlens);
3628: } else {
3629: PetscInt ml,nl;
3631: M = *newmat;
3632: MatGetLocalSize(M,&ml,&nl);
3633: if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3634: MatZeroEntries(M);
3635: /*
3636: The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3637: rather than the slower MatSetValues().
3638: */
3639: M->was_assembled = PETSC_TRUE;
3640: M->assembled = PETSC_FALSE;
3641: }
3642: MatGetOwnershipRange(M,&rstart,&rend);
3643: aij = (Mat_SeqAIJ*)(Mreuse)->data;
3644: ii = aij->i;
3645: jj = aij->j;
3646: aa = aij->a;
3647: for (i=0; i<m; i++) {
3648: row = rstart + i;
3649: nz = ii[i+1] - ii[i];
3650: cwork = jj; jj += nz;
3651: vwork = aa; aa += nz;
3652: MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
3653: }
3655: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
3656: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
3657: *newmat = M;
3659: /* save submatrix used in processor for next request */
3660: if (call == MAT_INITIAL_MATRIX) {
3661: PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
3662: MatDestroy(&Mreuse);
3663: }
3665: return(0);
3666: }
3668: EXTERN_C_BEGIN
3671: PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3672: {
3673: PetscInt m,cstart, cend,j,nnz,i,d;
3674: PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3675: const PetscInt *JJ;
3676: PetscScalar *values;
3680: if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3682: PetscLayoutSetUp(B->rmap);
3683: PetscLayoutSetUp(B->cmap);
3684: m = B->rmap->n;
3685: cstart = B->cmap->rstart;
3686: cend = B->cmap->rend;
3687: rstart = B->rmap->rstart;
3689: PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);
3691: #if defined(PETSC_USE_DEBUG)
3692: for (i=0; i<m; i++) {
3693: nnz = Ii[i+1]- Ii[i];
3694: JJ = J + Ii[i];
3695: if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3696:     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3697:     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3698: }
3699: #endif
3701: for (i=0; i<m; i++) {
3702: nnz = Ii[i+1]- Ii[i];
3703: JJ = J + Ii[i];
3704: nnz_max = PetscMax(nnz_max,nnz);
3705: d = 0;
3706: for (j=0; j<nnz; j++) {
3707: if (cstart <= JJ[j] && JJ[j] < cend) d++;
3708: }
3709: d_nnz[i] = d;
3710: o_nnz[i] = nnz - d;
3711: }
3712: MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
3713: PetscFree2(d_nnz,o_nnz);
3715: if (v) values = (PetscScalar*)v;
3716: else {
3717: PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);
3718: PetscMemzero(values,nnz_max*sizeof(PetscScalar));
3719: }
3721: for (i=0; i<m; i++) {
3722: ii = i + rstart;
3723: nnz = Ii[i+1]- Ii[i];
3724: MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);
3725: }
3726: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3727: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3729: if (!v) {
3730: PetscFree(values);
3731: }
3732: MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3733: return(0);
3734: }
3735: EXTERN_C_END
3739: /*@
3740: MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3741: (the default parallel PETSc format).
3743: Collective on MPI_Comm
3745: Input Parameters:
3746: + B - the matrix
3747: . i - the indices into j for the start of each local row (starts with zero)
3748: . j - the column indices for each local row (starts with zero)
3749: - v - optional values in the matrix
3751: Level: developer
3753: Notes:
3754:    The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3755:    thus you CANNOT change the matrix entries by changing the values of v[] after you have
3756:    called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3758: The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3760:    The format used for the sparse matrix input is equivalent to a
3761:    row-major ordering, i.e. for the following matrix, the expected input data is
3762:    as shown:
3764: 1 0 0
3765: 2 0 3 P0
3766: -------
3767: 4 5 6 P1
3769: Process0 [P0]: rows_owned=[0,1]
3770: i = {0,1,3} [size = nrow+1 = 2+1]
3771:         j =  {0,0,2}  [size = nz = 3]
3772:         v =  {1,2,3}  [size = nz = 3]
3774: Process1 [P1]: rows_owned=[2]
3775: i = {0,3} [size = nrow+1 = 1+1]
3776:         j =  {0,1,2}  [size = nz = 3]
3777:         v =  {4,5,6}  [size = nz = 3]
3779: .keywords: matrix, aij, compressed row, sparse, parallel
3781: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3782: MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3783: @*/
3784: PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3785: {
3789: PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
3790: return(0);
3791: }
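
/*
   Editor's example (a sketch added for illustration, not part of PETSc): minimal use of
   MatMPIAIJSetPreallocationCSR() for the 3x3 matrix in the manual page above. It assumes
   exactly two MPI processes; error checking is omitted to match the style of this listing.
*/
static PetscErrorCode ExampleMPIAIJSetPreallocationCSR(void)
{
  Mat         B;
  PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};   /* process 0 owns rows 0 and 1 */
  PetscInt    i1[] = {0,3},   j1[] = {0,1,2};   /* process 1 owns row 2        */
  PetscScalar v0[] = {1,2,3}, v1[] = {4,5,6};
  PetscMPIInt rank;

  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
  MatCreate(PETSC_COMM_WORLD,&B);
  MatSetSizes(B,rank ? 1 : 2,PETSC_DECIDE,3,3);
  MatSetType(B,MATMPIAIJ);
  /* hands over the local CSR arrays; this also inserts the values and assembles B */
  MatMPIAIJSetPreallocationCSR(B,rank ? i1 : i0,rank ? j1 : j0,rank ? v1 : v0);
  MatDestroy(&B);
  return(0);
}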
3795: /*@C
3796: MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3797: (the default parallel PETSc format). For good matrix assembly performance
3798: the user should preallocate the matrix storage by setting the parameters
3799: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3800: performance can be increased by more than a factor of 50.
3802: Collective on MPI_Comm
3804: Input Parameters:
3805: + A - the matrix
3806: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3807: (same value is used for all local rows)
3808: . d_nnz - array containing the number of nonzeros in the various rows of the
3809: DIAGONAL portion of the local submatrix (possibly different for each row)
3810: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3811: The size of this array is equal to the number of local rows, i.e 'm'.
3812: For matrices that will be factored, you must leave room for (and set)
3813: the diagonal entry even if it is zero.
3814: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3815: submatrix (same value is used for all local rows).
3816: - o_nnz - array containing the number of nonzeros in the various rows of the
3817: OFF-DIAGONAL portion of the local submatrix (possibly different for
3818: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3819: structure. The size of this array is equal to the number
3820: of local rows, i.e 'm'.
3822: If the *_nnz parameter is given then the *_nz parameter is ignored
3824: The AIJ format (also called the Yale sparse matrix format or
3825: compressed row storage (CSR)), is fully compatible with standard Fortran 77
3826: storage. The stored row and column indices begin with zero.
3827: See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3829: The parallel matrix is partitioned such that the first m0 rows belong to
3830: process 0, the next m1 rows belong to process 1, the next m2 rows belong
3831: to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3833:    The DIAGONAL portion of the local submatrix of a processor can be defined
3834:    as the submatrix which is obtained by extracting the part corresponding to
3835:    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3836:    first row that belongs to the processor, r2 is the last row belonging to
3837:    this processor, and c1-c2 is the range of indices of the local part of a
3838:    vector suitable for applying the matrix to. This is an m x n matrix. In the
3839:    common case of a square matrix, the row and column ranges are the same and
3840:    the DIAGONAL part is also square. The remaining portion of the local
3841:    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3843: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3845: You can call MatGetInfo() to get information on how effective the preallocation was;
3846: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3847: You can also run with the option -info and look for messages with the string
3848: malloc in them to see if additional memory allocation was needed.
3850: Example usage:
3851:
3852: Consider the following 8x8 matrix with 34 non-zero values, that is
3853:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3854: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3855: as follows:
3857: .vb
3858: 1 2 0 | 0 3 0 | 0 4
3859: Proc0 0 5 6 | 7 0 0 | 8 0
3860: 9 0 10 | 11 0 0 | 12 0
3861: -------------------------------------
3862: 13 0 14 | 15 16 17 | 0 0
3863: Proc1 0 18 0 | 19 20 21 | 0 0
3864: 0 0 0 | 22 23 0 | 24 0
3865: -------------------------------------
3866: Proc2 25 26 27 | 0 0 28 | 29 0
3867: 30 0 0 | 31 32 33 | 0 34
3868: .ve
3870: This can be represented as a collection of submatrices as:
3872: .vb
3873: A B C
3874: D E F
3875: G H I
3876: .ve
3878: Where the submatrices A,B,C are owned by proc0, D,E,F are
3879: owned by proc1, G,H,I are owned by proc2.
3881: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3882: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3883: The 'M','N' parameters are 8,8, and have the same values on all procs.
3885: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3886: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3887: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3888: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3889:    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3890:    matrix, and [DF] as another SeqAIJ matrix.
3892: When d_nz, o_nz parameters are specified, d_nz storage elements are
3893: allocated for every row of the local diagonal submatrix, and o_nz
3894: storage locations are allocated for every row of the OFF-DIAGONAL submat.
3895:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3896:    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3897: In this case, the values of d_nz,o_nz are:
3898: .vb
3899: proc0 : dnz = 2, o_nz = 2
3900: proc1 : dnz = 3, o_nz = 2
3901: proc2 : dnz = 1, o_nz = 4
3902: .ve
3903: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3904: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3905:    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3906: 34 values.
3908: When d_nnz, o_nnz parameters are specified, the storage is specified
3909:    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3910: In the above case the values for d_nnz,o_nnz are:
3911: .vb
3912: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3913: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3914: proc2: d_nnz = [1,1] and o_nnz = [4,4]
3915: .ve
3916:    Here the space allocated is the sum of all the above values, i.e. 34, and
3917:    hence the preallocation is exact.
3919: Level: intermediate
3921: .keywords: matrix, aij, compressed row, sparse, parallel
3923: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3924: MPIAIJ, MatGetInfo()
3925: @*/
3926: PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3927: {
3933: PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
3934: return(0);
3935: }
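
/*
   Editor's example (a sketch added for illustration, not part of PETSc): preallocating the
   8x8 matrix from the manual page above on three MPI processes, using the per-row
   d_nnz/o_nnz values listed there. Error checking is omitted to match the style of this
   listing.
*/
static PetscErrorCode ExampleMPIAIJSetPreallocation(void)
{
  Mat         A;
  PetscMPIInt rank;
  PetscInt    mloc[3]    = {3,3,2};
  PetscInt    dnnz[3][3] = {{2,2,2},{3,3,2},{1,1,0}};
  PetscInt    onnz[3][3] = {{2,2,2},{2,1,1},{4,4,0}};

  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
  MatCreate(PETSC_COMM_WORLD,&A);
  MatSetSizes(A,mloc[rank],mloc[rank],8,8);
  MatSetType(A,MATMPIAIJ);
  MatMPIAIJSetPreallocation(A,0,dnnz[rank],0,onnz[rank]);
  /* MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() would follow here */
  MatDestroy(&A);
  return(0);
}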
3939: /*@
3940:    MatCreateMPIAIJWithArrays - creates an MPIAIJ matrix using arrays that contain the local
3941:        rows in standard CSR format.
3943: Collective on MPI_Comm
3945: Input Parameters:
3946: + comm - MPI communicator
3947: . m - number of local rows (Cannot be PETSC_DECIDE)
3948: . n - This value should be the same as the local size used in creating the
3949: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3950: calculated if N is given) For square matrices n is almost always m.
3951: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3952: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3953: . i - row indices
3954: . j - column indices
3955: - a - matrix values
3957: Output Parameter:
3958: . mat - the matrix
3960: Level: intermediate
3962: Notes:
3963: The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3964: thus you CANNOT change the matrix entries by changing the values of a[] after you have
3965: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3967: The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3969:    The format used for the sparse matrix input is equivalent to a
3970:    row-major ordering, i.e. for the following matrix, the expected input data is
3971:    as shown:
3973: 1 0 0
3974: 2 0 3 P0
3975: -------
3976: 4 5 6 P1
3978: Process0 [P0]: rows_owned=[0,1]
3979: i = {0,1,3} [size = nrow+1 = 2+1]
3980:         j =  {0,0,2}  [size = nz = 3]
3981:         v =  {1,2,3}  [size = nz = 3]
3983: Process1 [P1]: rows_owned=[2]
3984: i = {0,3} [size = nrow+1 = 1+1]
3985:         j =  {0,1,2}  [size = nz = 3]
3986:         v =  {4,5,6}  [size = nz = 3]
3988: .keywords: matrix, aij, compressed row, sparse, parallel
3990: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3991: MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3992: @*/
3993: PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3994: {
3998: if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3999: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4000: MatCreate(comm,mat);
4001: MatSetSizes(*mat,m,n,M,N);
4002: /* MatSetBlockSizes(M,bs,cbs); */
4003: MatSetType(*mat,MATMPIAIJ);
4004: MatMPIAIJSetPreallocationCSR(*mat,i,j,a);
4005: return(0);
4006: }
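
/*
   Editor's example (a sketch added for illustration, not part of PETSc): the convenience
   form of the CSR example in the manual page above, creating the matrix in one call per
   process. Assumes exactly two MPI processes; error checking is omitted to match the
   style of this listing.
*/
static PetscErrorCode ExampleCreateMPIAIJWithArrays(void)
{
  Mat         C;
  PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};   /* process 0: rows 0 and 1 */
  PetscInt    i1[] = {0,3},   j1[] = {0,1,2};   /* process 1: row 2        */
  PetscScalar a0[] = {1,2,3}, a1[] = {4,5,6};
  PetscMPIInt rank;

  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
  if (!rank) MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i0,j0,a0,&C);
  else       MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,1,PETSC_DECIDE,3,3,i1,j1,a1,&C);
  MatDestroy(&C);
  return(0);
}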
4010: /*@C
4011: MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4012: (the default parallel PETSc format). For good matrix assembly performance
4013: the user should preallocate the matrix storage by setting the parameters
4014: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
4015: performance can be increased by more than a factor of 50.
4017: Collective on MPI_Comm
4019: Input Parameters:
4020: + comm - MPI communicator
4021: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4022: This value should be the same as the local size used in creating the
4023: y vector for the matrix-vector product y = Ax.
4024: . n - This value should be the same as the local size used in creating the
4025: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4026: calculated if N is given) For square matrices n is almost always m.
4027: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4028: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4029: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
4030: (same value is used for all local rows)
4031: . d_nnz - array containing the number of nonzeros in the various rows of the
4032: DIAGONAL portion of the local submatrix (possibly different for each row)
4033: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4034: The size of this array is equal to the number of local rows, i.e 'm'.
4035: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
4036: submatrix (same value is used for all local rows).
4037: - o_nnz - array containing the number of nonzeros in the various rows of the
4038: OFF-DIAGONAL portion of the local submatrix (possibly different for
4039: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4040: structure. The size of this array is equal to the number
4041: of local rows, i.e 'm'.
4043: Output Parameter:
4044: . A - the matrix
4046: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4047:    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4048:    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4050: Notes:
4051: If the *_nnz parameter is given then the *_nz parameter is ignored
4053: m,n,M,N parameters specify the size of the matrix, and its partitioning across
4054: processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4055: storage requirements for this matrix.
4057: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4058:    processor then it must be used on all processors that share the object for
4059: that argument.
4061: The user MUST specify either the local or global matrix dimensions
4062: (possibly both).
4064: The parallel matrix is partitioned across processors such that the
4065: first m0 rows belong to process 0, the next m1 rows belong to
4066:    process 1, the next m2 rows belong to process 2, etc., where
4067:    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
4068:    values corresponding to an [m x N] submatrix.
4070: The columns are logically partitioned with the n0 columns belonging
4071:    to the 0th partition, the next n1 columns belonging to the next
4072:    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4074: The DIAGONAL portion of the local submatrix on any given processor
4075: is the submatrix corresponding to the rows and columns m,n
4076:    corresponding to the given processor, i.e. the diagonal matrix on
4077:    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4078:    etc. The remaining portion of the local submatrix [m x (N-n)]
4079:    constitutes the OFF-DIAGONAL portion. The example below better
4080: illustrates this concept.
4082: For a square global matrix we define each processor's diagonal portion
4083: to be its local rows and the corresponding columns (a square submatrix);
4084: each processor's off-diagonal portion encompasses the remainder of the
4085: local matrix (a rectangular submatrix).
4087: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4089: When calling this routine with a single process communicator, a matrix of
4090: type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4091: type of communicator, use the construction mechanism:
4092: MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4093:
4094: By default, this format uses inodes (identical nodes) when possible.
4095: We search for consecutive rows with the same nonzero structure, thereby
4096: reusing matrix information to achieve increased efficiency.
4098: Options Database Keys:
4099: + -mat_no_inode - Do not use inodes
4100: . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4101: - -mat_aij_oneindex - Internally use indexing starting at 1
4102: rather than 0. Note that when calling MatSetValues(),
4103: the user still MUST index entries starting at 0!
4106: Example usage:
4107:
4108: Consider the following 8x8 matrix with 34 non-zero values, that is
4109:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4110: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4111: as follows:
4113: .vb
4114: 1 2 0 | 0 3 0 | 0 4
4115: Proc0 0 5 6 | 7 0 0 | 8 0
4116: 9 0 10 | 11 0 0 | 12 0
4117: -------------------------------------
4118: 13 0 14 | 15 16 17 | 0 0
4119: Proc1 0 18 0 | 19 20 21 | 0 0
4120: 0 0 0 | 22 23 0 | 24 0
4121: -------------------------------------
4122: Proc2 25 26 27 | 0 0 28 | 29 0
4123: 30 0 0 | 31 32 33 | 0 34
4124: .ve
4126: This can be represented as a collection of submatrices as:
4128: .vb
4129: A B C
4130: D E F
4131: G H I
4132: .ve
4134: Where the submatrices A,B,C are owned by proc0, D,E,F are
4135: owned by proc1, G,H,I are owned by proc2.
4137: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4138: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4139: The 'M','N' parameters are 8,8, and have the same values on all procs.
4141: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4142: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4143: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4144: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4145:    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4146:    matrix, and [DF] as another SeqAIJ matrix.
4148: When d_nz, o_nz parameters are specified, d_nz storage elements are
4149: allocated for every row of the local diagonal submatrix, and o_nz
4150: storage locations are allocated for every row of the OFF-DIAGONAL submat.
4151:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4152:    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4153: In this case, the values of d_nz,o_nz are:
4154: .vb
4155: proc0 : dnz = 2, o_nz = 2
4156: proc1 : dnz = 3, o_nz = 2
4157: proc2 : dnz = 1, o_nz = 4
4158: .ve
4159: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4160: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4161:    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4162: 34 values.
4164: When d_nnz, o_nnz parameters are specified, the storage is specified
4165:    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4166: In the above case the values for d_nnz,o_nnz are:
4167: .vb
4168: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4169: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4170: proc2: d_nnz = [1,1] and o_nnz = [4,4]
4171: .ve
4172:    Here the space allocated is the sum of all the above values, i.e. 34, and
4173:    hence the preallocation is exact.
4175: Level: intermediate
4177: .keywords: matrix, aij, compressed row, sparse, parallel
4179: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4180: MPIAIJ, MatCreateMPIAIJWithArrays()
4181: @*/
4182: PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4183: {
4185: PetscMPIInt size;
4188: MatCreate(comm,A);
4189: MatSetSizes(*A,m,n,M,N);
4190: MPI_Comm_size(comm,&size);
4191: if (size > 1) {
4192: MatSetType(*A,MATMPIAIJ);
4193: MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
4194: } else {
4195: MatSetType(*A,MATSEQAIJ);
4196: MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
4197: }
4198: return(0);
4199: }
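
/*
   Editor's example (a sketch added for illustration, not part of PETSc): the
   MatCreate()/MatSetType()/MatXXXXSetPreallocation() paradigm recommended in the manual
   page above. It works on any number of processes because only the preallocation call
   that matches the actual matrix type takes effect. The global size and nonzero estimates
   are placeholders; error checking is omitted to match the style of this listing.
*/
static PetscErrorCode ExampleCreateAIJParadigm(void)
{
  Mat A;

  MatCreate(PETSC_COMM_WORLD,&A);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,8,8);
  MatSetFromOptions(A);                                    /* honours -mat_type aij etc. */
  MatSeqAIJSetPreallocation(A,5,PETSC_NULL);               /* used on one process        */
  MatMPIAIJSetPreallocation(A,3,PETSC_NULL,2,PETSC_NULL);  /* used on several processes  */
  MatDestroy(&A);
  return(0);
}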
4203: PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4204: {
4205: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4208: *Ad = a->A;
4209: *Ao = a->B;
4210: *colmap = a->garray;
4211: return(0);
4212: }
4216: PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4217: {
4219: PetscInt i;
4220: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4223: if (coloring->ctype == IS_COLORING_GLOBAL) {
4224: ISColoringValue *allcolors,*colors;
4225: ISColoring ocoloring;
4227: /* set coloring for diagonal portion */
4228: MatSetColoring_SeqAIJ(a->A,coloring);
4230: /* set coloring for off-diagonal portion */
4231: ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);
4232: PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);
4233: for (i=0; i<a->B->cmap->n; i++) {
4234: colors[i] = allcolors[a->garray[i]];
4235: }
4236: PetscFree(allcolors);
4237: ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);
4238: MatSetColoring_SeqAIJ(a->B,ocoloring);
4239: ISColoringDestroy(&ocoloring);
4240: } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4241: ISColoringValue *colors;
4242: PetscInt *larray;
4243: ISColoring ocoloring;
4245: /* set coloring for diagonal portion */
4246: PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);
4247: for (i=0; i<a->A->cmap->n; i++) {
4248: larray[i] = i + A->cmap->rstart;
4249: }
4250: ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);
4251: PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);
4252: for (i=0; i<a->A->cmap->n; i++) {
4253: colors[i] = coloring->colors[larray[i]];
4254: }
4255: PetscFree(larray);
4256: ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);
4257: MatSetColoring_SeqAIJ(a->A,ocoloring);
4258: ISColoringDestroy(&ocoloring);
4260: /* set coloring for off-diagonal portion */
4261: PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);
4262: ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);
4263: PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);
4264: for (i=0; i<a->B->cmap->n; i++) {
4265: colors[i] = coloring->colors[larray[i]];
4266: }
4267: PetscFree(larray);
4268: ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);
4269: MatSetColoring_SeqAIJ(a->B,ocoloring);
4270: ISColoringDestroy(&ocoloring);
4271: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4273: return(0);
4274: }
4276: #if defined(PETSC_HAVE_ADIC)
4279: PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4280: {
4281: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4285: MatSetValuesAdic_SeqAIJ(a->A,advalues);
4286: MatSetValuesAdic_SeqAIJ(a->B,advalues);
4287: return(0);
4288: }
4289: #endif
4293: PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4294: {
4295: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4299: MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);
4300: MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);
4301: return(0);
4302: }
4306: PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4307: {
4309: PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4310: PetscInt *indx;
4313: /* This routine will ONLY return MPIAIJ type matrix */
4314: MatGetSize(inmat,&m,&N);
4315: MatGetBlockSizes(inmat,&bs,&cbs);
4316: if (n == PETSC_DECIDE){
4317: PetscSplitOwnership(comm,&n,&N);
4318: }
4319: /* Check sum(n) = N */
4320: MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);
4321:   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",N);
4322:
4323: MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);
4324: rstart -= m;
4326: MatPreallocateInitialize(comm,m,n,dnz,onz);
4327: for (i=0;i<m;i++) {
4328: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);
4329: MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);
4330: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);
4331: }
4332:
4333: MatCreate(comm,outmat);
4334: MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
4335: MatSetBlockSizes(*outmat,bs,cbs);
4336: MatSetType(*outmat,MATMPIAIJ);
4337: MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);
4338: MatPreallocateFinalize(dnz,onz);
4339: return(0);
4340: }
4344: PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4345: {
4347: PetscInt m,N,i,rstart,nnz,Ii;
4348: PetscInt *indx;
4349: PetscScalar *values;
4352: MatGetSize(inmat,&m,&N);
4353: MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);
4354: for (i=0;i<m;i++) {
4355: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
4356: Ii = i + rstart;
4357: MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);
4358: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
4359: }
4360: MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);
4361: MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);
4362: return(0);
4363: }
4367: /*@
4368: MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4369: matrices from each processor
4371: Collective on MPI_Comm
4373: Input Parameters:
4374: + comm - the communicators the parallel matrix will live on
4375: . inmat - the input sequential matrices
4376: . n - number of local columns (or PETSC_DECIDE)
4377: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4379: Output Parameter:
4380: . outmat - the parallel matrix generated
4382: Level: advanced
4384: Notes: The number of columns of the matrix in EACH processor MUST be the same.
4386: @*/
4387: PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4388: {
4392: PetscLogEventBegin(MAT_Merge,inmat,0,0,0);
4393: if (scall == MAT_INITIAL_MATRIX){
4394: MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);
4395: }
4396: MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);
4397: PetscLogEventEnd(MAT_Merge,inmat,0,0,0);
4398: return(0);
4399: }
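
/*
   Editor's example (a sketch added for illustration, not part of PETSc): stacking one
   assembled SeqAIJ matrix per process into a single MPIAIJ matrix, then reusing the
   layout after the sequential values change. 'localmat' is assumed to be an assembled
   SeqAIJ matrix with the same column count on every process; error checking is omitted.
*/
static PetscErrorCode ExampleConcatenateSeqAIJ(Mat localmat)
{
  Mat C;

  MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
  /* ... refill localmat with new values (same nonzero pattern) ... */
  MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,localmat,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
  MatDestroy(&C);
  return(0);
}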
4403: PetscErrorCode MatFileSplit(Mat A,char *outfile)
4404: {
4405: PetscErrorCode ierr;
4406: PetscMPIInt rank;
4407: PetscInt m,N,i,rstart,nnz;
4408: size_t len;
4409: const PetscInt *indx;
4410: PetscViewer out;
4411: char *name;
4412: Mat B;
4413: const PetscScalar *values;
4416: MatGetLocalSize(A,&m,0);
4417: MatGetSize(A,0,&N);
4418: /* Should this be the type of the diagonal block of A? */
4419: MatCreate(PETSC_COMM_SELF,&B);
4420: MatSetSizes(B,m,N,m,N);
4421: MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);
4422: MatSetType(B,MATSEQAIJ);
4423: MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
4424: MatGetOwnershipRange(A,&rstart,0);
4425: for (i=0;i<m;i++) {
4426: MatGetRow(A,i+rstart,&nnz,&indx,&values);
4427: MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);
4428: MatRestoreRow(A,i+rstart,&nnz,&indx,&values);
4429: }
4430: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
4431: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
4433: MPI_Comm_rank(((PetscObject)A)->comm,&rank);
4434: PetscStrlen(outfile,&len);
4435: PetscMalloc((len+5)*sizeof(char),&name);
4436: sprintf(name,"%s.%d",outfile,rank);
4437: PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);
4438: PetscFree(name);
4439: MatView(B,out);
4440: PetscViewerDestroy(&out);
4441: MatDestroy(&B);
4442: return(0);
4443: }
4445: extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4448: PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4449: {
4450: PetscErrorCode ierr;
4451: Mat_Merge_SeqsToMPI *merge;
4452: PetscContainer container;
4455: PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);
4456: if (container) {
4457: PetscContainerGetPointer(container,(void **)&merge);
4458: PetscFree(merge->id_r);
4459: PetscFree(merge->len_s);
4460: PetscFree(merge->len_r);
4461: PetscFree(merge->bi);
4462: PetscFree(merge->bj);
4463: PetscFree(merge->buf_ri[0]);
4464: PetscFree(merge->buf_ri);
4465: PetscFree(merge->buf_rj[0]);
4466: PetscFree(merge->buf_rj);
4467: PetscFree(merge->coi);
4468: PetscFree(merge->coj);
4469: PetscFree(merge->owners_co);
4470: PetscLayoutDestroy(&merge->rowmap);
4471: PetscFree(merge);
4472: PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);
4473: }
4474: MatDestroy_MPIAIJ(A);
4475: return(0);
4476: }
4478: #include <../src/mat/utils/freespace.h>
4479: #include <petscbt.h>
4483: PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4484: {
4485: PetscErrorCode ierr;
4486: MPI_Comm comm=((PetscObject)mpimat)->comm;
4487: Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
4488: PetscMPIInt size,rank,taga,*len_s;
4489: PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4490: PetscInt proc,m;
4491: PetscInt **buf_ri,**buf_rj;
4492: PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4493: PetscInt nrows,**buf_ri_k,**nextrow,**nextai;
4494: MPI_Request *s_waits,*r_waits;
4495: MPI_Status *status;
4496: MatScalar *aa=a->a;
4497: MatScalar **abuf_r,*ba_i;
4498: Mat_Merge_SeqsToMPI *merge;
4499: PetscContainer container;
4502: PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);
4504: MPI_Comm_size(comm,&size);
4505: MPI_Comm_rank(comm,&rank);
4507: PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);
4508: PetscContainerGetPointer(container,(void **)&merge);
4510: bi = merge->bi;
4511: bj = merge->bj;
4512: buf_ri = merge->buf_ri;
4513: buf_rj = merge->buf_rj;
4515: PetscMalloc(size*sizeof(MPI_Status),&status);
4516: owners = merge->rowmap->range;
4517: len_s = merge->len_s;
4519: /* send and recv matrix values */
4520: /*-----------------------------*/
4521: PetscObjectGetNewTag((PetscObject)mpimat,&taga);
4522: PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);
4524: PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);
4525: for (proc=0,k=0; proc<size; proc++){
4526: if (!len_s[proc]) continue;
4527: i = owners[proc];
4528: MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);
4529: k++;
4530: }
4532: if (merge->nrecv) {MPI_Waitall(merge->nrecv,r_waits,status);}
4533: if (merge->nsend) {MPI_Waitall(merge->nsend,s_waits,status);}
4534: PetscFree(status);
4536: PetscFree(s_waits);
4537: PetscFree(r_waits);
4539: /* insert mat values of mpimat */
4540: /*----------------------------*/
4541: PetscMalloc(N*sizeof(PetscScalar),&ba_i);
4542: PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);
4544: for (k=0; k<merge->nrecv; k++){
4545: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4546: nrows = *(buf_ri_k[k]);
4547: nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */
4548:       nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4549: }
4551: /* set values of ba */
4552: m = merge->rowmap->n;
4553: for (i=0; i<m; i++) {
4554: arow = owners[rank] + i;
4555: bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */
4556: bnzi = bi[i+1] - bi[i];
4557: PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));
4559: /* add local non-zero vals of this proc's seqmat into ba */
4560: anzi = ai[arow+1] - ai[arow];
4561: aj = a->j + ai[arow];
4562: aa = a->a + ai[arow];
4563: nextaj = 0;
4564: for (j=0; nextaj<anzi; j++){
4565: if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4566: ba_i[j] += aa[nextaj++];
4567: }
4568: }
4570: /* add received vals into ba */
4571: for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4572: /* i-th row */
4573: if (i == *nextrow[k]) {
4574: anzi = *(nextai[k]+1) - *nextai[k];
4575: aj = buf_rj[k] + *(nextai[k]);
4576: aa = abuf_r[k] + *(nextai[k]);
4577: nextaj = 0;
4578: for (j=0; nextaj<anzi; j++){
4579: if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4580: ba_i[j] += aa[nextaj++];
4581: }
4582: }
4583: nextrow[k]++; nextai[k]++;
4584: }
4585: }
4586: MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);
4587: }
4588: MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);
4589: MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);
4591: PetscFree(abuf_r[0]);
4592: PetscFree(abuf_r);
4593: PetscFree(ba_i);
4594: PetscFree3(buf_ri_k,nextrow,nextai);
4595: PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);
4596: return(0);
4597: }
4599: extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4603: PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4604: {
4605: PetscErrorCode ierr;
4606: Mat B_mpi;
4607: Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
4608: PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4609: PetscInt **buf_rj,**buf_ri,**buf_ri_k;
4610: PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4611: PetscInt len,proc,*dnz,*onz,bs,cbs;
4612: PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4613: PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4614: MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits;
4615: MPI_Status *status;
4616: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
4617: PetscBT lnkbt;
4618: Mat_Merge_SeqsToMPI *merge;
4619: PetscContainer container;
4622: PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);
4624: /* make sure it is a PETSc comm */
4625: PetscCommDuplicate(comm,&comm,PETSC_NULL);
4626: MPI_Comm_size(comm,&size);
4627: MPI_Comm_rank(comm,&rank);
4629: PetscNew(Mat_Merge_SeqsToMPI,&merge);
4630: PetscMalloc(size*sizeof(MPI_Status),&status);
4632: /* determine row ownership */
4633: /*---------------------------------------------------------*/
4634: PetscLayoutCreate(comm,&merge->rowmap);
4635: PetscLayoutSetLocalSize(merge->rowmap,m);
4636: PetscLayoutSetSize(merge->rowmap,M);
4637: PetscLayoutSetBlockSize(merge->rowmap,1);
4638: PetscLayoutSetUp(merge->rowmap);
4639: PetscMalloc(size*sizeof(PetscMPIInt),&len_si);
4640: PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);
4642: m = merge->rowmap->n;
4643: M = merge->rowmap->N;
4644: owners = merge->rowmap->range;
4646: /* determine the number of messages to send, their lengths */
4647: /*---------------------------------------------------------*/
4648: len_s = merge->len_s;
4650: len = 0; /* length of buf_si[] */
4651: merge->nsend = 0;
4652: for (proc=0; proc<size; proc++){
4653: len_si[proc] = 0;
4654: if (proc == rank){
4655: len_s[proc] = 0;
4656: } else {
4657: len_si[proc] = owners[proc+1] - owners[proc] + 1;
4658: len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4659: }
4660: if (len_s[proc]) {
4661: merge->nsend++;
4662: nrows = 0;
4663: for (i=owners[proc]; i<owners[proc+1]; i++){
4664: if (ai[i+1] > ai[i]) nrows++;
4665: }
4666: len_si[proc] = 2*(nrows+1);
4667: len += len_si[proc];
4668: }
4669: }
4671: /* determine the number and length of messages to receive for ij-structure */
4672: /*-------------------------------------------------------------------------*/
4673: PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);
4674: PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);
4676: /* post the Irecv of j-structure */
4677: /*-------------------------------*/
4678: PetscCommGetNewTag(comm,&tagj);
4679: PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);
4681: /* post the Isend of j-structure */
4682: /*--------------------------------*/
4683: PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);
4685: for (proc=0, k=0; proc<size; proc++){
4686: if (!len_s[proc]) continue;
4687: i = owners[proc];
4688: MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);
4689: k++;
4690: }
4692: /* receives and sends of j-structure are complete */
4693: /*------------------------------------------------*/
4694: if (merge->nrecv) {MPI_Waitall(merge->nrecv,rj_waits,status);}
4695: if (merge->nsend) {MPI_Waitall(merge->nsend,sj_waits,status);}
4697: /* send and recv i-structure */
4698: /*---------------------------*/
4699: PetscCommGetNewTag(comm,&tagi);
4700: PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);
4702: PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);
4703: buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4704: for (proc=0,k=0; proc<size; proc++){
4705: if (!len_s[proc]) continue;
4706: /* form outgoing message for i-structure:
4707: buf_si[0]: nrows to be sent
4708: [1:nrows]: row index (global)
4709: [nrows+1:2*nrows+1]: i-structure index
4710: */
4711: /*-------------------------------------------*/
4712: nrows = len_si[proc]/2 - 1;
4713: buf_si_i = buf_si + nrows+1;
4714: buf_si[0] = nrows;
4715: buf_si_i[0] = 0;
4716: nrows = 0;
4717: for (i=owners[proc]; i<owners[proc+1]; i++){
4718: anzi = ai[i+1] - ai[i];
4719: if (anzi) {
4720: buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4721: buf_si[nrows+1] = i-owners[proc]; /* local row index */
4722: nrows++;
4723: }
4724: }
4725: MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);
4726: k++;
4727: buf_si += len_si[proc];
4728: }
4730: if (merge->nrecv) {MPI_Waitall(merge->nrecv,ri_waits,status);}
4731: if (merge->nsend) {MPI_Waitall(merge->nsend,si_waits,status);}
4733: PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);
4734: for (i=0; i<merge->nrecv; i++){
4735: PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);
4736: }
4738: PetscFree(len_si);
4739: PetscFree(len_ri);
4740: PetscFree(rj_waits);
4741: PetscFree2(si_waits,sj_waits);
4742: PetscFree(ri_waits);
4743: PetscFree(buf_s);
4744: PetscFree(status);
4746: /* compute a local seq matrix in each processor */
4747: /*----------------------------------------------*/
4748: /* allocate bi array and free space for accumulating nonzero column info */
4749: PetscMalloc((m+1)*sizeof(PetscInt),&bi);
4750: bi[0] = 0;
4752: /* create and initialize a linked list */
4753: nlnk = N+1;
4754: PetscLLCreate(N,N,nlnk,lnk,lnkbt);
4756: /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4757: len = 0;
4758: len = ai[owners[rank+1]] - ai[owners[rank]];
4759: PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);
4760: current_space = free_space;
4762: /* determine symbolic info for each local row */
4763: PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);
4765: for (k=0; k<merge->nrecv; k++){
4766: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4767: nrows = *buf_ri_k[k];
4768: nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
4769:     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure */
4770: }
4772: MatPreallocateInitialize(comm,m,n,dnz,onz);
4773: len = 0;
4774: for (i=0;i<m;i++) {
4775: bnzi = 0;
4776: /* add local non-zero cols of this proc's seqmat into lnk */
4777: arow = owners[rank] + i;
4778: anzi = ai[arow+1] - ai[arow];
4779: aj = a->j + ai[arow];
4780: PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);
4781: bnzi += nlnk;
4782: /* add received col data into lnk */
4783: for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4784: if (i == *nextrow[k]) { /* i-th row */
4785: anzi = *(nextai[k]+1) - *nextai[k];
4786: aj = buf_rj[k] + *nextai[k];
4787: PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);
4788: bnzi += nlnk;
4789: nextrow[k]++; nextai[k]++;
4790: }
4791: }
4792: if (len < bnzi) len = bnzi; /* =max(bnzi) */
4794: /* if free space is not available, make more free space */
4795: if (current_space->local_remaining<bnzi) {
4796:       PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);
4797: nspacedouble++;
4798: }
4799: /* copy data into free space, then initialize lnk */
4800: PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);
4801: MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);
4803: current_space->array += bnzi;
4804: current_space->local_used += bnzi;
4805: current_space->local_remaining -= bnzi;
4807: bi[i+1] = bi[i] + bnzi;
4808: }
4810: PetscFree3(buf_ri_k,nextrow,nextai);
4812: PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);
4813: PetscFreeSpaceContiguous(&free_space,bj);
4814: PetscLLDestroy(lnk,lnkbt);
4816: /* create symbolic parallel matrix B_mpi */
4817: /*---------------------------------------*/
4818: MatGetBlockSizes(seqmat,&bs,&cbs);
4819: MatCreate(comm,&B_mpi);
4820: if (n==PETSC_DECIDE) {
4821: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);
4822: } else {
4823: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
4824: }
4825: MatSetBlockSizes(B_mpi,bs,cbs);
4826: MatSetType(B_mpi,MATMPIAIJ);
4827: MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);
4828: MatPreallocateFinalize(dnz,onz);
4829: MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
4831: /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4832: B_mpi->assembled = PETSC_FALSE;
4833: B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4834: merge->bi = bi;
4835: merge->bj = bj;
4836: merge->buf_ri = buf_ri;
4837: merge->buf_rj = buf_rj;
4838: merge->coi = PETSC_NULL;
4839: merge->coj = PETSC_NULL;
4840: merge->owners_co = PETSC_NULL;
4842: PetscCommDestroy(&comm);
4844: /* attach the supporting struct to B_mpi for reuse */
4845: PetscContainerCreate(PETSC_COMM_SELF,&container);
4846: PetscContainerSetPointer(container,merge);
4847: PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);
4848: PetscContainerDestroy(&container);
4849: *mpimat = B_mpi;
4851: PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);
4852: return(0);
4853: }
4857: /*@C
4858: MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4859: matrices from each processor
4861: Collective on MPI_Comm
4863: Input Parameters:
4864: + comm - the communicators the parallel matrix will live on
4865: . seqmat - the input sequential matrices
4866: . m - number of local rows (or PETSC_DECIDE)
4867: . n - number of local columns (or PETSC_DECIDE)
4868: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4870: Output Parameter:
4871: . mpimat - the parallel matrix generated
4873: Level: advanced
4875: Notes:
4876: The dimensions of the sequential matrix in each processor MUST be the same.
4877: The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4878: destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4879: @*/
4880: PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4881: {
4882: PetscErrorCode ierr;
4883: PetscMPIInt size;
4886: MPI_Comm_size(comm,&size);
4887: if (size == 1){
4888: PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);
4889: if (scall == MAT_INITIAL_MATRIX){
4890: MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);
4891: } else {
4892: MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);
4893: }
4894: PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);
4895: return(0);
4896: }
4897: PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);
4898: if (scall == MAT_INITIAL_MATRIX){
4899: MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);
4900: }
4901: MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);
4902: PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);
4903: return(0);
4904: }
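
/*
   Editor's example (a sketch added for illustration, not part of PETSc): summing one
   SeqAIJ contribution of full global size per process into an MPIAIJ matrix, with the
   symbolic work reused on the second call. 'seqmat' is assumed to be assembled and of the
   same dimensions everywhere; PETSC_DECIDE is used as the manual page allows; error
   checking is omitted.
*/
static PetscErrorCode ExampleSumSeqAIJ(Mat seqmat)
{
  Mat C;

  MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
  /* ... change the numerical values of seqmat (same nonzero pattern) ... */
  MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
  MatDestroy(&C);
  return(0);
}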
4908: /*@
4909:     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4910:           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4911:         with MatGetSize()
4913: Not Collective
4915: Input Parameters:
4916: + A - the matrix
4917: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4919: Output Parameter:
4920: . A_loc - the local sequential matrix generated
4922: Level: developer
4924: .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4926: @*/
4927: PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4928: {
4929: PetscErrorCode ierr;
4930: Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data;
4931: Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4932: PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4933: MatScalar *aa=a->a,*ba=b->a,*cam;
4934: PetscScalar *ca;
4935: PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4936: PetscInt *ci,*cj,col,ncols_d,ncols_o,jo;
4937: PetscBool match;
4940: PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);
4941: if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4942: PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);
4943: if (scall == MAT_INITIAL_MATRIX){
4944: PetscMalloc((1+am)*sizeof(PetscInt),&ci);
4945: ci[0] = 0;
4946: for (i=0; i<am; i++){
4947: ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4948: }
4949: PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);
4950: PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);
4951: k = 0;
4952: for (i=0; i<am; i++) {
4953: ncols_o = bi[i+1] - bi[i];
4954: ncols_d = ai[i+1] - ai[i];
4955: /* off-diagonal portion of A */
4956: for (jo=0; jo<ncols_o; jo++) {
4957: col = cmap[*bj];
4958: if (col >= cstart) break;
4959: cj[k] = col; bj++;
4960: ca[k++] = *ba++;
4961: }
4962: /* diagonal portion of A */
4963: for (j=0; j<ncols_d; j++) {
4964: cj[k] = cstart + *aj++;
4965: ca[k++] = *aa++;
4966: }
4967: /* off-diagonal portion of A */
4968: for (j=jo; j<ncols_o; j++) {
4969: cj[k] = cmap[*bj++];
4970: ca[k++] = *ba++;
4971: }
4972: }
4973: /* put together the new matrix */
4974: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);
4975: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4976: /* Since these are PETSc arrays, change flags to free them as necessary. */
4977: mat = (Mat_SeqAIJ*)(*A_loc)->data;
4978: mat->free_a = PETSC_TRUE;
4979: mat->free_ij = PETSC_TRUE;
4980: mat->nonew = 0;
4981: } else if (scall == MAT_REUSE_MATRIX){
4982: mat=(Mat_SeqAIJ*)(*A_loc)->data;
4983: ci = mat->i; cj = mat->j; cam = mat->a;
4984: for (i=0; i<am; i++) {
4985: /* off-diagonal portion of A */
4986: ncols_o = bi[i+1] - bi[i];
4987: for (jo=0; jo<ncols_o; jo++) {
4988: col = cmap[*bj];
4989: if (col >= cstart) break;
4990: *cam++ = *ba++; bj++;
4991: }
4992: /* diagonal portion of A */
4993: ncols_d = ai[i+1] - ai[i];
4994: for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4995: /* off-diagonal portion of A */
4996: for (j=jo; j<ncols_o; j++) {
4997: *cam++ = *ba++; bj++;
4998: }
4999: }
5000: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5001: PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);
5002: return(0);
5003: }
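
/*
   Editor's example (a sketch added for illustration, not part of PETSc): extracting this
   process's rows of an MPIAIJ matrix A as one SeqAIJ matrix with the full global column
   count, then refreshing its values later without reallocating. Error checking is omitted.
*/
static PetscErrorCode ExampleGetLocalMat(Mat A)
{
  Mat A_loc;

  MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
  /* ... A changes numerically but keeps its nonzero pattern ... */
  MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
  MatDestroy(&A_loc);
  return(0);
}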
5007: /*@C
5008: MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5010: Not Collective
5012: Input Parameters:
5013: + A - the matrix
5014: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5015: - row, col - index sets of rows and columns to extract (or PETSC_NULL)
5017: Output Parameter:
5018: . A_loc - the local sequential matrix generated
5020: Level: developer
5022: .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5024: @*/
5025: PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5026: {
5027: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5028: PetscErrorCode ierr;
5029: PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5030: IS isrowa,iscola;
5031: Mat *aloc;
5032: PetscBool match;
5035: PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);
5036: if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5037: PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);
5038: if (!row){
5039: start = A->rmap->rstart; end = A->rmap->rend;
5040: ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);
5041: } else {
5042: isrowa = *row;
5043: }
5044: if (!col){
5045: start = A->cmap->rstart;
5046: cmap = a->garray;
5047: nzA = a->A->cmap->n;
5048: nzB = a->B->cmap->n;
5049: PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);
5050: ncols = 0;
5051: for (i=0; i<nzB; i++) {
5052: if (cmap[i] < start) idx[ncols++] = cmap[i];
5053: else break;
5054: }
5055: imark = i;
5056: for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5057: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5058: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);
5059: } else {
5060: iscola = *col;
5061: }
5062: if (scall != MAT_INITIAL_MATRIX){
5063: PetscMalloc(sizeof(Mat),&aloc);
5064: aloc[0] = *A_loc;
5065: }
5066: MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);
5067: *A_loc = aloc[0];
5068: PetscFree(aloc);
5069: if (!row){
5070: ISDestroy(&isrowa);
5071: }
5072: if (!col){
5073: ISDestroy(&iscola);
5074: }
5075: PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);
5076: return(0);
5077: }
5081: /*@C
5082: MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
5084: Collective on Mat
5086: Input Parameters:
5087: + A,B - the matrices in mpiaij format
5088: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5089: - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL to have them created)
5091: Output Parameters:
5092: + rowb, colb - the index sets of rows and columns of B actually used (returned when non-NULL pointers are passed in)
5093: - B_seq - the sequential matrix generated
5095: Level: developer
5097: @*/
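/*
   Example usage (an illustrative sketch, not part of the original source; A and B are assumed to be
   MPIAIJ matrices with the column layout of A matching the row layout of B, as checked below):

      IS  rowb = PETSC_NULL,colb = PETSC_NULL;
      Mat B_seq;
      MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
      ... use B_seq; after B receives new numerical values the call can be repeated with MAT_REUSE_MATRIX ...
      MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);
      ISDestroy(&rowb);
      ISDestroy(&colb);
      MatDestroy(&B_seq);
*/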
5098: PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5099: {
5100: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5101: PetscErrorCode ierr;
5102: PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5103: IS isrowb,iscolb;
5104: Mat *bseq=PETSC_NULL;
5105:
5107: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5108: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5109: }
5110: PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);
5111:
5112: if (scall == MAT_INITIAL_MATRIX){
5113: start = A->cmap->rstart;
5114: cmap = a->garray;
5115: nzA = a->A->cmap->n;
5116: nzB = a->B->cmap->n;
5117: PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);
5118: ncols = 0;
5119: for (i=0; i<nzB; i++) { /* rows of B (= global columns of A) below the local ownership range */
5120: if (cmap[i] < start) idx[ncols++] = cmap[i];
5121: else break;
5122: }
5123: imark = i;
5124: for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */
5125: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* rows of B above the local ownership range */
5126: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);
5127: ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);
5128: } else {
5129: if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5130: isrowb = *rowb; iscolb = *colb;
5131: PetscMalloc(sizeof(Mat),&bseq);
5132: bseq[0] = *B_seq;
5133: }
5134: MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);
5135: *B_seq = bseq[0];
5136: PetscFree(bseq);
5137: if (!rowb){
5138: ISDestroy(&isrowb);
5139: } else {
5140: *rowb = isrowb;
5141: }
5142: if (!colb){
5143: ISDestroy(&iscolb);
5144: } else {
5145: *colb = iscolb;
5146: }
5147: PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);
5148: return(0);
5149: }
5153: /*
5154: MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to
5155: the nonzero columns of the OFF-DIAGONAL portion of the local part of A
5157: Collective on Mat
5159: Input Parameters:
5160: + A,B - the matrices in mpiaij format
5161: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5163: Output Parameters:
5164: + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5165: . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5166: . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5167: - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5169: Level: developer
5171: */
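/*
   Example calling pattern (an illustrative sketch, not part of the original source): the buffers
   returned for MAT_INITIAL_MATRIX are handed back on the MAT_REUSE_MATRIX call so that only the
   numerical values are re-communicated; the caller is assumed to free them afterwards.

      PetscInt  *startsj_s = PETSC_NULL,*startsj_r = PETSC_NULL;
      MatScalar *bufa = PETSC_NULL;
      Mat       B_oth;
      MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
      ... B gets new numerical values with the same nonzero pattern ...
      MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
      MatDestroy(&B_oth);
      PetscFree2(startsj_s,startsj_r);
      PetscFree(bufa);
*/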
5172: PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5173: {
5174: VecScatter_MPI_General *gen_to,*gen_from;
5175: PetscErrorCode ierr;
5176: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5177: Mat_SeqAIJ *b_oth;
5178: VecScatter ctx=a->Mvctx;
5179: MPI_Comm comm=((PetscObject)ctx)->comm;
5180: PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5181: PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5182: PetscScalar *rvalues,*svalues;
5183: MatScalar *b_otha,*bufa,*bufA;
5184: PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5185: MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5186: MPI_Status *sstatus,rstatus;
5187: PetscMPIInt jj;
5188: PetscInt *cols,sbs,rbs;
5189: PetscScalar *vals;
5192: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5193: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5194: }
5195: PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);
5196: MPI_Comm_rank(comm,&rank);
5198: gen_to = (VecScatter_MPI_General*)ctx->todata;
5199: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5200: rvalues = gen_from->values; /* holds the lengths of the rows to be received */
5201: svalues = gen_to->values; /* holds the lengths of the rows to be sent */
5202: nrecvs = gen_from->n;
5203: nsends = gen_to->n;
5205: PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);
5206: srow = gen_to->indices; /* local row index to be sent */
5207: sstarts = gen_to->starts;
5208: sprocs = gen_to->procs;
5209: sstatus = gen_to->sstatus;
5210: sbs = gen_to->bs;
5211: rstarts = gen_from->starts;
5212: rprocs = gen_from->procs;
5213: rbs = gen_from->bs;
5215: if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5216: if (scall == MAT_INITIAL_MATRIX){
5217: /* i-array */
5218: /*---------*/
5219: /* post receives */
5220: for (i=0; i<nrecvs; i++){
5221: rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5222: nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5223: MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
5224: }
5226: /* pack the outgoing message */
5227: PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);
5228: sstartsj[0] = 0; rstartsj[0] = 0;
5229: len = 0; /* total length of j or a array to be sent */
5230: k = 0;
5231: for (i=0; i<nsends; i++){
5232: rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5233: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5234: for (j=0; j<nrows; j++) {
5235: row = srow[k] + B->rmap->range[rank]; /* global row idx */
5236: for (l=0; l<sbs; l++){
5237: MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL); /* rowlength */
5238: rowlen[j*sbs+l] = ncols;
5239: len += ncols;
5240: MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);
5241: }
5242: k++;
5243: }
5244: MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);
5245: sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5246: }
5247: /* recvs and sends of i-array are completed */
5248: i = nrecvs;
5249: while (i--) {
5250: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5251: }
5252: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5254: /* allocate buffers for sending j and a arrays */
5255: PetscMalloc((len+1)*sizeof(PetscInt),&bufj);
5256: PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);
5258: /* create i-array of B_oth */
5259: PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);
5260: b_othi[0] = 0;
5261: len = 0; /* total length of j or a array to be received */
5262: k = 0;
5263: for (i=0; i<nrecvs; i++){
5264: rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5265: nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5266: for (j=0; j<nrows; j++) {
5267: b_othi[k+1] = b_othi[k] + rowlen[j];
5268: len += rowlen[j]; k++;
5269: }
5270: rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5271: }
5273: /* allocate space for j and a arrays of B_oth */
5274: PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);
5275: PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);
5277: /* j-array */
5278: /*---------*/
5279: /* post receives of j-array */
5280: for (i=0; i<nrecvs; i++){
5281: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5282: MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
5283: }
5285: /* pack the outgoing message j-array */
5286: k = 0;
5287: for (i=0; i<nsends; i++){
5288: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5289: bufJ = bufj+sstartsj[i];
5290: for (j=0; j<nrows; j++) {
5291: row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5292: for (ll=0; ll<sbs; ll++){
5293: MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);
5294: for (l=0; l<ncols; l++){
5295: *bufJ++ = cols[l];
5296: }
5297: MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);
5298: }
5299: }
5300: MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);
5301: }
5303: /* recvs and sends of j-array are completed */
5304: i = nrecvs;
5305: while (i--) {
5306: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5307: }
5308: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5309: } else if (scall == MAT_REUSE_MATRIX){
5310: sstartsj = *startsj_s;
5311: rstartsj = *startsj_r;
5312: bufa = *bufa_ptr;
5313: b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5314: b_otha = b_oth->a;
5315: } else {
5316: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5317: }
5319: /* a-array */
5320: /*---------*/
5321: /* post receives of a-array */
5322: for (i=0; i<nrecvs; i++){
5323: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5324: MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
5325: }
5327: /* pack the outgoing message a-array */
5328: k = 0;
5329: for (i=0; i<nsends; i++){
5330: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5331: bufA = bufa+sstartsj[i];
5332: for (j=0; j<nrows; j++) {
5333: row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5334: for (ll=0; ll<sbs; ll++){
5335: MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);
5336: for (l=0; l<ncols; l++){
5337: *bufA++ = vals[l];
5338: }
5339: MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);
5340: }
5341: }
5342: MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
5343: }
5344: /* recvs and sends of a-array are completed */
5345: i = nrecvs;
5346: while (i--) {
5347: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5348: }
5349: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5350: PetscFree2(rwaits,swaits);
5352: if (scall == MAT_INITIAL_MATRIX){
5353: /* put together the new matrix */
5354: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);
5356: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5357: /* Since these are PETSc arrays, change flags to free them as necessary. */
5358: b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
5359: b_oth->free_a = PETSC_TRUE;
5360: b_oth->free_ij = PETSC_TRUE;
5361: b_oth->nonew = 0;
5363: PetscFree(bufj);
5364: if (!startsj_s || !bufa_ptr){
5365: PetscFree2(sstartsj,rstartsj);
5366: PetscFree(bufa);
5367: } else {
5368: *startsj_s = sstartsj;
5369: *startsj_r = rstartsj;
5370: *bufa_ptr = bufa;
5371: }
5372: }
5373: PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);
5374: return(0);
5375: }
5379: /*@C
5380: MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5382: Not Collective
5384: Input Parameter:
5385: . A - The matrix in mpiaij format
5387: Output Parameters:
5388: + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5389: . colmap - A map from global column index to local index into lvec
5390: - multScatter - A scatter from the argument of a matrix-vector product to lvec
5392: Level: developer
5394: @*/
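/*
   Example usage (an illustrative sketch, not part of the original source): inspect the scatter that
   MatMult() uses to gather off-process vector entries. The returned objects are owned by the matrix
   and must not be destroyed by the caller.

      Vec        lvec;
      VecScatter Mvctx;
      MatGetCommunicationStructs(A,&lvec,PETSC_NULL,&Mvctx);
      VecScatterView(Mvctx,PETSC_VIEWER_STDOUT_WORLD);
*/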
5395: #if defined (PETSC_USE_CTABLE)
5396: PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5397: #else
5398: PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5399: #endif
5400: {
5401: Mat_MPIAIJ *a;
5408: a = (Mat_MPIAIJ *) A->data;
5409: if (lvec) *lvec = a->lvec;
5410: if (colmap) *colmap = a->colmap;
5411: if (multScatter) *multScatter = a->Mvctx;
5412: return(0);
5413: }
5415: EXTERN_C_BEGIN
5416: extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5417: extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5418: extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5419: EXTERN_C_END
5423: /*
5424:     Computes C = A*B as (B'*A')', since computing the product directly with a dense A and a sparse B is untenable
5426:                    n                        p                            p
5427:          (                  )      (                  )         (                  )
5428:        m (        A         )  * n (        B         )   =   m (        C         )
5429:          (                  )      (                  )         (                  )
5431: */
5432: PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5433: {
5434: PetscErrorCode ierr;
5435: Mat At,Bt,Ct;
5438: MatTranspose(A,MAT_INITIAL_MATRIX,&At);
5439: MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);
5440: MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);
5441: MatDestroy(&At);
5442: MatDestroy(&Bt);
5443: MatTranspose(Ct,MAT_REUSE_MATRIX,&C);
5444: MatDestroy(&Ct);
5445: return(0);
5446: }
5450: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5451: {
5453: PetscInt m=A->rmap->n,n=B->cmap->n;
5454: Mat Cmat;
5457: if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5458: MatCreate(((PetscObject)A)->comm,&Cmat);
5459: MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
5460: MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);
5461: MatSetType(Cmat,MATMPIDENSE);
5462: MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);
5463: MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);
5464: MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);
5465: *C = Cmat;
5466: (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5467: return(0);
5468: }
5470: /* ----------------------------------------------------------------*/
5473: PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5474: {
5478: if (scall == MAT_INITIAL_MATRIX){
5479: MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);
5480: }
5481: MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);
5482: return(0);
5483: }
5485: EXTERN_C_BEGIN
5486: #if defined(PETSC_HAVE_MUMPS)
5487: extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5488: #endif
5489: #if defined(PETSC_HAVE_PASTIX)
5490: extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5491: #endif
5492: #if defined(PETSC_HAVE_SUPERLU_DIST)
5493: extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5494: #endif
5495: #if defined(PETSC_HAVE_SPOOLES)
5496: extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5497: #endif
5498: EXTERN_C_END
5500: /*MC
5501: MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5503: Options Database Keys:
5504: . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5506: Level: beginner
5508: .seealso: MatCreateAIJ()
5509: M*/
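/*
   Example usage (an illustrative sketch, not part of the original source): create a parallel AIJ
   matrix either explicitly or through the options database, then preallocate it. The sizes and
   nonzero counts (m, n, d_nz, o_nz) are placeholders supplied by the caller.

      Mat A;
      MatCreate(comm,&A);
      MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
      MatSetType(A,MATMPIAIJ);             (or call MatSetFromOptions(A) and run with -mat_type mpiaij)
      MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
*/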
5511: EXTERN_C_BEGIN
5514: PetscErrorCode MatCreate_MPIAIJ(Mat B)
5515: {
5516: Mat_MPIAIJ *b;
5518: PetscMPIInt size;
5521: MPI_Comm_size(((PetscObject)B)->comm,&size);
5523: PetscNewLog(B,Mat_MPIAIJ,&b);
5524: B->data = (void*)b;
5525: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
5526: B->assembled = PETSC_FALSE;
5528: B->insertmode = NOT_SET_VALUES;
5529: b->size = size;
5530: MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);
5532: /* build cache for off array entries formed */
5533: MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);
5534: b->donotstash = PETSC_FALSE;
5535: b->colmap = 0;
5536: b->garray = 0;
5537: b->roworiented = PETSC_TRUE;
5539: /* stuff used for matrix vector multiply */
5540: b->lvec = PETSC_NULL;
5541: b->Mvctx = PETSC_NULL;
5543: /* stuff for MatGetRow() */
5544: b->rowindices = 0;
5545: b->rowvalues = 0;
5546: b->getrowactive = PETSC_FALSE;
5548: #if defined(PETSC_HAVE_SPOOLES)
5549: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5550: "MatGetFactor_mpiaij_spooles",
5551: MatGetFactor_mpiaij_spooles);
5552: #endif
5553: #if defined(PETSC_HAVE_MUMPS)
5554: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5555: "MatGetFactor_aij_mumps",
5556: MatGetFactor_aij_mumps);
5557: #endif
5558: #if defined(PETSC_HAVE_PASTIX)
5559: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5560: "MatGetFactor_mpiaij_pastix",
5561: MatGetFactor_mpiaij_pastix);
5562: #endif
5563: #if defined(PETSC_HAVE_SUPERLU_DIST)
5564: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5565: "MatGetFactor_mpiaij_superlu_dist",
5566: MatGetFactor_mpiaij_superlu_dist);
5567: #endif
5568: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5569: "MatStoreValues_MPIAIJ",
5570: MatStoreValues_MPIAIJ);
5571: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5572: "MatRetrieveValues_MPIAIJ",
5573: MatRetrieveValues_MPIAIJ);
5574: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5575: "MatGetDiagonalBlock_MPIAIJ",
5576: MatGetDiagonalBlock_MPIAIJ);
5577: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5578: "MatIsTranspose_MPIAIJ",
5579: MatIsTranspose_MPIAIJ);
5580: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5581: "MatMPIAIJSetPreallocation_MPIAIJ",
5582: MatMPIAIJSetPreallocation_MPIAIJ);
5583: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5584: "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5585: MatMPIAIJSetPreallocationCSR_MPIAIJ);
5586: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5587: "MatDiagonalScaleLocal_MPIAIJ",
5588: MatDiagonalScaleLocal_MPIAIJ);
5589: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5590: "MatConvert_MPIAIJ_MPIAIJPERM",
5591: MatConvert_MPIAIJ_MPIAIJPERM);
5592: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5593: "MatConvert_MPIAIJ_MPIAIJCRL",
5594: MatConvert_MPIAIJ_MPIAIJCRL);
5595: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5596: "MatConvert_MPIAIJ_MPISBAIJ",
5597: MatConvert_MPIAIJ_MPISBAIJ);
5598: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5599: "MatMatMult_MPIDense_MPIAIJ",
5600: MatMatMult_MPIDense_MPIAIJ);
5601: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5602: "MatMatMultSymbolic_MPIDense_MPIAIJ",
5603: MatMatMultSymbolic_MPIDense_MPIAIJ);
5604: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5605: "MatMatMultNumeric_MPIDense_MPIAIJ",
5606: MatMatMultNumeric_MPIDense_MPIAIJ);
5607: PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);
5608: return(0);
5609: }
5610: EXTERN_C_END
5614: /*@
5615: MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
5616: and "off-diagonal" parts of the matrix in CSR format.
5618: Collective on MPI_Comm
5620: Input Parameters:
5621: + comm - MPI communicator
5622: . m - number of local rows (Cannot be PETSC_DECIDE)
5623: . n - number of local columns; this should be the same as the local size used in creating the
5624: x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5625: calculated if N is given). For square matrices n is almost always m.
5626: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5627: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5628: . i - row indices for "diagonal" portion of matrix
5629: . j - column indices
5630: . a - matrix values
5631: . oi - row indices for "off-diagonal" portion of matrix
5632: . oj - column indices
5633: - oa - matrix values
5635: Output Parameter:
5636: . mat - the matrix
5638: Level: advanced
5640: Notes:
5641: The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5642: must free the arrays once the matrix has been destroyed and not before.
5644: The i and j indices are 0 based
5645:
5646: See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5648: This sets local rows and cannot be used to set off-processor values.
5650: You cannot later use MatSetValues() to change values in this matrix.
5652: .keywords: matrix, aij, compressed row, sparse, parallel
5654: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5655: MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5656: @*/
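/*
   Example usage (an illustrative sketch, not part of the original source): wrap caller-owned split
   CSR arrays. As in the calls to MatCreateSeqAIJWithArrays() below, the j indices of the "diagonal"
   block are local to that block while the oj indices are global; all six arrays must remain valid
   until the matrix is destroyed.

      Mat A;
      MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);
      ... use A (MatSetValues() may not be used on it) ...
      MatDestroy(&A);
      ... only now may the caller free i,j,a,oi,oj,oa ...
*/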
5657: PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5658: PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5659: {
5661: Mat_MPIAIJ *maij;
5664: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5665: if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5666: if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5667: MatCreate(comm,mat);
5668: MatSetSizes(*mat,m,n,M,N);
5669: MatSetType(*mat,MATMPIAIJ);
5670: maij = (Mat_MPIAIJ*) (*mat)->data;
5671: maij->donotstash = PETSC_TRUE;
5672: (*mat)->preallocated = PETSC_TRUE;
5674: PetscLayoutSetUp((*mat)->rmap);
5675: PetscLayoutSetUp((*mat)->cmap);
5677: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);
5678: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);
5680: MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);
5681: MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);
5682: MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);
5683: MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);
5685: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
5686: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
5687: return(0);
5688: }
5690: /*
5691: Special version for direct calls from Fortran
5692: */
5693: #include <petsc-private/fortranimpl.h>
5695: #if defined(PETSC_HAVE_FORTRAN_CAPS)
5696: #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5697: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5698: #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5699: #endif
5702: /* Change these macros so they can be used in a void function */
5702: #undef CHKERRQ
5703: #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5704: #undef SETERRQ2
5705: #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5706: #undef SETERRQ3
5707: #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5708: #undef SETERRQ
5709: #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5711: EXTERN_C_BEGIN
5714: void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5715: {
5716: Mat mat = *mmat;
5717: PetscInt m = *mm, n = *mn;
5718: InsertMode addv = *maddv;
5719: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
5720: PetscScalar value;
5721: PetscErrorCode ierr;
5723: MatCheckPreallocated(mat,1);
5724: if (mat->insertmode == NOT_SET_VALUES) {
5725: mat->insertmode = addv;
5726: }
5727: #if defined(PETSC_USE_DEBUG)
5728: else if (mat->insertmode != addv) {
5729: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5730: }
5731: #endif
5732: {
5733: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5734: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5735: PetscBool roworiented = aij->roworiented;
5737: /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
5738: Mat A = aij->A;
5739: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
5740: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5741: MatScalar *aa = a->a;
5742: PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5743: Mat B = aij->B;
5744: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
5745: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5746: MatScalar *ba = b->a;
5748: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5749: PetscInt nonew = a->nonew;
5750: MatScalar *ap1,*ap2;
5753: for (i=0; i<m; i++) {
5754: if (im[i] < 0) continue;
5755: #if defined(PETSC_USE_DEBUG)
5756: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5757: #endif
5758: if (im[i] >= rstart && im[i] < rend) {
5759: row = im[i] - rstart;
5760: lastcol1 = -1;
5761: rp1 = aj + ai[row];
5762: ap1 = aa + ai[row];
5763: rmax1 = aimax[row];
5764: nrow1 = ailen[row];
5765: low1 = 0;
5766: high1 = nrow1;
5767: lastcol2 = -1;
5768: rp2 = bj + bi[row];
5769: ap2 = ba + bi[row];
5770: rmax2 = bimax[row];
5771: nrow2 = bilen[row];
5772: low2 = 0;
5773: high2 = nrow2;
5775: for (j=0; j<n; j++) {
5776: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5777: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5778: if (in[j] >= cstart && in[j] < cend){
5779: col = in[j] - cstart;
5780: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5781: } else if (in[j] < 0) continue;
5782: #if defined(PETSC_USE_DEBUG)
5783: else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5784: #endif
5785: else {
5786: if (mat->was_assembled) {
5787: if (!aij->colmap) {
5788: MatCreateColmap_MPIAIJ_Private(mat);
5789: }
5790: #if defined (PETSC_USE_CTABLE)
5791: PetscTableFind(aij->colmap,in[j]+1,&col);
5792: col--;
5793: #else
5794: col = aij->colmap[in[j]] - 1;
5795: #endif
5796: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5797: MatDisAssemble_MPIAIJ(mat);
5798: col = in[j];
5799: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5800: B = aij->B;
5801: b = (Mat_SeqAIJ*)B->data;
5802: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5803: rp2 = bj + bi[row];
5804: ap2 = ba + bi[row];
5805: rmax2 = bimax[row];
5806: nrow2 = bilen[row];
5807: low2 = 0;
5808: high2 = nrow2;
5809: bm = aij->B->rmap->n;
5810: ba = b->a;
5811: }
5812: } else col = in[j];
5813: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5814: }
5815: }
5816: } else {
5817: if (!aij->donotstash) {
5818: if (roworiented) {
5819: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
5820: } else {
5821: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
5822: }
5823: }
5824: }
5825: }}
5826: PetscFunctionReturnVoid();
5827: }
5828: EXTERN_C_END