Actual source code: mpiaij.c
1: #define PETSCMAT_DLL
3: #include "src/mat/impls/aij/mpi/mpiaij.h"
4: #include "src/inline/spops.h"
6: /*
7: Local utility routine that creates a mapping from the global column
8: number to the local number in the off-diagonal part of the local
9: storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable,
10: at a slightly higher hash-table lookup cost; without it, it is not scalable (each
11: processor has an order-N integer array) but is fast to access.
12: */
15: PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16: {
17: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
19: PetscInt n = aij->B->cmap.n,i;
22: #if defined (PETSC_USE_CTABLE)
23: PetscTableCreate(n,&aij->colmap);
24: for (i=0; i<n; i++){
25: PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);
26: }
27: #else
28: PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);
29: PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));
30: PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));
31: for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32: #endif
33: return(0);
34: }
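For reference, a minimal sketch (illustrative helper name; error checking omitted as in this listing) of how the colmap built above is consulted elsewhere in this file to translate a global column number into a local column of the off-diagonal block B; a result of -1 means the column is not present in B on this process.

/* Illustrative only: translate a global column index to a local column of aij->B,
   following the same colmap conventions as the routines in this file. */
static PetscInt ExampleGlobalToLocalColumn(Mat_MPIAIJ *aij,PetscInt gcol)
{
  PetscInt lcol;
#if defined (PETSC_USE_CTABLE)
  PetscTableFind(aij->colmap,gcol+1,&lcol);  /* the table stores local index + 1; 0 means "not present" */
  lcol--;
#else
  lcol = aij->colmap[gcol] - 1;              /* the array stores local index + 1; 0 means "not present" */
#endif
  return lcol;                               /* -1 if gcol is not a column of B on this process */
}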
37: #define CHUNKSIZE 15
38: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39: { \
40: if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41: lastcol1 = col;\
42: while (high1-low1 > 5) { \
43: t = (low1+high1)/2; \
44: if (rp1[t] > col) high1 = t; \
45: else low1 = t; \
46: } \
47: for (_i=low1; _i<high1; _i++) { \
48: if (rp1[_i] > col) break; \
49: if (rp1[_i] == col) { \
50: if (addv == ADD_VALUES) ap1[_i] += value; \
51: else ap1[_i] = value; \
52: goto a_noinsert; \
53: } \
54: } \
55: if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56: if (nonew == 1) goto a_noinsert; \
57: if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58: MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
59: N = nrow1++ - 1; a->nz++; high1++; \
60: /* shift up all the later entries in this row */ \
61: for (ii=N; ii>=_i; ii--) { \
62: rp1[ii+1] = rp1[ii]; \
63: ap1[ii+1] = ap1[ii]; \
64: } \
65: rp1[_i] = col; \
66: ap1[_i] = value; \
67: a_noinsert: ; \
68: ailen[row] = nrow1; \
69: }
72: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73: { \
74: if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75: lastcol2 = col;\
76: while (high2-low2 > 5) { \
77: t = (low2+high2)/2; \
78: if (rp2[t] > col) high2 = t; \
79: else low2 = t; \
80: } \
81: for (_i=low2; _i<high2; _i++) { \
82: if (rp2[_i] > col) break; \
83: if (rp2[_i] == col) { \
84: if (addv == ADD_VALUES) ap2[_i] += value; \
85: else ap2[_i] = value; \
86: goto b_noinsert; \
87: } \
88: } \
89: if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90: if (nonew == 1) goto b_noinsert; \
91: if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92: MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
93: N = nrow2++ - 1; b->nz++; high2++;\
94: /* shift up all the later entries in this row */ \
95: for (ii=N; ii>=_i; ii--) { \
96: rp2[ii+1] = rp2[ii]; \
97: ap2[ii+1] = ap2[ii]; \
98: } \
99: rp2[_i] = col; \
100: ap2[_i] = value; \
101: b_noinsert: ; \
102: bilen[row] = nrow2; \
103: }
107: PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
108: {
109: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
110: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
112: PetscInt l,*garray = mat->garray,diag;
115: /* code only works for square matrices A */
117: /* find size of row to the left of the diagonal part */
118: MatGetOwnershipRange(A,&diag,0);
119: row = row - diag;
120: for (l=0; l<b->i[row+1]-b->i[row]; l++) {
121: if (garray[b->j[b->i[row]+l]] > diag) break;
122: }
123: PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));
125: /* diagonal part */
126: PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));
128: /* right of diagonal part */
129: PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));
130: return(0);
131: }
135: PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136: {
137: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
138: PetscScalar value;
140: PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141: PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142: PetscTruth roworiented = aij->roworiented;
144: /* Some Variables required in the macro */
145: Mat A = aij->A;
146: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
147: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148: PetscScalar *aa = a->a;
149: PetscTruth ignorezeroentries = a->ignorezeroentries;
150: Mat B = aij->B;
151: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
152: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153: PetscScalar *ba = b->a;
155: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156: PetscInt nonew = a->nonew;
157: PetscScalar *ap1,*ap2;
160: for (i=0; i<m; i++) {
161: if (im[i] < 0) continue;
162: #if defined(PETSC_USE_DEBUG)
163: if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164: #endif
165: if (im[i] >= rstart && im[i] < rend) {
166: row = im[i] - rstart;
167: lastcol1 = -1;
168: rp1 = aj + ai[row];
169: ap1 = aa + ai[row];
170: rmax1 = aimax[row];
171: nrow1 = ailen[row];
172: low1 = 0;
173: high1 = nrow1;
174: lastcol2 = -1;
175: rp2 = bj + bi[row];
176: ap2 = ba + bi[row];
177: rmax2 = bimax[row];
178: nrow2 = bilen[row];
179: low2 = 0;
180: high2 = nrow2;
182: for (j=0; j<n; j++) {
183: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185: if (in[j] >= cstart && in[j] < cend){
186: col = in[j] - cstart;
187: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188: } else if (in[j] < 0) continue;
189: #if defined(PETSC_USE_DEBUG)
190: else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191: #endif
192: else {
193: if (mat->was_assembled) {
194: if (!aij->colmap) {
195: CreateColmap_MPIAIJ_Private(mat);
196: }
197: #if defined (PETSC_USE_CTABLE)
198: PetscTableFind(aij->colmap,in[j]+1,&col);
199: col--;
200: #else
201: col = aij->colmap[in[j]] - 1;
202: #endif
203: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204: DisAssemble_MPIAIJ(mat);
205: col = in[j];
206: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207: B = aij->B;
208: b = (Mat_SeqAIJ*)B->data;
209: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210: rp2 = bj + bi[row];
211: ap2 = ba + bi[row];
212: rmax2 = bimax[row];
213: nrow2 = bilen[row];
214: low2 = 0;
215: high2 = nrow2;
216: bm = aij->B->rmap.n;
217: ba = b->a;
218: }
219: } else col = in[j];
220: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221: }
222: }
223: } else {
224: if (!aij->donotstash) {
225: if (roworiented) {
226: if (ignorezeroentries && v[i*n] == 0.0) continue;
227: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
228: } else {
229: if (ignorezeroentries && v[i] == 0.0) continue;
230: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
231: }
232: }
233: }
234: }
235: return(0);
236: }
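A usage-level sketch of what the routine above services (illustrative helper; error checking omitted): entries in locally owned rows go straight into the diagonal block A or the off-diagonal block B, while entries in rows owned elsewhere are stashed and only communicated during assembly.

/* Illustrative only: add a locally owned entry and a possibly off-processor entry. */
static PetscErrorCode ExampleSetValues(Mat A)
{
  PetscInt    M,N,rstart,rend,row,col;
  PetscScalar v = 1.0;

  MatGetSize(A,&M,&N);
  MatGetOwnershipRange(A,&rstart,&rend);

  row = rstart; col = rstart;            /* owned row: handled directly by MatSetValues_MPIAIJ() */
  MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);

  row = rend % M; col = (rstart+1) % N;  /* possibly off-processor row: stashed until assembly */
  MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);

  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);  /* moves stashed entries to their owning processes */
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  return 0;
}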
240: PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
241: {
242: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
244: PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
245: PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
248: for (i=0; i<m; i++) {
249: if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
250: if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
251: if (idxm[i] >= rstart && idxm[i] < rend) {
252: row = idxm[i] - rstart;
253: for (j=0; j<n; j++) {
254: if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
255: if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
256: if (idxn[j] >= cstart && idxn[j] < cend){
257: col = idxn[j] - cstart;
258: MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
259: } else {
260: if (!aij->colmap) {
261: CreateColmap_MPIAIJ_Private(mat);
262: }
263: #if defined (PETSC_USE_CTABLE)
264: PetscTableFind(aij->colmap,idxn[j]+1,&col);
265: col --;
266: #else
267: col = aij->colmap[idxn[j]] - 1;
268: #endif
269: if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
270: else {
271: MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
272: }
273: }
274: }
275: } else {
276: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
277: }
278: }
279: return(0);
280: }
284: PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
285: {
286: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
288: PetscInt nstash,reallocs;
289: InsertMode addv;
292: if (aij->donotstash) {
293: return(0);
294: }
296: /* make sure all processors are either in INSERTMODE or ADDMODE */
297: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);
298: if (addv == (ADD_VALUES|INSERT_VALUES)) {
299: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
300: }
301: mat->insertmode = addv; /* in case this processor had no cache */
303: MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);
304: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
305: PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
306: return(0);
307: }
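The reduction above flags the case where some processes inserted while others added within one assembly phase. A sketch of the usual way to mix the two modes legally, via a flush assembly (illustrative helper; error checking omitted):

/* Illustrative only: switch insert modes safely around a flush assembly. */
static PetscErrorCode ExampleMixedModes(Mat A,PetscInt row,PetscInt col)
{
  PetscScalar one = 1.0;

  MatSetValues(A,1,&row,1,&col,&one,INSERT_VALUES);
  MatAssemblyBegin(A,MAT_FLUSH_ASSEMBLY);   /* complete all pending insertions first */
  MatAssemblyEnd(A,MAT_FLUSH_ASSEMBLY);
  MatSetValues(A,1,&row,1,&col,&one,ADD_VALUES);
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  return 0;
}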
311: PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
312: {
313: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
314: Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data;
316: PetscMPIInt n;
317: PetscInt i,j,rstart,ncols,flg;
318: PetscInt *row,*col,other_disassembled;
319: PetscScalar *val;
320: InsertMode addv = mat->insertmode;
322: /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
324: if (!aij->donotstash) {
325: while (1) {
326: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
327: if (!flg) break;
329: for (i=0; i<n;) {
330: /* Now identify the consecutive vals belonging to the same row */
331: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
332: if (j < n) ncols = j-i;
333: else ncols = n-i;
334: /* Now assemble all these values with a single function call */
335: MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
336: i = j;
337: }
338: }
339: MatStashScatterEnd_Private(&mat->stash);
340: }
341: a->compressedrow.use = PETSC_FALSE;
342: MatAssemblyBegin(aij->A,mode);
343: MatAssemblyEnd(aij->A,mode);
345: /* determine if any processor has disassembled; if so we must
346: also disassemble ourselves, in order that we may reassemble. */
347: /*
348: if nonzero structure of submatrix B cannot change then we know that
349: no processor disassembled thus we can skip this stuff
350: */
351: if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
352: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
353: if (mat->was_assembled && !other_disassembled) {
354: DisAssemble_MPIAIJ(mat);
355: }
356: }
357: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
358: MatSetUpMultiply_MPIAIJ(mat);
359: }
360: MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);
361: ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
362: MatAssemblyBegin(aij->B,mode);
363: MatAssemblyEnd(aij->B,mode);
365: PetscFree(aij->rowvalues);
366: aij->rowvalues = 0;
368: /* used by MatAXPY() */
369: a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
370: a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */
372: return(0);
373: }
377: PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
378: {
379: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
383: MatZeroEntries(l->A);
384: MatZeroEntries(l->B);
385: return(0);
386: }
390: PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
391: {
392: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
394: PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
395: PetscInt i,*owners = A->rmap.range;
396: PetscInt *nprocs,j,idx,nsends,row;
397: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
398: PetscInt *rvalues,count,base,slen,*source;
399: PetscInt *lens,*lrows,*values,rstart=A->rmap.rstart;
400: MPI_Comm comm = A->comm;
401: MPI_Request *send_waits,*recv_waits;
402: MPI_Status recv_status,*send_status;
403: #if defined(PETSC_DEBUG)
404: PetscTruth found = PETSC_FALSE;
405: #endif
408: /* first count number of contributors to each processor */
409: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
410: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
411: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
412: j = 0;
413: for (i=0; i<N; i++) {
414: if (lastidx > (idx = rows[i])) j = 0;
415: lastidx = idx;
416: for (; j<size; j++) {
417: if (idx >= owners[j] && idx < owners[j+1]) {
418: nprocs[2*j]++;
419: nprocs[2*j+1] = 1;
420: owner[i] = j;
421: #if defined(PETSC_DEBUG)
422: found = PETSC_TRUE;
423: #endif
424: break;
425: }
426: }
427: #if defined(PETSC_DEBUG)
428: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
429: found = PETSC_FALSE;
430: #endif
431: }
432: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
434: /* inform other processors of number of messages and max length*/
435: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
437: /* post receives: */
438: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
439: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
440: for (i=0; i<nrecvs; i++) {
441: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
442: }
444: /* do sends:
445: 1) starts[i] gives the starting index in svalues for stuff going to
446: the ith processor
447: */
448: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
449: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
450: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
451: starts[0] = 0;
452: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
453: for (i=0; i<N; i++) {
454: svalues[starts[owner[i]]++] = rows[i];
455: }
457: starts[0] = 0;
458: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
459: count = 0;
460: for (i=0; i<size; i++) {
461: if (nprocs[2*i+1]) {
462: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
463: }
464: }
465: PetscFree(starts);
467: base = owners[rank];
469: /* wait on receives */
470: PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);
471: source = lens + nrecvs;
472: count = nrecvs; slen = 0;
473: while (count) {
474: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
475: /* unpack receives into our local space */
476: MPI_Get_count(&recv_status,MPIU_INT,&n);
477: source[imdex] = recv_status.MPI_SOURCE;
478: lens[imdex] = n;
479: slen += n;
480: count--;
481: }
482: PetscFree(recv_waits);
483:
484: /* move the data into the send scatter */
485: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
486: count = 0;
487: for (i=0; i<nrecvs; i++) {
488: values = rvalues + i*nmax;
489: for (j=0; j<lens[i]; j++) {
490: lrows[count++] = values[j] - base;
491: }
492: }
493: PetscFree(rvalues);
494: PetscFree(lens);
495: PetscFree(owner);
496: PetscFree(nprocs);
497:
498: /* actually zap the local rows */
499: /*
500: Zero the required rows. If the "diagonal block" of the matrix
501: is square and the user wishes to set the diagonal, we use separate
502: code so that MatSetValues() is not called for each diagonal entry,
503: which would allocate new memory with lots of mallocs and slow things down.
505: Contributed by: Matthew Knepley
506: */
507: /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
508: MatZeroRows(l->B,slen,lrows,0.0);
509: if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
510: MatZeroRows(l->A,slen,lrows,diag);
511: } else if (diag != 0.0) {
512: MatZeroRows(l->A,slen,lrows,0.0);
513: if (((Mat_SeqAIJ*)l->A->data)->nonew) {
514: SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
515: MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
516: }
517: for (i = 0; i < slen; i++) {
518: row = lrows[i] + rstart;
519: MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);
520: }
521: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
522: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
523: } else {
524: MatZeroRows(l->A,slen,lrows,0.0);
525: }
526: PetscFree(lrows);
528: /* wait on sends */
529: if (nsends) {
530: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
531: MPI_Waitall(nsends,send_waits,send_status);
532: PetscFree(send_status);
533: }
534: PetscFree(send_waits);
535: PetscFree(svalues);
537: return(0);
538: }
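A usage sketch for the routine above (illustrative helper; error checking omitted): global row indices are passed, and because of the rendezvous at the top of the routine they do not even have to be locally owned.

/* Illustrative only: zero every tenth locally owned row of an assembled MPIAIJ
   matrix and put 1.0 on the diagonal of those rows. */
static PetscErrorCode ExampleZeroRows(Mat A)
{
  PetscInt    rstart,rend,i,nrows = 0,*rows;
  PetscScalar diag = 1.0;

  MatGetOwnershipRange(A,&rstart,&rend);
  PetscMalloc(((rend-rstart)/10+1)*sizeof(PetscInt),&rows);
  for (i=rstart; i<rend; i+=10) rows[nrows++] = i;  /* global indices */
  MatZeroRows(A,nrows,rows,diag);
  PetscFree(rows);
  return 0;
}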
542: PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
543: {
544: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
546: PetscInt nt;
549: VecGetLocalSize(xx,&nt);
550: if (nt != A->cmap.n) {
551: SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
552: }
553: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
554: (*a->A->ops->mult)(a->A,xx,yy);
555: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
556: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
557: return(0);
558: }
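A small usage sketch (illustrative helper; error checking omitted) showing how to obtain vectors whose parallel layout satisfies the size check above; MatGetVecs() returns a right-hand vector matching the column layout and a left-hand vector matching the row layout.

/* Illustrative only: create layout-compatible vectors and apply y = A*x. */
static PetscErrorCode ExampleMatMult(Mat A)
{
  Vec x,y;

  MatGetVecs(A,&x,&y);   /* x matches the columns of A, y the rows */
  VecSet(x,1.0);
  MatMult(A,x,y);        /* the ghost-value scatter overlaps the diagonal-block product */
  VecDestroy(x);
  VecDestroy(y);
  return 0;
}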
562: PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
563: {
564: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
568: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
569: (*a->A->ops->multadd)(a->A,xx,yy,zz);
570: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
571: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
572: return(0);
573: }
577: PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
578: {
579: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
581: PetscTruth merged;
584: VecScatterGetMerged(a->Mvctx,&merged);
585: /* do nondiagonal part */
586: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
587: if (!merged) {
588: /* send it on its way */
589: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
590: /* do local part */
591: (*a->A->ops->multtranspose)(a->A,xx,yy);
592: /* receive remote parts: note this assumes the values are not actually */
593: /* added into yy until the VecScatterEnd() on the next line */
594: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
595: } else {
596: /* do local part */
597: (*a->A->ops->multtranspose)(a->A,xx,yy);
598: /* send it on its way */
599: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
600: /* values actually were received in the Begin() but we need to call this nop */
601: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
602: }
603: return(0);
604: }
609: PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
610: {
611: MPI_Comm comm;
612: Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
613: Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
614: IS Me,Notme;
616: PetscInt M,N,first,last,*notme,i;
617: PetscMPIInt size;
621: /* Easy test: symmetric diagonal block */
622: Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
623: MatIsTranspose(Adia,Bdia,tol,f);
624: if (!*f) return(0);
625: PetscObjectGetComm((PetscObject)Amat,&comm);
626: MPI_Comm_size(comm,&size);
627: if (size == 1) return(0);
629: /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
630: MatGetSize(Amat,&M,&N);
631: MatGetOwnershipRange(Amat,&first,&last);
632: PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);
633: for (i=0; i<first; i++) notme[i] = i;
634: for (i=last; i<M; i++) notme[i-last+first] = i;
635: ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);
636: ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);
637: MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);
638: Aoff = Aoffs[0];
639: MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);
640: Boff = Boffs[0];
641: MatIsTranspose(Aoff,Boff,tol,f);
642: MatDestroyMatrices(1,&Aoffs);
643: MatDestroyMatrices(1,&Boffs);
644: ISDestroy(Me);
645: ISDestroy(Notme);
647: return(0);
648: }
653: PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
654: {
655: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
659: /* do nondiagonal part */
660: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
661: /* send it on its way */
662: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
663: /* do local part */
664: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
665: /* receive remote parts */
666: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
667: return(0);
668: }
670: /*
671: This only works correctly for square matrices where the subblock A->A is the
672: diagonal block
673: */
676: PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
677: {
679: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
682: if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
683: if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
684: SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
685: }
686: MatGetDiagonal(a->A,v);
687: return(0);
688: }
692: PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
693: {
694: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
698: MatScale(a->A,aa);
699: MatScale(a->B,aa);
700: return(0);
701: }
705: PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
706: {
707: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
711: #if defined(PETSC_USE_LOG)
712: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
713: #endif
714: MatStashDestroy_Private(&mat->stash);
715: MatDestroy(aij->A);
716: MatDestroy(aij->B);
717: #if defined (PETSC_USE_CTABLE)
718: if (aij->colmap) {PetscTableDestroy(aij->colmap);}
719: #else
720: PetscFree(aij->colmap);
721: #endif
722: PetscFree(aij->garray);
723: if (aij->lvec) {VecDestroy(aij->lvec);}
724: if (aij->Mvctx) {VecScatterDestroy(aij->Mvctx);}
725: PetscFree(aij->rowvalues);
726: PetscFree(aij->ld);
727: PetscFree(aij);
729: PetscObjectChangeTypeName((PetscObject)mat,0);
730: PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);
731: PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);
732: PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
733: PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);
734: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);
735: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);
736: PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);
737: return(0);
738: }
742: PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
743: {
744: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
745: Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data;
746: Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data;
747: PetscErrorCode ierr;
748: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
749: int fd;
750: PetscInt nz,header[4],*row_lengths,*range=0,rlen,i;
751: PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
752: PetscScalar *column_values;
755: MPI_Comm_rank(mat->comm,&rank);
756: MPI_Comm_size(mat->comm,&size);
757: nz = A->nz + B->nz;
758: if (!rank) {
759: header[0] = MAT_FILE_COOKIE;
760: header[1] = mat->rmap.N;
761: header[2] = mat->cmap.N;
762: MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);
763: PetscViewerBinaryGetDescriptor(viewer,&fd);
764: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
765: /* get largest number of rows any processor has */
766: rlen = mat->rmap.n;
767: range = mat->rmap.range;
768: for (i=1; i<size; i++) {
769: rlen = PetscMax(rlen,range[i+1] - range[i]);
770: }
771: } else {
772: MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);
773: rlen = mat->rmap.n;
774: }
776: /* load up the local row counts */
777: PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);
778: for (i=0; i<mat->rmap.n; i++) {
779: row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
780: }
782: /* store the row lengths to the file */
783: if (!rank) {
784: MPI_Status status;
785: PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);
786: for (i=1; i<size; i++) {
787: rlen = range[i+1] - range[i];
788: MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);
789: PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);
790: }
791: } else {
792: MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);
793: }
794: PetscFree(row_lengths);
796: /* load up the local column indices */
797: nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
798: MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);
799: PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);
800: cnt = 0;
801: for (i=0; i<mat->rmap.n; i++) {
802: for (j=B->i[i]; j<B->i[i+1]; j++) {
803: if ( (col = garray[B->j[j]]) > cstart) break;
804: column_indices[cnt++] = col;
805: }
806: for (k=A->i[i]; k<A->i[i+1]; k++) {
807: column_indices[cnt++] = A->j[k] + cstart;
808: }
809: for (; j<B->i[i+1]; j++) {
810: column_indices[cnt++] = garray[B->j[j]];
811: }
812: }
813: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
815: /* store the column indices to the file */
816: if (!rank) {
817: MPI_Status status;
818: PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);
819: for (i=1; i<size; i++) {
820: MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);
821: if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
822: MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);
823: PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);
824: }
825: } else {
826: MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);
827: MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);
828: }
829: PetscFree(column_indices);
831: /* load up the local column values */
832: PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);
833: cnt = 0;
834: for (i=0; i<mat->rmap.n; i++) {
835: for (j=B->i[i]; j<B->i[i+1]; j++) {
836: if ( garray[B->j[j]] > cstart) break;
837: column_values[cnt++] = B->a[j];
838: }
839: for (k=A->i[i]; k<A->i[i+1]; k++) {
840: column_values[cnt++] = A->a[k];
841: }
842: for (; j<B->i[i+1]; j++) {
843: column_values[cnt++] = B->a[j];
844: }
845: }
846: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
848: /* store the column values to the file */
849: if (!rank) {
850: MPI_Status status;
851: PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);
852: for (i=1; i<size; i++) {
853: MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);
854: if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
855: MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);
856: PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);
857: }
858: } else {
859: MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);
860: MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);
861: }
862: PetscFree(column_values);
863: return(0);
864: }
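The file layout written above is: a header [MAT_FILE_COOKIE, M, N, total nz], then the M row lengths, then all column indices, then all values. A single-process reader sketch under that assumption (illustrative helper; uses PetscBinaryOpen()/PetscBinaryRead(); error checking omitted):

/* Illustrative only: read back, on one process, the binary layout written above. */
static PetscErrorCode ExampleReadAIJBinary(const char *filename)
{
  int         fd;
  PetscInt    header[4],M,N,nz,*rowlens,*cols;
  PetscScalar *vals;

  PetscBinaryOpen(filename,FILE_MODE_READ,&fd);
  PetscBinaryRead(fd,header,4,PETSC_INT);
  if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not a PETSc matrix file");
  M = header[1]; N = header[2]; nz = header[3];
  PetscMalloc((M+1)*sizeof(PetscInt),&rowlens);
  PetscMalloc((nz+1)*sizeof(PetscInt),&cols);
  PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);
  PetscBinaryRead(fd,rowlens,M,PETSC_INT);
  PetscBinaryRead(fd,cols,nz,PETSC_INT);
  PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
  PetscPrintf(PETSC_COMM_SELF,"read %D x %D matrix with %D nonzeros\n",M,N,nz);
  PetscFree(rowlens); PetscFree(cols); PetscFree(vals);
  PetscBinaryClose(fd);
  return 0;
}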
868: PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
869: {
870: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
871: PetscErrorCode ierr;
872: PetscMPIInt rank = aij->rank,size = aij->size;
873: PetscTruth isdraw,iascii,isbinary;
874: PetscViewer sviewer;
875: PetscViewerFormat format;
878: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
879: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
880: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
881: if (iascii) {
882: PetscViewerGetFormat(viewer,&format);
883: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
884: MatInfo info;
885: PetscTruth inodes;
887: MPI_Comm_rank(mat->comm,&rank);
888: MatGetInfo(mat,MAT_LOCAL,&info);
889: MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);
890: if (!inodes) {
891: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
892: rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
893: } else {
894: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
895: rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
896: }
897: MatGetInfo(aij->A,MAT_LOCAL,&info);
898: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
899: MatGetInfo(aij->B,MAT_LOCAL,&info);
900: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
901: PetscViewerFlush(viewer);
902: PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
903: VecScatterView(aij->Mvctx,viewer);
904: return(0);
905: } else if (format == PETSC_VIEWER_ASCII_INFO) {
906: PetscInt inodecount,inodelimit,*inodes;
907: MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);
908: if (inodes) {
909: PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);
910: } else {
911: PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");
912: }
913: return(0);
914: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
915: return(0);
916: }
917: } else if (isbinary) {
918: if (size == 1) {
919: PetscObjectSetName((PetscObject)aij->A,mat->name);
920: MatView(aij->A,viewer);
921: } else {
922: MatView_MPIAIJ_Binary(mat,viewer);
923: }
924: return(0);
925: } else if (isdraw) {
926: PetscDraw draw;
927: PetscTruth isnull;
928: PetscViewerDrawGetDraw(viewer,0,&draw);
929: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
930: }
932: if (size == 1) {
933: PetscObjectSetName((PetscObject)aij->A,mat->name);
934: MatView(aij->A,viewer);
935: } else {
936: /* assemble the entire matrix onto first processor. */
937: Mat A;
938: Mat_SeqAIJ *Aloc;
939: PetscInt M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
940: PetscScalar *a;
942: MatCreate(mat->comm,&A);
943: if (!rank) {
944: MatSetSizes(A,M,N,M,N);
945: } else {
946: MatSetSizes(A,0,0,M,N);
947: }
948: /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
949: MatSetType(A,MATMPIAIJ);
950: MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);
951: PetscLogObjectParent(mat,A);
953: /* copy over the A part */
954: Aloc = (Mat_SeqAIJ*)aij->A->data;
955: m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
956: row = mat->rmap.rstart;
957: for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
958: for (i=0; i<m; i++) {
959: MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
960: row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
961: }
962: aj = Aloc->j;
963: for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
965: /* copy over the B part */
966: Aloc = (Mat_SeqAIJ*)aij->B->data;
967: m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
968: row = mat->rmap.rstart;
969: PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);
970: ct = cols;
971: for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
972: for (i=0; i<m; i++) {
973: MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
974: row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
975: }
976: PetscFree(ct);
977: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
978: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
979: /*
980: Everyone has to call to draw the matrix since the graphics waits are
981: synchronized across all processors that share the PetscDraw object
982: */
983: PetscViewerGetSingleton(viewer,&sviewer);
984: if (!rank) {
985: PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);
986: MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);
987: }
988: PetscViewerRestoreSingleton(viewer,&sviewer);
989: MatDestroy(A);
990: }
991: return(0);
992: }
996: PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
997: {
999: PetscTruth iascii,isdraw,issocket,isbinary;
1000:
1002: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1003: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1004: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1005: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
1006: if (iascii || isdraw || isbinary || issocket) {
1007: MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
1008: } else {
1009: SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1010: }
1011: return(0);
1012: }
1016: PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1017: {
1018: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1020: Vec bb1;
1023: if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1025: VecDuplicate(bb,&bb1);
1027: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1028: if (flag & SOR_ZERO_INITIAL_GUESS) {
1029: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);
1030: its--;
1031: }
1032:
1033: while (its--) {
1034: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1035: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1037: /* update rhs: bb1 = bb - B*x */
1038: VecScale(mat->lvec,-1.0);
1039: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1041: /* local sweep */
1042: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1043:
1044: }
1045: } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1046: if (flag & SOR_ZERO_INITIAL_GUESS) {
1047: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
1048: its--;
1049: }
1050: while (its--) {
1051: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1052: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1054: /* update rhs: bb1 = bb - B*x */
1055: VecScale(mat->lvec,-1.0);
1056: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1058: /* local sweep */
1059: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1060:
1061: }
1062: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1063: if (flag & SOR_ZERO_INITIAL_GUESS) {
1064: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
1065: its--;
1066: }
1067: while (its--) {
1068: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1069: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1071: /* update rhs: bb1 = bb - B*x */
1072: VecScale(mat->lvec,-1.0);
1073: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1075: /* local sweep */
1076: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1077:
1078: }
1079: } else {
1080: SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1081: }
1083: VecDestroy(bb1);
1084: return(0);
1085: }
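MatRelax() above is normally reached through PCSOR, and only the SOR_LOCAL_* variants are supported (off-process couplings are moved to the right-hand side each outer iteration). A solver-level sketch, assuming petscksp.h is included (illustrative helper; error checking omitted):

/* Illustrative only: use local (processor-block) SOR as a preconditioner. */
static PetscErrorCode ExampleLocalSOR(Mat A,Vec b,Vec x)
{
  KSP ksp;
  PC  pc;

  KSPCreate(A->comm,&ksp);
  KSPSetOperators(ksp,A,A,SAME_NONZERO_PATTERN);
  KSPGetPC(ksp,&pc);
  PCSetType(pc,PCSOR);
  PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);  /* corresponds to the first branch above */
  PCSORSetOmega(pc,1.0);
  KSPSolve(ksp,b,x);
  KSPDestroy(ksp);
  return 0;
}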
1089: PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1090: {
1091: MPI_Comm comm,pcomm;
1092: PetscInt first,local_size,nrows,*rows;
1093: int ntids;
1094: IS crowp,growp,irowp,lrowp,lcolp,icolp;
1098: PetscObjectGetComm((PetscObject)A,&comm);
1099: /* make a collective version of 'rowp' */
1100: PetscObjectGetComm((PetscObject)rowp,&pcomm);
1101: if (pcomm==comm) {
1102: crowp = rowp;
1103: } else {
1104: ISGetSize(rowp,&nrows);
1105: ISGetIndices(rowp,&rows);
1106: ISCreateGeneral(comm,nrows,rows,&crowp);
1107: ISRestoreIndices(rowp,&rows);
1108: }
1109: /* collect the global row permutation and invert it */
1110: ISAllGather(crowp,&growp);
1111: ISSetPermutation(growp);
1112: if (pcomm!=comm) {
1113: ISDestroy(crowp);
1114: }
1115: ISInvertPermutation(growp,PETSC_DECIDE,&irowp);
1116: /* get the local target indices */
1117: MatGetOwnershipRange(A,&first,PETSC_NULL);
1118: MatGetLocalSize(A,&local_size,PETSC_NULL);
1119: ISGetIndices(irowp,&rows);
1120: ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);
1121: ISRestoreIndices(irowp,&rows);
1122: ISDestroy(irowp);
1123: /* the column permutation is so much easier;
1124: make a local version of 'colp' and invert it */
1125: PetscObjectGetComm((PetscObject)colp,&pcomm);
1126: MPI_Comm_size(pcomm,&ntids);
1127: if (ntids==1) {
1128: lcolp = colp;
1129: } else {
1130: ISGetSize(colp,&nrows);
1131: ISGetIndices(colp,&rows);
1132: ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);
1133: }
1134: ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);
1135: ISSetPermutation(lcolp);
1136: if (ntids>1) {
1137: ISRestoreIndices(colp,&rows);
1138: ISDestroy(lcolp);
1139: }
1140: /* now we just get the submatrix */
1141: MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);
1142: /* clean up */
1143: ISDestroy(lrowp);
1144: ISDestroy(icolp);
1145: return(0);
1146: }
1150: PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1151: {
1152: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1153: Mat A = mat->A,B = mat->B;
1155: PetscReal isend[5],irecv[5];
1158: info->block_size = 1.0;
1159: MatGetInfo(A,MAT_LOCAL,info);
1160: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1161: isend[3] = info->memory; isend[4] = info->mallocs;
1162: MatGetInfo(B,MAT_LOCAL,info);
1163: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1164: isend[3] += info->memory; isend[4] += info->mallocs;
1165: if (flag == MAT_LOCAL) {
1166: info->nz_used = isend[0];
1167: info->nz_allocated = isend[1];
1168: info->nz_unneeded = isend[2];
1169: info->memory = isend[3];
1170: info->mallocs = isend[4];
1171: } else if (flag == MAT_GLOBAL_MAX) {
1172: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
1173: info->nz_used = irecv[0];
1174: info->nz_allocated = irecv[1];
1175: info->nz_unneeded = irecv[2];
1176: info->memory = irecv[3];
1177: info->mallocs = irecv[4];
1178: } else if (flag == MAT_GLOBAL_SUM) {
1179: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
1180: info->nz_used = irecv[0];
1181: info->nz_allocated = irecv[1];
1182: info->nz_unneeded = irecv[2];
1183: info->memory = irecv[3];
1184: info->mallocs = irecv[4];
1185: }
1186: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1187: info->fill_ratio_needed = 0;
1188: info->factor_mallocs = 0;
1189: info->rows_global = (double)matin->rmap.N;
1190: info->columns_global = (double)matin->cmap.N;
1191: info->rows_local = (double)matin->rmap.n;
1192: info->columns_local = (double)matin->cmap.N;
1194: return(0);
1195: }
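A usage sketch of the reductions above (illustrative helper; error checking omitted): MAT_GLOBAL_SUM totals the per-process counts, MAT_GLOBAL_MAX takes their maximum, and MAT_LOCAL returns just this process's share.

/* Illustrative only: report global nonzero and malloc counts for an MPIAIJ matrix. */
static PetscErrorCode ExampleReportInfo(Mat A)
{
  MatInfo info;

  MatGetInfo(A,MAT_GLOBAL_SUM,&info);
  PetscPrintf(A->comm,"nz used %D, nz allocated %D, mallocs %D\n",
              (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.mallocs);
  return 0;
}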
1199: PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1200: {
1201: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1205: switch (op) {
1206: case MAT_NO_NEW_NONZERO_LOCATIONS:
1207: case MAT_YES_NEW_NONZERO_LOCATIONS:
1208: case MAT_COLUMNS_UNSORTED:
1209: case MAT_COLUMNS_SORTED:
1210: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1211: case MAT_KEEP_ZEROED_ROWS:
1212: case MAT_NEW_NONZERO_LOCATION_ERR:
1213: case MAT_USE_INODES:
1214: case MAT_DO_NOT_USE_INODES:
1215: case MAT_IGNORE_ZERO_ENTRIES:
1216: MatSetOption(a->A,op);
1217: MatSetOption(a->B,op);
1218: break;
1219: case MAT_ROW_ORIENTED:
1220: a->roworiented = PETSC_TRUE;
1221: MatSetOption(a->A,op);
1222: MatSetOption(a->B,op);
1223: break;
1224: case MAT_ROWS_SORTED:
1225: case MAT_ROWS_UNSORTED:
1226: case MAT_YES_NEW_DIAGONALS:
1227: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1228: break;
1229: case MAT_COLUMN_ORIENTED:
1230: a->roworiented = PETSC_FALSE;
1231: MatSetOption(a->A,op);
1232: MatSetOption(a->B,op);
1233: break;
1234: case MAT_IGNORE_OFF_PROC_ENTRIES:
1235: a->donotstash = PETSC_TRUE;
1236: break;
1237: case MAT_NO_NEW_DIAGONALS:
1238: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1239: case MAT_SYMMETRIC:
1240: MatSetOption(a->A,op);
1241: break;
1242: case MAT_STRUCTURALLY_SYMMETRIC:
1243: case MAT_HERMITIAN:
1244: case MAT_SYMMETRY_ETERNAL:
1245: MatSetOption(a->A,op);
1246: break;
1247: case MAT_NOT_SYMMETRIC:
1248: case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1249: case MAT_NOT_HERMITIAN:
1250: case MAT_NOT_SYMMETRY_ETERNAL:
1251: break;
1252: default:
1253: SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1254: }
1255: return(0);
1256: }
1260: PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1261: {
1262: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1263: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1265: PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1266: PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1267: PetscInt *cmap,*idx_p;
1270: if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1271: mat->getrowactive = PETSC_TRUE;
1273: if (!mat->rowvalues && (idx || v)) {
1274: /*
1275: allocate enough space to hold information from the longest row.
1276: */
1277: Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1278: PetscInt max = 1,tmp;
1279: for (i=0; i<matin->rmap.n; i++) {
1280: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1281: if (max < tmp) { max = tmp; }
1282: }
1283: PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);
1284: mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1285: }
1287: if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1288: lrow = row - rstart;
1290: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1291: if (!v) {pvA = 0; pvB = 0;}
1292: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1293: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1294: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1295: nztot = nzA + nzB;
1297: cmap = mat->garray;
1298: if (v || idx) {
1299: if (nztot) {
1300: /* Sort by increasing column numbers, assuming A and B already sorted */
1301: PetscInt imark = -1;
1302: if (v) {
1303: *v = v_p = mat->rowvalues;
1304: for (i=0; i<nzB; i++) {
1305: if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1306: else break;
1307: }
1308: imark = i;
1309: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1310: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1311: }
1312: if (idx) {
1313: *idx = idx_p = mat->rowindices;
1314: if (imark > -1) {
1315: for (i=0; i<imark; i++) {
1316: idx_p[i] = cmap[cworkB[i]];
1317: }
1318: } else {
1319: for (i=0; i<nzB; i++) {
1320: if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1321: else break;
1322: }
1323: imark = i;
1324: }
1325: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i];
1326: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]];
1327: }
1328: } else {
1329: if (idx) *idx = 0;
1330: if (v) *v = 0;
1331: }
1332: }
1333: *nz = nztot;
1334: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1335: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1336: return(0);
1337: }
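A typical traversal built on the routine above (illustrative helper; error checking omitted): MatGetRow() returns one locally owned row at a time with global, increasing column numbers, and MatRestoreRow() must be called before requesting the next row.

/* Illustrative only: compute the largest absolute row sum over the locally owned rows. */
static PetscErrorCode ExampleRowSums(Mat A)
{
  PetscInt    rstart,rend,row,ncols,*cols,j;
  PetscScalar *vals;
  PetscReal   rowsum,maxsum = 0.0;

  MatGetOwnershipRange(A,&rstart,&rend);
  for (row=rstart; row<rend; row++) {
    MatGetRow(A,row,&ncols,&cols,&vals);      /* columns come back global and sorted */
    rowsum = 0.0;
    for (j=0; j<ncols; j++) rowsum += PetscAbsScalar(vals[j]);
    if (rowsum > maxsum) maxsum = rowsum;
    MatRestoreRow(A,row,&ncols,&cols,&vals);  /* required before the next MatGetRow() */
  }
  PetscPrintf(PETSC_COMM_SELF,"largest local row sum %G\n",maxsum);
  return 0;
}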
1341: PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1342: {
1343: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1346: if (!aij->getrowactive) {
1347: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1348: }
1349: aij->getrowactive = PETSC_FALSE;
1350: return(0);
1351: }
1355: PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1356: {
1357: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1358: Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1360: PetscInt i,j,cstart = mat->cmap.rstart;
1361: PetscReal sum = 0.0;
1362: PetscScalar *v;
1365: if (aij->size == 1) {
1366: MatNorm(aij->A,type,norm);
1367: } else {
1368: if (type == NORM_FROBENIUS) {
1369: v = amat->a;
1370: for (i=0; i<amat->nz; i++) {
1371: #if defined(PETSC_USE_COMPLEX)
1372: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1373: #else
1374: sum += (*v)*(*v); v++;
1375: #endif
1376: }
1377: v = bmat->a;
1378: for (i=0; i<bmat->nz; i++) {
1379: #if defined(PETSC_USE_COMPLEX)
1380: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1381: #else
1382: sum += (*v)*(*v); v++;
1383: #endif
1384: }
1385: MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);
1386: *norm = sqrt(*norm);
1387: } else if (type == NORM_1) { /* max column norm */
1388: PetscReal *tmp,*tmp2;
1389: PetscInt *jj,*garray = aij->garray;
1390: PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);
1391: PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);
1392: PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));
1393: *norm = 0.0;
1394: v = amat->a; jj = amat->j;
1395: for (j=0; j<amat->nz; j++) {
1396: tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++;
1397: }
1398: v = bmat->a; jj = bmat->j;
1399: for (j=0; j<bmat->nz; j++) {
1400: tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1401: }
1402: MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);
1403: for (j=0; j<mat->cmap.N; j++) {
1404: if (tmp2[j] > *norm) *norm = tmp2[j];
1405: }
1406: PetscFree(tmp);
1407: PetscFree(tmp2);
1408: } else if (type == NORM_INFINITY) { /* max row norm */
1409: PetscReal ntemp = 0.0;
1410: for (j=0; j<aij->A->rmap.n; j++) {
1411: v = amat->a + amat->i[j];
1412: sum = 0.0;
1413: for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1414: sum += PetscAbsScalar(*v); v++;
1415: }
1416: v = bmat->a + bmat->i[j];
1417: for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1418: sum += PetscAbsScalar(*v); v++;
1419: }
1420: if (sum > ntemp) ntemp = sum;
1421: }
1422: MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);
1423: } else {
1424: SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1425: }
1426: }
1427: return(0);
1428: }
1432: PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1433: {
1434: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1435: Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1437: PetscInt M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz;
1438: PetscInt cstart=A->cmap.rstart,ncol;
1439: Mat B;
1440: PetscScalar *array;
1443: if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1445: /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1446: ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n;
1447: ai = Aloc->i; aj = Aloc->j;
1448: bi = Bloc->i; bj = Bloc->j;
1449: PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);
1450: cols = d_nnz + na + 1; /* work space to be used by B part */
1451: PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));
1452: for (i=0; i<ai[ma]; i++){
1453: d_nnz[aj[i]] ++;
1454: aj[i] += cstart; /* global col index to be used by MatSetValues() */
1455: }
1457: MatCreate(A->comm,&B);
1458: MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);
1459: MatSetType(B,A->type_name);
1460: MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);
1462: /* copy over the A part */
1463: array = Aloc->a;
1464: row = A->rmap.rstart;
1465: for (i=0; i<ma; i++) {
1466: ncol = ai[i+1]-ai[i];
1467: MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);
1468: row++; array += ncol; aj += ncol;
1469: }
1470: aj = Aloc->j;
1471: for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
1473: /* copy over the B part */
1474: array = Bloc->a;
1475: row = A->rmap.rstart;
1476: for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1477: for (i=0; i<mb; i++) {
1478: ncol = bi[i+1]-bi[i];
1479: MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);
1480: row++; array += ncol; cols += ncol;
1481: }
1482: PetscFree(d_nnz);
1483: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1484: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1485: if (matout) {
1486: *matout = B;
1487: } else {
1488: MatHeaderCopy(A,B);
1489: }
1490: return(0);
1491: }
1495: PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1496: {
1497: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1498: Mat a = aij->A,b = aij->B;
1500: PetscInt s1,s2,s3;
1503: MatGetLocalSize(mat,&s2,&s3);
1504: if (rr) {
1505: VecGetLocalSize(rr,&s1);
1506: if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1507: /* Overlap communication with computation. */
1508: VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1509: }
1510: if (ll) {
1511: VecGetLocalSize(ll,&s1);
1512: if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1513: (*b->ops->diagonalscale)(b,ll,0);
1514: }
1515: /* scale the diagonal block */
1516: (*a->ops->diagonalscale)(a,ll,rr);
1518: if (rr) {
1519: /* Do a scatter end and then right scale the off-diagonal block */
1520: VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1521: (*b->ops->diagonalscale)(b,0,aij->lvec);
1522: }
1523:
1524: return(0);
1525: }
1529: PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1530: {
1531: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1535: MatSetBlockSize(a->A,bs);
1536: MatSetBlockSize(a->B,bs);
1537: return(0);
1538: }
1541: PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1542: {
1543: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1547: MatSetUnfactored(a->A);
1548: return(0);
1549: }
1553: PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1554: {
1555: Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1556: Mat a,b,c,d;
1557: PetscTruth flg;
1561: a = matA->A; b = matA->B;
1562: c = matB->A; d = matB->B;
1564: MatEqual(a,c,&flg);
1565: if (flg) {
1566: MatEqual(b,d,&flg);
1567: }
1568: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);
1569: return(0);
1570: }
1574: PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1575: {
1577: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1578: Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1581: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1582: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1583: /* because of the column compression in the off-processor part of the matrix a->B,
1584: the number of columns in a->B and b->B may be different, hence we cannot call
1585: the MatCopy() directly on the two parts. If need be, we can provide a more
1586: efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1587: then copying the submatrices */
1588: MatCopy_Basic(A,B,str);
1589: } else {
1590: MatCopy(a->A,b->A,str);
1591: MatCopy(a->B,b->B,str);
1592: }
1593: return(0);
1594: }
1598: PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1599: {
1603: MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1604: return(0);
1605: }
1607: #include "petscblaslapack.h"
1610: PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1611: {
1613: PetscInt i;
1614: Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1615: PetscBLASInt bnz,one=1;
1616: Mat_SeqAIJ *x,*y;
1619: if (str == SAME_NONZERO_PATTERN) {
1620: PetscScalar alpha = a;
1621: x = (Mat_SeqAIJ *)xx->A->data;
1622: y = (Mat_SeqAIJ *)yy->A->data;
1623: bnz = (PetscBLASInt)x->nz;
1624: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1625: x = (Mat_SeqAIJ *)xx->B->data;
1626: y = (Mat_SeqAIJ *)yy->B->data;
1627: bnz = (PetscBLASInt)x->nz;
1628: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1629: } else if (str == SUBSET_NONZERO_PATTERN) {
1630: MatAXPY_SeqAIJ(yy->A,a,xx->A,str);
1632: x = (Mat_SeqAIJ *)xx->B->data;
1633: y = (Mat_SeqAIJ *)yy->B->data;
1634: if (y->xtoy && y->XtoY != xx->B) {
1635: PetscFree(y->xtoy);
1636: MatDestroy(y->XtoY);
1637: }
1638: if (!y->xtoy) { /* get xtoy */
1639: MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);
1640: y->XtoY = xx->B;
1641: PetscObjectReference((PetscObject)xx->B);
1642: }
1643: for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1644: } else {
1645: MatAXPY_Basic(Y,a,X,str);
1646: }
1647: return(0);
1648: }
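A usage sketch for the routine above (illustrative helper): SAME_NONZERO_PATTERN takes the BLAS axpy fast path, SUBSET_NONZERO_PATTERN builds and caches the xtoy index map, and anything else falls back to MatAXPY_Basic().

/* Illustrative only: Y <- Y + 2*X when X and Y were preallocated with the same pattern. */
static PetscErrorCode ExampleAXPY(Mat Y,Mat X)
{
  PetscScalar a = 2.0;

  MatAXPY(Y,a,X,SAME_NONZERO_PATTERN);
  return 0;
}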
1650: EXTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
1654: PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
1655: {
1656: #if defined(PETSC_USE_COMPLEX)
1658: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1661: MatConjugate_SeqAIJ(aij->A);
1662: MatConjugate_SeqAIJ(aij->B);
1663: #else
1665: #endif
1666: return(0);
1667: }
1671: PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1672: {
1673: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1677: MatRealPart(a->A);
1678: MatRealPart(a->B);
1679: return(0);
1680: }
1684: PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1685: {
1686: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1690: MatImaginaryPart(a->A);
1691: MatImaginaryPart(a->B);
1692: return(0);
1693: }
1695: #ifdef PETSC_HAVE_PBGL
1697: #include <boost/parallel/mpi/bsp_process_group.hpp>
1698: #include <boost/graph/distributed/ilu_default_graph.hpp>
1699: #include <boost/graph/distributed/ilu_0_block.hpp>
1700: #include <boost/graph/distributed/ilu_preconditioner.hpp>
1701: #include <boost/graph/distributed/petsc/interface.hpp>
1702: #include <boost/multi_array.hpp>
1703: #include <boost/parallel/distributed_property_map.hpp>
1707: /*
1708: This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1709: */
1710: PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1711: {
1712: namespace petsc = boost::distributed::petsc;
1713:
1714: namespace graph_dist = boost::graph::distributed;
1715: using boost::graph::distributed::ilu_default::process_group_type;
1716: using boost::graph::ilu_permuted;
1718: PetscTruth row_identity, col_identity;
1719: PetscContainer c;
1720: PetscInt m, n, M, N;
1721: PetscErrorCode ierr;
1724: if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1725: ISIdentity(isrow, &row_identity);
1726: ISIdentity(iscol, &col_identity);
1727: if (!row_identity || !col_identity) {
1728: SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1729: }
1731: process_group_type pg;
1732: typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
1733: lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1734: lgraph_type& level_graph = *lgraph_p;
1735: graph_dist::ilu_default::graph_type& graph(level_graph.graph);
1737: petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1738: ilu_permuted(level_graph);
1740: /* put together the new matrix */
1741: MatCreate(A->comm, fact);
1742: MatGetLocalSize(A, &m, &n);
1743: MatGetSize(A, &M, &N);
1744: MatSetSizes(*fact, m, n, M, N);
1745: MatSetType(*fact, A->type_name);
1746: MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);
1747: MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);
1748: (*fact)->factor = FACTOR_LU;
1750: PetscContainerCreate(A->comm, &c);
1751: PetscContainerSetPointer(c, lgraph_p);
1752: PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);
1753: return(0);
1754: }
1758: PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1759: {
1761: return(0);
1762: }
1766: /*
1767: This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1768: */
1769: PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1770: {
1771: namespace graph_dist = boost::graph::distributed;
1773: typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
1774: lgraph_type* lgraph_p;
1775: PetscContainer c;
1779: PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);
1780: PetscContainerGetPointer(c, (void **) &lgraph_p);
1781: VecCopy(b, x);
1783: PetscScalar* array_x;
1784: VecGetArray(x, &array_x);
1785: PetscInt sx;
1786: VecGetSize(x, &sx);
1787:
1788: PetscScalar* array_b;
1789: VecGetArray(b, &array_b);
1790: PetscInt sb;
1791: VecGetSize(b, &sb);
1793: lgraph_type& level_graph = *lgraph_p;
1794: graph_dist::ilu_default::graph_type& graph(level_graph.graph);
1796: typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1797: array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]),
1798: ref_x(array_x, boost::extents[num_vertices(graph)]);
1800: typedef boost::iterator_property_map<array_ref_type::iterator,
1801: boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
1802: gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1803: vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1804:
1805: ilu_set_solve(*lgraph_p, vector_b, vector_x);
1807: return(0);
1808: }
1809: #endif
1811: typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1812: PetscInt nzlocal,nsends,nrecvs;
1813: PetscMPIInt *send_rank;
1814: PetscInt *sbuf_nz,*sbuf_j,**rbuf_j;
1815: PetscScalar *sbuf_a,**rbuf_a;
1816: PetscErrorCode (*MatDestroy)(Mat);
1817: } Mat_Redundant;
1821: PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1822: {
1823: PetscErrorCode ierr;
1824: Mat_Redundant *redund=(Mat_Redundant*)ptr;
1825: PetscInt i;
1828: PetscFree(redund->send_rank);
1829: PetscFree(redund->sbuf_j);
1830: PetscFree(redund->sbuf_a);
1831: for (i=0; i<redund->nrecvs; i++){
1832: PetscFree(redund->rbuf_j[i]);
1833: PetscFree(redund->rbuf_a[i]);
1834: }
1835: PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);
1836: PetscFree(redund);
1837: return(0);
1838: }
1842: PetscErrorCode MatDestroy_MatRedundant(Mat A)
1843: {
1844: PetscErrorCode ierr;
1845: PetscContainer container;
1846: Mat_Redundant *redund=PETSC_NULL;
1849: PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);
1850: if (container) {
1851: PetscContainerGetPointer(container,(void **)&redund);
1852: } else {
1853: SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
1854: }
1855: A->ops->destroy = redund->MatDestroy;
1856: PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);
1857: (*A->ops->destroy)(A);
1858: PetscContainerDestroy(container);
1859: return(0);
1860: }
1864: PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
1865: {
1866: PetscMPIInt rank,size;
1867: MPI_Comm comm=mat->comm;
1869: PetscInt nsends=0,nrecvs=0,i,rownz_max=0;
1870: PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
1871: PetscInt *rowrange=mat->rmap.range;
1872: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1873: Mat A=aij->A,B=aij->B,C=*matredundant;
1874: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
1875: PetscScalar *sbuf_a;
1876: PetscInt nzlocal=a->nz+b->nz;
1877: PetscInt j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
1878: PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
1879: PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
1880: PetscScalar *vals,*aworkA,*aworkB;
1881: PetscMPIInt tag1,tag2,tag3,imdex;
1882: MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
1883: *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
1884: MPI_Status recv_status,*send_status;
1885: PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
1886: PetscInt **rbuf_j=PETSC_NULL;
1887: PetscScalar **rbuf_a=PETSC_NULL;
1888: Mat_Redundant *redund=PETSC_NULL;
1889: PetscContainer container;
1892: MPI_Comm_rank(comm,&rank);
1893: MPI_Comm_size(comm,&size);
1895: if (reuse == MAT_REUSE_MATRIX) {
1896: MatGetSize(C,&M,&N);
1897: if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
1898: MatGetLocalSize(C,&M,&N);
1899: if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
1900: PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);
1901: if (container) {
1902: PetscContainerGetPointer(container,(void **)&redund);
1903: } else {
1904: SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
1905: }
1906: if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
1908: nsends = redund->nsends;
1909: nrecvs = redund->nrecvs;
1910: send_rank = redund->send_rank; recv_rank = send_rank + size;
1911: sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends;
1912: sbuf_j = redund->sbuf_j;
1913: sbuf_a = redund->sbuf_a;
1914: rbuf_j = redund->rbuf_j;
1915: rbuf_a = redund->rbuf_a;
1916: }
1918: if (reuse == MAT_INITIAL_MATRIX){
1919: PetscMPIInt subrank,subsize;
1920: PetscInt nleftover,np_subcomm;
1921: /* get the destination processors' id send_rank, nsends and nrecvs */
1922: MPI_Comm_rank(subcomm,&subrank);
1923: MPI_Comm_size(subcomm,&subsize);
1924: PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
1925: recv_rank = send_rank + size;
1926: np_subcomm = size/nsubcomm;
1927: nleftover = size - nsubcomm*np_subcomm;
1928: nsends = 0; nrecvs = 0;
1929: for (i=0; i<size; i++){ /* i=rank*/
1930: if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
1931: send_rank[nsends] = i; nsends++;
1932: recv_rank[nrecvs++] = i;
1933: }
1934: }
1935: if (rank >= size - nleftover){/* this proc is a leftover processor */
1936: i = size-nleftover-1;
1937: j = 0;
1938: while (j < nsubcomm - nleftover){
1939: send_rank[nsends++] = i;
1940: i--; j++;
1941: }
1942: }
1944: if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
1945: for (i=0; i<nleftover; i++){
1946: recv_rank[nrecvs++] = size-nleftover+i;
1947: }
1948: }
1950: /* allocate sbuf_j, sbuf_a */
1951: i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
1952: PetscMalloc(i*sizeof(PetscInt),&sbuf_j);
1953: PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);
1954: } /* end of if (reuse == MAT_INITIAL_MATRIX) */
1955:
1956: /* copy mat's local entries into the buffers */
1957: if (reuse == MAT_INITIAL_MATRIX){
1958: rownz_max = 0;
1959: rptr = sbuf_j;
1960: cols = sbuf_j + rend-rstart + 1;
1961: vals = sbuf_a;
1962: rptr[0] = 0;
1963: for (i=0; i<rend-rstart; i++){
1964: row = i + rstart;
1965: nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1966: ncols = nzA + nzB;
1967: cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1968: aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1969: /* load the column indices for this row into cols */
1970: lwrite = 0;
1971: for (l=0; l<nzB; l++) {
1972: if ((ctmp = bmap[cworkB[l]]) < cstart){
1973: vals[lwrite] = aworkB[l];
1974: cols[lwrite++] = ctmp;
1975: }
1976: }
1977: for (l=0; l<nzA; l++){
1978: vals[lwrite] = aworkA[l];
1979: cols[lwrite++] = cstart + cworkA[l];
1980: }
1981: for (l=0; l<nzB; l++) {
1982: if ((ctmp = bmap[cworkB[l]]) >= cend){
1983: vals[lwrite] = aworkB[l];
1984: cols[lwrite++] = ctmp;
1985: }
1986: }
1987: vals += ncols;
1988: cols += ncols;
1989: rptr[i+1] = rptr[i] + ncols;
1990: if (rownz_max < ncols) rownz_max = ncols;
1991: }
1992: if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
1993: } else { /* only copy matrix values into sbuf_a */
1994: rptr = sbuf_j;
1995: vals = sbuf_a;
1996: rptr[0] = 0;
1997: for (i=0; i<rend-rstart; i++){
1998: row = i + rstart;
1999: nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2000: ncols = nzA + nzB;
2001: cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2002: aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2003: lwrite = 0;
2004: for (l=0; l<nzB; l++) {
2005: if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2006: }
2007: for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2008: for (l=0; l<nzB; l++) {
2009: if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2010: }
2011: vals += ncols;
2012: rptr[i+1] = rptr[i] + ncols;
2013: }
2014: } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2016: /* send nzlocal to others, and recv other's nzlocal */
2017: /*--------------------------------------------------*/
2018: if (reuse == MAT_INITIAL_MATRIX){
2019: PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);
2020: s_waits2 = s_waits3 + nsends;
2021: s_waits1 = s_waits2 + nsends;
2022: r_waits1 = s_waits1 + nsends;
2023: r_waits2 = r_waits1 + nrecvs;
2024: r_waits3 = r_waits2 + nrecvs;
2025: } else {
2026: PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);
2027: r_waits3 = s_waits3 + nsends;
2028: }
2030: PetscObjectGetNewTag((PetscObject)mat,&tag3);
2031: if (reuse == MAT_INITIAL_MATRIX){
2032: /* get new tags to keep the communication clean */
2033: PetscObjectGetNewTag((PetscObject)mat,&tag1);
2034: PetscObjectGetNewTag((PetscObject)mat,&tag2);
2035: PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);
2036: rbuf_nz = sbuf_nz + nsends;
2037:
2038: /* post receives of other's nzlocal */
2039: for (i=0; i<nrecvs; i++){
2040: MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);
2041: }
2042: /* send nzlocal to others */
2043: for (i=0; i<nsends; i++){
2044: sbuf_nz[i] = nzlocal;
2045: MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);
2046: }
2047: /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2048: count = nrecvs;
2049: while (count) {
2050: MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);
2051: recv_rank[imdex] = recv_status.MPI_SOURCE;
2052: /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2053: PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);
2055: i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2056: rbuf_nz[imdex] += i + 2;
2057: PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);
2058: MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);
2059: count--;
2060: }
2061: /* wait on sends of nzlocal */
2062: if (nsends) {MPI_Waitall(nsends,s_waits1,send_status);}
2063: /* send mat->i,j to others, and recv from others */
2064: /*------------------------------------------------*/
2065: for (i=0; i<nsends; i++){
2066: j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2067: MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);
2068: }
2069: /* wait on receives of mat->i,j */
2070: /*------------------------------*/
2071: count = nrecvs;
2072: while (count) {
2073: MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);
2074: if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2075: count--;
2076: }
2077: /* wait on sends of mat->i,j */
2078: /*---------------------------*/
2079: if (nsends) {
2080: MPI_Waitall(nsends,s_waits2,send_status);
2081: }
2082: } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2084: /* post receives, send and receive mat->a */
2085: /*----------------------------------------*/
2086: for (imdex=0; imdex<nrecvs; imdex++) {
2087: MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);
2088: }
2089: for (i=0; i<nsends; i++){
2090: MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);
2091: }
2092: count = nrecvs;
2093: while (count) {
2094: MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);
2095: if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2096: count--;
2097: }
2098: if (nsends) {
2099: MPI_Waitall(nsends,s_waits3,send_status);
2100: }
2102: PetscFree2(s_waits3,send_status);
2103:
2104: /* create redundant matrix */
2105: /*-------------------------*/
2106: if (reuse == MAT_INITIAL_MATRIX){
2107: /* compute rownz_max for preallocation */
2108: for (imdex=0; imdex<nrecvs; imdex++){
2109: j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2110: rptr = rbuf_j[imdex];
2111: for (i=0; i<j; i++){
2112: ncols = rptr[i+1] - rptr[i];
2113: if (rownz_max < ncols) rownz_max = ncols;
2114: }
2115: }
2116:
2117: MatCreate(subcomm,&C);
2118: MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);
2119: MatSetFromOptions(C);
2120: MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);
2121: MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);
2122: } else {
2123: C = *matredundant;
2124: }
2126: /* insert local matrix entries */
2127: rptr = sbuf_j;
2128: cols = sbuf_j + rend-rstart + 1;
2129: vals = sbuf_a;
2130: for (i=0; i<rend-rstart; i++){
2131: row = i + rstart;
2132: ncols = rptr[i+1] - rptr[i];
2133: MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
2134: vals += ncols;
2135: cols += ncols;
2136: }
2137: /* insert received matrix entries */
2138: for (imdex=0; imdex<nrecvs; imdex++){
2139: rstart = rowrange[recv_rank[imdex]];
2140: rend = rowrange[recv_rank[imdex]+1];
2141: rptr = rbuf_j[imdex];
2142: cols = rbuf_j[imdex] + rend-rstart + 1;
2143: vals = rbuf_a[imdex];
2144: for (i=0; i<rend-rstart; i++){
2145: row = i + rstart;
2146: ncols = rptr[i+1] - rptr[i];
2147: MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
2148: vals += ncols;
2149: cols += ncols;
2150: }
2151: }
2152: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2153: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2154: MatGetSize(C,&M,&N);
2155: if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
2156: if (reuse == MAT_INITIAL_MATRIX){
2157: PetscContainer container;
2158: *matredundant = C;
2159: /* create a supporting struct and attach it to C for reuse */
2160: PetscNew(Mat_Redundant,&redund);
2161: PetscContainerCreate(PETSC_COMM_SELF,&container);
2162: PetscContainerSetPointer(container,redund);
2163: PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);
2164: PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);
2165:
2166: redund->nzlocal = nzlocal;
2167: redund->nsends = nsends;
2168: redund->nrecvs = nrecvs;
2169: redund->send_rank = send_rank;
2170: redund->sbuf_nz = sbuf_nz;
2171: redund->sbuf_j = sbuf_j;
2172: redund->sbuf_a = sbuf_a;
2173: redund->rbuf_j = rbuf_j;
2174: redund->rbuf_a = rbuf_a;
2176: redund->MatDestroy = C->ops->destroy;
2177: C->ops->destroy = MatDestroy_MatRedundant;
2178: }
2179: return(0);
2180: }
2182: /* -------------------------------------------------------------------*/
2183: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2184: MatGetRow_MPIAIJ,
2185: MatRestoreRow_MPIAIJ,
2186: MatMult_MPIAIJ,
2187: /* 4*/ MatMultAdd_MPIAIJ,
2188: MatMultTranspose_MPIAIJ,
2189: MatMultTransposeAdd_MPIAIJ,
2190: #ifdef PETSC_HAVE_PBGL
2191: MatSolve_MPIAIJ,
2192: #else
2193: 0,
2194: #endif
2195: 0,
2196: 0,
2197: /*10*/ 0,
2198: 0,
2199: 0,
2200: MatRelax_MPIAIJ,
2201: MatTranspose_MPIAIJ,
2202: /*15*/ MatGetInfo_MPIAIJ,
2203: MatEqual_MPIAIJ,
2204: MatGetDiagonal_MPIAIJ,
2205: MatDiagonalScale_MPIAIJ,
2206: MatNorm_MPIAIJ,
2207: /*20*/ MatAssemblyBegin_MPIAIJ,
2208: MatAssemblyEnd_MPIAIJ,
2209: 0,
2210: MatSetOption_MPIAIJ,
2211: MatZeroEntries_MPIAIJ,
2212: /*25*/ MatZeroRows_MPIAIJ,
2213: 0,
2214: #ifdef PETSC_HAVE_PBGL
2215: MatLUFactorNumeric_MPIAIJ,
2216: #else
2217: 0,
2218: #endif
2219: 0,
2220: 0,
2221: /*30*/ MatSetUpPreallocation_MPIAIJ,
2222: #ifdef PETSC_HAVE_PBGL
2223: MatILUFactorSymbolic_MPIAIJ,
2224: #else
2225: 0,
2226: #endif
2227: 0,
2228: 0,
2229: 0,
2230: /*35*/ MatDuplicate_MPIAIJ,
2231: 0,
2232: 0,
2233: 0,
2234: 0,
2235: /*40*/ MatAXPY_MPIAIJ,
2236: MatGetSubMatrices_MPIAIJ,
2237: MatIncreaseOverlap_MPIAIJ,
2238: MatGetValues_MPIAIJ,
2239: MatCopy_MPIAIJ,
2240: /*45*/ 0,
2241: MatScale_MPIAIJ,
2242: 0,
2243: 0,
2244: 0,
2245: /*50*/ MatSetBlockSize_MPIAIJ,
2246: 0,
2247: 0,
2248: 0,
2249: 0,
2250: /*55*/ MatFDColoringCreate_MPIAIJ,
2251: 0,
2252: MatSetUnfactored_MPIAIJ,
2253: MatPermute_MPIAIJ,
2254: 0,
2255: /*60*/ MatGetSubMatrix_MPIAIJ,
2256: MatDestroy_MPIAIJ,
2257: MatView_MPIAIJ,
2258: 0,
2259: 0,
2260: /*65*/ 0,
2261: 0,
2262: 0,
2263: 0,
2264: 0,
2265: /*70*/ 0,
2266: 0,
2267: MatSetColoring_MPIAIJ,
2268: #if defined(PETSC_HAVE_ADIC)
2269: MatSetValuesAdic_MPIAIJ,
2270: #else
2271: 0,
2272: #endif
2273: MatSetValuesAdifor_MPIAIJ,
2274: /*75*/ 0,
2275: 0,
2276: 0,
2277: 0,
2278: 0,
2279: /*80*/ 0,
2280: 0,
2281: 0,
2282: 0,
2283: /*84*/ MatLoad_MPIAIJ,
2284: 0,
2285: 0,
2286: 0,
2287: 0,
2288: 0,
2289: /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2290: MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2291: MatMatMultNumeric_MPIAIJ_MPIAIJ,
2292: MatPtAP_Basic,
2293: MatPtAPSymbolic_MPIAIJ,
2294: /*95*/ MatPtAPNumeric_MPIAIJ,
2295: 0,
2296: 0,
2297: 0,
2298: 0,
2299: /*100*/0,
2300: MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2301: MatPtAPNumeric_MPIAIJ_MPIAIJ,
2302: MatConjugate_MPIAIJ,
2303: 0,
2304: /*105*/MatSetValuesRow_MPIAIJ,
2305: MatRealPart_MPIAIJ,
2306: MatImaginaryPart_MPIAIJ,
2307: 0,
2308: 0,
2309: /*110*/0,
2310: MatGetRedundantMatrix_MPIAIJ};
2312: /* ----------------------------------------------------------------------------------------*/
2317: PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2318: {
2319: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2323: MatStoreValues(aij->A);
2324: MatStoreValues(aij->B);
2325: return(0);
2326: }
2332: PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2333: {
2334: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2338: MatRetrieveValues(aij->A);
2339: MatRetrieveValues(aij->B);
2340: return(0);
2341: }
2344: #include petscpc.h
2348: PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2349: {
2350: Mat_MPIAIJ *b;
2352: PetscInt i;
2355: B->preallocated = PETSC_TRUE;
2356: if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2357: if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2358: if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2359: if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2361: B->rmap.bs = B->cmap.bs = 1;
2362: PetscMapSetUp(&B->rmap);
2363: PetscMapSetUp(&B->cmap);
2364: if (d_nnz) {
2365: for (i=0; i<B->rmap.n; i++) {
2366: if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2367: }
2368: }
2369: if (o_nnz) {
2370: for (i=0; i<B->rmap.n; i++) {
2371: if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2372: }
2373: }
2374: b = (Mat_MPIAIJ*)B->data;
2376: /* Explicitly create 2 MATSEQAIJ matrices. */
2377: MatCreate(PETSC_COMM_SELF,&b->A);
2378: MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);
2379: MatSetType(b->A,MATSEQAIJ);
2380: PetscLogObjectParent(B,b->A);
2381: MatCreate(PETSC_COMM_SELF,&b->B);
2382: MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);
2383: MatSetType(b->B,MATSEQAIJ);
2384: PetscLogObjectParent(B,b->B);
2386: MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);
2387: MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);
2389: return(0);
2390: }
2395: PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2396: {
2397: Mat mat;
2398: Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2402: *newmat = 0;
2403: MatCreate(matin->comm,&mat);
2404: MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);
2405: MatSetType(mat,matin->type_name);
2406: PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));
2407: a = (Mat_MPIAIJ*)mat->data;
2408:
2409: mat->factor = matin->factor;
2410: mat->rmap.bs = matin->rmap.bs;
2411: mat->assembled = PETSC_TRUE;
2412: mat->insertmode = NOT_SET_VALUES;
2413: mat->preallocated = PETSC_TRUE;
2415: a->size = oldmat->size;
2416: a->rank = oldmat->rank;
2417: a->donotstash = oldmat->donotstash;
2418: a->roworiented = oldmat->roworiented;
2419: a->rowindices = 0;
2420: a->rowvalues = 0;
2421: a->getrowactive = PETSC_FALSE;
2423: PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);
2424: PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);
2426: MatStashCreate_Private(matin->comm,1,&mat->stash);
2427: if (oldmat->colmap) {
2428: #if defined (PETSC_USE_CTABLE)
2429: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
2430: #else
2431: PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);
2432: PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));
2433: PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));
2434: #endif
2435: } else a->colmap = 0;
2436: if (oldmat->garray) {
2437: PetscInt len;
2438: len = oldmat->B->cmap.n;
2439: PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);
2440: PetscLogObjectMemory(mat,len*sizeof(PetscInt));
2441: if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt)); }
2442: } else a->garray = 0;
2443:
2444: VecDuplicate(oldmat->lvec,&a->lvec);
2445: PetscLogObjectParent(mat,a->lvec);
2446: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
2447: PetscLogObjectParent(mat,a->Mvctx);
2448: MatDuplicate(oldmat->A,cpvalues,&a->A);
2449: PetscLogObjectParent(mat,a->A);
2450: MatDuplicate(oldmat->B,cpvalues,&a->B);
2451: PetscLogObjectParent(mat,a->B);
2452: PetscFListDuplicate(matin->qlist,&mat->qlist);
2453: *newmat = mat;
2454: return(0);
2455: }
2457: #include petscsys.h
2461: PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2462: {
2463: Mat A;
2464: PetscScalar *vals,*svals;
2465: MPI_Comm comm = ((PetscObject)viewer)->comm;
2466: MPI_Status status;
2468: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2469: PetscInt i,nz,j,rstart,rend,mmax;
2470: PetscInt header[4],*rowlengths = 0,M,N,m,*cols;
2471: PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2472: PetscInt cend,cstart,n,*rowners;
2473: int fd;
2476: MPI_Comm_size(comm,&size);
2477: MPI_Comm_rank(comm,&rank);
2478: if (!rank) {
2479: PetscViewerBinaryGetDescriptor(viewer,&fd);
2480: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
2481: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2482: }
2484: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
2485: M = header[1]; N = header[2];
2486: /* determine ownership of all rows */
2487: m = M/size + ((M % size) > rank);
2488: PetscMalloc((size+1)*sizeof(PetscInt),&rowners);
2489: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
2491: /* First process needs enough room for process with most rows */
2492: if (!rank) {
2493: mmax = rowners[1];
2494: for (i=2; i<size; i++) {
2495: mmax = PetscMax(mmax,rowners[i]);
2496: }
2497: } else mmax = m;
2499: rowners[0] = 0;
2500: for (i=2; i<=size; i++) {
2501: rowners[i] += rowners[i-1];
2502: }
2503: rstart = rowners[rank];
2504: rend = rowners[rank+1];
2506: /* distribute row lengths to all processors */
2507: PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);
2508: if (!rank) {
2509: PetscBinaryRead(fd,ourlens,m,PETSC_INT);
2510: PetscMalloc(m*sizeof(PetscInt),&rowlengths);
2511: PetscMalloc(size*sizeof(PetscInt),&procsnz);
2512: PetscMemzero(procsnz,size*sizeof(PetscInt));
2513: for (j=0; j<m; j++) {
2514: procsnz[0] += ourlens[j];
2515: }
2516: for (i=1; i<size; i++) {
2517: PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);
2518: /* calculate the number of nonzeros on each processor */
2519: for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2520: procsnz[i] += rowlengths[j];
2521: }
2522: MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
2523: }
2524: PetscFree(rowlengths);
2525: } else {
2526: MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);
2527: }
2529: if (!rank) {
2530: /* determine max buffer needed and allocate it */
2531: maxnz = 0;
2532: for (i=0; i<size; i++) {
2533: maxnz = PetscMax(maxnz,procsnz[i]);
2534: }
2535: PetscMalloc(maxnz*sizeof(PetscInt),&cols);
2537: /* read in my part of the matrix column indices */
2538: nz = procsnz[0];
2539: PetscMalloc(nz*sizeof(PetscInt),&mycols);
2540: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
2542: /* read in everyone else's and ship them off */
2543: for (i=1; i<size; i++) {
2544: nz = procsnz[i];
2545: PetscBinaryRead(fd,cols,nz,PETSC_INT);
2546: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
2547: }
2548: PetscFree(cols);
2549: } else {
2550: /* determine buffer space needed for message */
2551: nz = 0;
2552: for (i=0; i<m; i++) {
2553: nz += ourlens[i];
2554: }
2555: PetscMalloc(nz*sizeof(PetscInt),&mycols);
2557: /* receive message of column indices*/
2558: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
2559: MPI_Get_count(&status,MPIU_INT,&maxnz);
2560: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2561: }
2563: /* determine column ownership if matrix is not square */
2564: if (N != M) {
2565: n = N/size + ((N % size) > rank);
2566: MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);
2567: cstart = cend - n;
2568: } else {
2569: cstart = rstart;
2570: cend = rend;
2571: n = cend - cstart;
2572: }
2574: /* loop over local rows, determining number of off diagonal entries */
2575: PetscMemzero(offlens,m*sizeof(PetscInt));
2576: jj = 0;
2577: for (i=0; i<m; i++) {
2578: for (j=0; j<ourlens[i]; j++) {
2579: if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2580: jj++;
2581: }
2582: }
2584: /* create our matrix */
2585: for (i=0; i<m; i++) {
2586: ourlens[i] -= offlens[i];
2587: }
2588: MatCreate(comm,&A);
2589: MatSetSizes(A,m,n,M,N);
2590: MatSetType(A,type);
2591: MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);
2593: MatSetOption(A,MAT_COLUMNS_SORTED);
2594: for (i=0; i<m; i++) {
2595: ourlens[i] += offlens[i];
2596: }
2598: if (!rank) {
2599: PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);
2601: /* read in my part of the matrix numerical values */
2602: nz = procsnz[0];
2603: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2604:
2605: /* insert into matrix */
2606: jj = rstart;
2607: smycols = mycols;
2608: svals = vals;
2609: for (i=0; i<m; i++) {
2610: MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2611: smycols += ourlens[i];
2612: svals += ourlens[i];
2613: jj++;
2614: }
2616: /* read in other processors and ship out */
2617: for (i=1; i<size; i++) {
2618: nz = procsnz[i];
2619: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2620: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
2621: }
2622: PetscFree(procsnz);
2623: } else {
2624: /* receive numeric values */
2625: PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);
2627: /* receive message of values*/
2628: MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
2629: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
2630: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2632: /* insert into matrix */
2633: jj = rstart;
2634: smycols = mycols;
2635: svals = vals;
2636: for (i=0; i<m; i++) {
2637: MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2638: smycols += ourlens[i];
2639: svals += ourlens[i];
2640: jj++;
2641: }
2642: }
2643: PetscFree2(ourlens,offlens);
2644: PetscFree(vals);
2645: PetscFree(mycols);
2646: PetscFree(rowners);
2648: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
2649: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
2650: *newmat = A;
2651: return(0);
2652: }
2656: /*
2657: Not ideal since it makes two copies of the submatrix: first a SeqAIJ
2658: in local, and then the end result by concatenating the local matrices.
2659: Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2660: */
2661: PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2662: {
2664: PetscMPIInt rank,size;
2665: PetscInt i,m,n,rstart,row,rend,nz,*cwork,j;
2666: PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2667: Mat *local,M,Mreuse;
2668: PetscScalar *vwork,*aa;
2669: MPI_Comm comm = mat->comm;
2670: Mat_SeqAIJ *aij;
2674: MPI_Comm_rank(comm,&rank);
2675: MPI_Comm_size(comm,&size);
2677: if (call == MAT_REUSE_MATRIX) {
2678: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);
2679: if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2680: local = &Mreuse;
2681: MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);
2682: } else {
2683: MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);
2684: Mreuse = *local;
2685: PetscFree(local);
2686: }
2688: /*
2689: m - number of local rows
2690: n - number of columns (same on all processors)
2691: rstart - first row in new global matrix generated
2692: */
2693: MatGetSize(Mreuse,&m,&n);
2694: if (call == MAT_INITIAL_MATRIX) {
2695: aij = (Mat_SeqAIJ*)(Mreuse)->data;
2696: ii = aij->i;
2697: jj = aij->j;
2699: /*
2700: Determine the number of non-zeros in the diagonal and off-diagonal
2701: portions of the matrix in order to do correct preallocation
2702: */
2704: /* first get start and end of "diagonal" columns */
2705: if (csize == PETSC_DECIDE) {
2706: ISGetSize(isrow,&mglobal);
2707: if (mglobal == n) { /* square matrix */
2708: nlocal = m;
2709: } else {
2710: nlocal = n/size + ((n % size) > rank);
2711: }
2712: } else {
2713: nlocal = csize;
2714: }
2715: MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);
2716: rstart = rend - nlocal;
2717: if (rank == size - 1 && rend != n) {
2718: SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2719: }
2721: /* next, compute all the lengths */
2722: PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);
2723: olens = dlens + m;
2724: for (i=0; i<m; i++) {
2725: jend = ii[i+1] - ii[i];
2726: olen = 0;
2727: dlen = 0;
2728: for (j=0; j<jend; j++) {
2729: if (*jj < rstart || *jj >= rend) olen++;
2730: else dlen++;
2731: jj++;
2732: }
2733: olens[i] = olen;
2734: dlens[i] = dlen;
2735: }
2736: MatCreate(comm,&M);
2737: MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);
2738: MatSetType(M,mat->type_name);
2739: MatMPIAIJSetPreallocation(M,0,dlens,0,olens);
2740: PetscFree(dlens);
2741: } else {
2742: PetscInt ml,nl;
2744: M = *newmat;
2745: MatGetLocalSize(M,&ml,&nl);
2746: if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2747: MatZeroEntries(M);
2748: /*
2749: The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2750: rather than the slower MatSetValues().
2751: */
2752: M->was_assembled = PETSC_TRUE;
2753: M->assembled = PETSC_FALSE;
2754: }
2755: MatGetOwnershipRange(M,&rstart,&rend);
2756: aij = (Mat_SeqAIJ*)(Mreuse)->data;
2757: ii = aij->i;
2758: jj = aij->j;
2759: aa = aij->a;
2760: for (i=0; i<m; i++) {
2761: row = rstart + i;
2762: nz = ii[i+1] - ii[i];
2763: cwork = jj; jj += nz;
2764: vwork = aa; aa += nz;
2765: MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
2766: }
2768: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
2769: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
2770: *newmat = M;
2772: /* save submatrix used in processor for next request */
2773: if (call == MAT_INITIAL_MATRIX) {
2774: PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
2775: PetscObjectDereference((PetscObject)Mreuse);
2776: }
2778: return(0);
2779: }
2784: PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2785: {
2786: PetscInt m,cstart, cend,j,nnz,i,d;
2787: PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2788: const PetscInt *JJ;
2789: PetscScalar *values;
2793: if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
2795: B->rmap.bs = B->cmap.bs = 1;
2796: PetscMapSetUp(&B->rmap);
2797: PetscMapSetUp(&B->cmap);
2798: m = B->rmap.n;
2799: cstart = B->cmap.rstart;
2800: cend = B->cmap.rend;
2801: rstart = B->rmap.rstart;
2803: PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);
2804: o_nnz = d_nnz + m;
2806: for (i=0; i<m; i++) {
2807: nnz = Ii[i+1]- Ii[i];
2808: JJ = J + Ii[i];
2809: nnz_max = PetscMax(nnz_max,nnz);
2810: if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
2811: for (j=0; j<nnz; j++) {
2812: if (*JJ >= cstart) break;
2813: JJ++;
2814: }
2815: d = 0;
2816: for (; j<nnz; j++) {
2817: if (*JJ++ >= cend) break;
2818: d++;
2819: }
2820: d_nnz[i] = d;
2821: o_nnz[i] = nnz - d;
2822: }
2823: MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
2824: PetscFree(d_nnz);
2826: if (v) values = (PetscScalar*)v;
2827: else {
2828: PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);
2829: PetscMemzero(values,nnz_max*sizeof(PetscScalar));
2830: }
2832: MatSetOption(B,MAT_COLUMNS_SORTED);
2833: for (i=0; i<m; i++) {
2834: ii = i + rstart;
2835: nnz = Ii[i+1]- Ii[i];
2836: MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);
2837: }
2838: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2839: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2840: MatSetOption(B,MAT_COLUMNS_UNSORTED);
2842: if (!v) {
2843: PetscFree(values);
2844: }
2845: return(0);
2846: }
2851: /*@
2852: MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2853: (the default parallel PETSc format).
2855: Collective on MPI_Comm
2857: Input Parameters:
2858: + B - the matrix
2859: . i - the indices into j for the start of each local row (starts with zero)
2860: . j - the column indices for each local row (starts with zero); these must be sorted for each row
2861: - v - optional values in the matrix
2863: Level: developer
2865: Notes: this actually copies the values from i[], j[], and v[] to put them into PETSc's internal
2866: storage format. Thus changing the values in v[] after this call will not affect the matrix values.
2868: .keywords: matrix, aij, compressed row, sparse, parallel
2870: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2871: MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
2872: @*/
2873: PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2874: {
2875: PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2878: PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);
2879: if (f) {
2880: (*f)(B,i,j,v);
2881: }
2882: return(0);
2883: }
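/*
   A minimal usage sketch for MatMPIAIJSetPreallocationCSR() (an illustration, not taken
   from the PETSc sources; ilocal, jlocal and vlocal are assumed to hold this process's
   rows in 0-based CSR form with sorted column indices in each row):

      Mat B;
      MatCreate(comm,&B);
      MatSetSizes(B,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);
      MatSetType(B,MATMPIAIJ);
      MatMPIAIJSetPreallocationCSR(B,ilocal,jlocal,vlocal);

   Passing PETSC_NULL for the values argument preallocates and inserts the nonzero
   pattern with zero entries, as MatMPIAIJSetPreallocationCSR_MPIAIJ() above does.
*/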
2887: /*@C
2888: MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2889: (the default parallel PETSc format). For good matrix assembly performance
2890: the user should preallocate the matrix storage by setting the parameters
2891: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2892: performance can be increased by more than a factor of 50.
2894: Collective on MPI_Comm
2896: Input Parameters:
2897: + A - the matrix
2898: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
2899: (same value is used for all local rows)
2900: . d_nnz - array containing the number of nonzeros in the various rows of the
2901: DIAGONAL portion of the local submatrix (possibly different for each row)
2902: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2903: The size of this array is equal to the number of local rows, i.e 'm'.
2904: You must leave room for the diagonal entry even if it is zero.
2905: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
2906: submatrix (same value is used for all local rows).
2907: - o_nnz - array containing the number of nonzeros in the various rows of the
2908: OFF-DIAGONAL portion of the local submatrix (possibly different for
2909: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2910: structure. The size of this array is equal to the number
2911: of local rows, i.e 'm'.
2913: If the *_nnz parameter is given then the *_nz parameter is ignored
2915: The AIJ format (also called the Yale sparse matrix format or
2916: compressed row storage (CSR)), is fully compatible with standard Fortran 77
2917: storage. The stored row and column indices begin with zero. See the users manual for details.
2919: The parallel matrix is partitioned such that the first m0 rows belong to
2920: process 0, the next m1 rows belong to process 1, the next m2 rows belong
2921: to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
2923: The DIAGONAL portion of the local submatrix of a processor can be defined
2924: as the submatrix which is obtained by extracting the part corresponding
2925: to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2926: first row that belongs to the processor, and r2 is the last row belonging
2927: to this processor. This is a square mxm matrix. The remaining portion
2928: of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2930: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2932: Example usage:
2933:
2934: Consider the following 8x8 matrix with 34 non-zero values, that is
2935: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2936: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2937: as follows:
2939: .vb
2940: 1 2 0 | 0 3 0 | 0 4
2941: Proc0 0 5 6 | 7 0 0 | 8 0
2942: 9 0 10 | 11 0 0 | 12 0
2943: -------------------------------------
2944: 13 0 14 | 15 16 17 | 0 0
2945: Proc1 0 18 0 | 19 20 21 | 0 0
2946: 0 0 0 | 22 23 0 | 24 0
2947: -------------------------------------
2948: Proc2 25 26 27 | 0 0 28 | 29 0
2949: 30 0 0 | 31 32 33 | 0 34
2950: .ve
2952: This can be represented as a collection of submatrices as:
2954: .vb
2955: A B C
2956: D E F
2957: G H I
2958: .ve
2960: Where the submatrices A,B,C are owned by proc0, D,E,F are
2961: owned by proc1, G,H,I are owned by proc2.
2963: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2964: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2965: The 'M','N' parameters are 8,8, and have the same values on all procs.
2967: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2968: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2969: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2970: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2971: part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2972: matrix, and [DF] as another SeqAIJ matrix.
2974: When d_nz, o_nz parameters are specified, d_nz storage elements are
2975: allocated for every row of the local diagonal submatrix, and o_nz
2976: storage locations are allocated for every row of the OFF-DIAGONAL submat.
2977: One way to choose d_nz and o_nz is to use the max nonzeros per local
2978: rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2979: In this case, the values of d_nz,o_nz are:
2980: .vb
2981: proc0 : dnz = 2, o_nz = 2
2982: proc1 : dnz = 3, o_nz = 2
2983: proc2 : dnz = 1, o_nz = 4
2984: .ve
2985: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2986: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2987: for proc2, i.e. we are using 12+15+10=37 storage locations to store
2988: 34 values.
2990: When d_nnz, o_nnz parameters are specified, the storage is specified
2991: for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2992: In the above case the values for d_nnz,o_nnz are:
2993: .vb
2994: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2995: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2996: proc2: d_nnz = [1,1] and o_nnz = [4,4]
2997: .ve
2998: Here the space allocated is the sum of all the above values, i.e. 34, and
2999: hence pre-allocation is perfect.
3001: Level: intermediate
3003: .keywords: matrix, aij, compressed row, sparse, parallel
3005: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3006: MPIAIJ
3007: @*/
3008: PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3009: {
3010: PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3013: PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);
3014: if (f) {
3015: (*f)(B,d_nz,d_nnz,o_nz,o_nnz);
3016: }
3017: return(0);
3018: }
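/*
   A short sketch of the call described above (illustrative only; it uses the per-row
   counts proc0 would supply for the 8x8 example on this manual page and assumes A has
   already been given its sizes and the MPIAIJ type):

      PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);

   Since d_nnz/o_nnz are supplied, the scalar d_nz/o_nz arguments are ignored and the
   0 values serve only as placeholders.
*/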
3022: /*@C
3023: MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
3024: in standard CSR format.
3026: Collective on MPI_Comm
3028: Input Parameters:
3029: + comm - MPI communicator
3030: . m - number of local rows (Cannot be PETSC_DECIDE)
3031: . n - This value should be the same as the local size used in creating the
3032: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3033: calculated if N is given) For square matrices n is almost always m.
3034: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3035: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3036: . i - row indices
3037: . j - column indices
3038: - a - matrix values
3040: Output Parameter:
3041: . mat - the matrix
3043: Level: intermediate
3045: Notes:
3046: The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3047: thus you CANNOT change the matrix entries by changing the values of a[] after you have
3048: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3050: The i and j indices are 0 based
3052: .keywords: matrix, aij, compressed row, sparse, parallel
3054: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3055: MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3056: @*/
3057: PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3058: {
3062: if (i[0]) {
3063: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3064: }
3065: if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3066: MatCreate(comm,mat);
3067: MatSetSizes(*mat,m,n,M,N);
3068: MatSetType(*mat,MATMPIAIJ);
3069: MatMPIAIJSetPreallocationCSR(*mat,i,j,a);
3070: return(0);
3071: }
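/*
   A minimal sketch of MatCreateMPIAIJWithArrays() (illustrative names; each process
   passes its own local rows i[], j[], a[] in 0-based CSR form, and because the arrays
   are copied they may be freed once the call returns):

      Mat A;
      MatCreateMPIAIJWithArrays(comm,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
      ...
      MatDestroy(A);
*/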
3075: /*@C
3076: MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3077: (the default parallel PETSc format). For good matrix assembly performance
3078: the user should preallocate the matrix storage by setting the parameters
3079: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3080: performance can be increased by more than a factor of 50.
3082: Collective on MPI_Comm
3084: Input Parameters:
3085: + comm - MPI communicator
3086: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3087: This value should be the same as the local size used in creating the
3088: y vector for the matrix-vector product y = Ax.
3089: . n - This value should be the same as the local size used in creating the
3090: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3091: calculated if N is given) For square matrices n is almost always m.
3092: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3093: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3094: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3095: (same value is used for all local rows)
3096: . d_nnz - array containing the number of nonzeros in the various rows of the
3097: DIAGONAL portion of the local submatrix (possibly different for each row)
3098: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3099: The size of this array is equal to the number of local rows, i.e 'm'.
3100: You must leave room for the diagonal entry even if it is zero.
3101: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3102: submatrix (same value is used for all local rows).
3103: - o_nnz - array containing the number of nonzeros in the various rows of the
3104: OFF-DIAGONAL portion of the local submatrix (possibly different for
3105: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3106: structure. The size of this array is equal to the number
3107: of local rows, i.e 'm'.
3109: Output Parameter:
3110: . A - the matrix
3112: Notes:
3113: If the *_nnz parameter is given then the *_nz parameter is ignored
3115: m,n,M,N parameters specify the size of the matrix, and its partitioning across
3116: processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3117: storage requirements for this matrix.
3119: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3120: processor then it must be used on all processors that share the object for
3121: that argument.
3123: The user MUST specify either the local or global matrix dimensions
3124: (possibly both).
3126: The parallel matrix is partitioned across processors such that the
3127: first m0 rows belong to process 0, the next m1 rows belong to
3128: process 1, the next m2 rows belong to process 2, etc., where
3129: m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
3130: values corresponding to [m x N] submatrix.
3132: The columns are logically partitioned with the n0 columns belonging
3133: to 0th partition, the next n1 columns belonging to the next
3134: partition, etc., where n0,n1,n2... are the input parameter 'n'.
3136: The DIAGONAL portion of the local submatrix on any given processor
3137: is the submatrix corresponding to the rows and columns m,n
3138: corresponding to the given processor. i.e diagonal matrix on
3139: process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3140: etc. The remaining portion of the local submatrix [m x (N-n)]
3141: constitute the OFF-DIAGONAL portion. The example below better
3142: illustrates this concept.
3144: For a square global matrix we define each processor's diagonal portion
3145: to be its local rows and the corresponding columns (a square submatrix);
3146: each processor's off-diagonal portion encompasses the remainder of the
3147: local matrix (a rectangular submatrix).
3149: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3151: When calling this routine with a single process communicator, a matrix of
3152: type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
3153: type of communicator, use the construction mechanism:
3154: MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
3156: By default, this format uses inodes (identical nodes) when possible.
3157: We search for consecutive rows with the same nonzero structure, thereby
3158: reusing matrix information to achieve increased efficiency.
3160: Options Database Keys:
3161: + -mat_no_inode - Do not use inodes
3162: . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3163: - -mat_aij_oneindex - Internally use indexing starting at 1
3164: rather than 0. Note that when calling MatSetValues(),
3165: the user still MUST index entries starting at 0!
3168: Example usage:
3169:
3170: Consider the following 8x8 matrix with 34 non-zero values, that is
3171: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3172: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3173: as follows:
3175: .vb
3176: 1 2 0 | 0 3 0 | 0 4
3177: Proc0 0 5 6 | 7 0 0 | 8 0
3178: 9 0 10 | 11 0 0 | 12 0
3179: -------------------------------------
3180: 13 0 14 | 15 16 17 | 0 0
3181: Proc1 0 18 0 | 19 20 21 | 0 0
3182: 0 0 0 | 22 23 0 | 24 0
3183: -------------------------------------
3184: Proc2 25 26 27 | 0 0 28 | 29 0
3185: 30 0 0 | 31 32 33 | 0 34
3186: .ve
3188: This can be represented as a collection of submatrices as:
3190: .vb
3191: A B C
3192: D E F
3193: G H I
3194: .ve
3196: Where the submatrices A,B,C are owned by proc0, D,E,F are
3197: owned by proc1, G,H,I are owned by proc2.
3199: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3200: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3201: The 'M','N' parameters are 8,8, and have the same values on all procs.
3203: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3204: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3205: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3206: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3207: part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3208: matrix, and [DF] as another SeqAIJ matrix.
3210: When d_nz, o_nz parameters are specified, d_nz storage elements are
3211: allocated for every row of the local diagonal submatrix, and o_nz
3212: storage locations are allocated for every row of the OFF-DIAGONAL submat.
3213: One way to choose d_nz and o_nz is to use the max nonzeros per local
3214: rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3215: In this case, the values of d_nz,o_nz are:
3216: .vb
3217: proc0 : dnz = 2, o_nz = 2
3218: proc1 : dnz = 3, o_nz = 2
3219: proc2 : dnz = 1, o_nz = 4
3220: .ve
3221: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3222: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3223: for proc2, i.e. we are using 12+15+10=37 storage locations to store
3224: 34 values.
3226: When d_nnz, o_nnz parameters are specified, the storage is specified
3227: for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3228: In the above case the values for d_nnz,o_nnz are:
3229: .vb
3230: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3231: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3232: proc2: d_nnz = [1,1] and o_nnz = [4,4]
3233: .ve
3234: Here the space allocated is the sum of all the above values, i.e. 34, and
3235: hence pre-allocation is perfect.
3237: Level: intermediate
3239: .keywords: matrix, aij, compressed row, sparse, parallel
3241: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3242: MPIAIJ, MatCreateMPIAIJWithArrays()
3243: @*/
3244: PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3245: {
3247: PetscMPIInt size;
3250: MatCreate(comm,A);
3251: MatSetSizes(*A,m,n,M,N);
3252: MPI_Comm_size(comm,&size);
3253: if (size > 1) {
3254: MatSetType(*A,MATMPIAIJ);
3255: MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
3256: } else {
3257: MatSetType(*A,MATSEQAIJ);
3258: MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
3259: }
3260: return(0);
3261: }
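/*
   A brief usage sketch for MatCreateMPIAIJ() (illustrative; scalar preallocation of
   5 diagonal and 2 off-diagonal nonzeros per row, letting PETSc compute the global
   sizes from the local ones):

      Mat A;
      MatCreateMPIAIJ(comm,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE,5,PETSC_NULL,2,PETSC_NULL,&A);
      ... insert entries with MatSetValues(), then ...
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

   As noted in the manual page above, on a one-process communicator this returns a
   SEQAIJ matrix rather than an MPIAIJ one.
*/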
3265: PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3266: {
3267: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3270: *Ad = a->A;
3271: *Ao = a->B;
3272: *colmap = a->garray;
3273: return(0);
3274: }
3278: PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3279: {
3281: PetscInt i;
3282: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3285: if (coloring->ctype == IS_COLORING_GLOBAL) {
3286: ISColoringValue *allcolors,*colors;
3287: ISColoring ocoloring;
3289: /* set coloring for diagonal portion */
3290: MatSetColoring_SeqAIJ(a->A,coloring);
3292: /* set coloring for off-diagonal portion */
3293: ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);
3294: PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);
3295: for (i=0; i<a->B->cmap.n; i++) {
3296: colors[i] = allcolors[a->garray[i]];
3297: }
3298: PetscFree(allcolors);
3299: ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);
3300: MatSetColoring_SeqAIJ(a->B,ocoloring);
3301: ISColoringDestroy(ocoloring);
3302: } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3303: ISColoringValue *colors;
3304: PetscInt *larray;
3305: ISColoring ocoloring;
3307: /* set coloring for diagonal portion */
3308: PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);
3309: for (i=0; i<a->A->cmap.n; i++) {
3310: larray[i] = i + A->cmap.rstart;
3311: }
3312: ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);
3313: PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);
3314: for (i=0; i<a->A->cmap.n; i++) {
3315: colors[i] = coloring->colors[larray[i]];
3316: }
3317: PetscFree(larray);
3318: ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);
3319: MatSetColoring_SeqAIJ(a->A,ocoloring);
3320: ISColoringDestroy(ocoloring);
3322: /* set coloring for off-diagonal portion */
3323: PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);
3324: ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);
3325: PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);
3326: for (i=0; i<a->B->cmap.n; i++) {
3327: colors[i] = coloring->colors[larray[i]];
3328: }
3329: PetscFree(larray);
3330: ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);
3331: MatSetColoring_SeqAIJ(a->B,ocoloring);
3332: ISColoringDestroy(ocoloring);
3333: } else {
3334: SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3335: }
3337: return(0);
3338: }
3340: #if defined(PETSC_HAVE_ADIC)
3343: PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3344: {
3345: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3349: MatSetValuesAdic_SeqAIJ(a->A,advalues);
3350: MatSetValuesAdic_SeqAIJ(a->B,advalues);
3351: return(0);
3352: }
3353: #endif
3357: PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3358: {
3359: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3363: MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);
3364: MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);
3365: return(0);
3366: }
3370: /*@C
3371: MatMerge - Creates a single large PETSc matrix by concatenating sequential
3372: matrices from each processor
3374: Collective on MPI_Comm
3376: Input Parameters:
3377: + comm - the communicator the parallel matrix will live on
3378: . inmat - the input sequential matrices
3379: . n - number of local columns (or PETSC_DECIDE)
3380: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3382: Output Parameter:
3383: . outmat - the parallel matrix generated
3385: Level: advanced
3387: Notes: The number of columns of the matrix in EACH processor MUST be the same.
3389: @*/
3390: PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3391: {
3393: PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz;
3394: PetscInt *indx;
3395: PetscScalar *values;
3398: MatGetSize(inmat,&m,&N);
3399: if (scall == MAT_INITIAL_MATRIX){
3400: /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3401: if (n == PETSC_DECIDE){
3402: PetscSplitOwnership(comm,&n,&N);
3403: }
3404: MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);
3405: rstart -= m;
3407: MatPreallocateInitialize(comm,m,n,dnz,onz);
3408: for (i=0;i<m;i++) {
3409: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);
3410: MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);
3411: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);
3412: }
3413: /* This routine will ONLY return MPIAIJ type matrix */
3414: MatCreate(comm,outmat);
3415: MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
3416: MatSetType(*outmat,MATMPIAIJ);
3417: MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);
3418: MatPreallocateFinalize(dnz,onz);
3419:
3420: } else if (scall == MAT_REUSE_MATRIX){
3421: MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);
3422: } else {
3423: SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3424: }
3426: for (i=0;i<m;i++) {
3427: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
3428: Ii = i + rstart;
3429: MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);
3430: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
3431: }
3432: MatDestroy(inmat);
3433: MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
3434: MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
3436: return(0);
3437: }
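/*
   A hedged sketch of calling MatMerge() (illustrative; each process supplies a
   sequential matrix localmat with the same number of columns):

      Mat C;
      MatMerge(comm,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);

   Note that MatMerge() calls MatDestroy() on the input matrix, so localmat must not
   be referenced afterwards.
*/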
3441: PetscErrorCode MatFileSplit(Mat A,char *outfile)
3442: {
3443: PetscErrorCode ierr;
3444: PetscMPIInt rank;
3445: PetscInt m,N,i,rstart,nnz;
3446: size_t len;
3447: const PetscInt *indx;
3448: PetscViewer out;
3449: char *name;
3450: Mat B;
3451: const PetscScalar *values;
3454: MatGetLocalSize(A,&m,0);
3455: MatGetSize(A,0,&N);
3456: /* Should this be the type of the diagonal block of A? */
3457: MatCreate(PETSC_COMM_SELF,&B);
3458: MatSetSizes(B,m,N,m,N);
3459: MatSetType(B,MATSEQAIJ);
3460: MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
3461: MatGetOwnershipRange(A,&rstart,0);
3462: for (i=0;i<m;i++) {
3463: MatGetRow(A,i+rstart,&nnz,&indx,&values);
3464: MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);
3465: MatRestoreRow(A,i+rstart,&nnz,&indx,&values);
3466: }
3467: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3468: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3470: MPI_Comm_rank(A->comm,&rank);
3471: PetscStrlen(outfile,&len);
3472: PetscMalloc((len+5)*sizeof(char),&name);
3473: sprintf(name,"%s.%d",outfile,rank);
3474: PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);
3475: PetscFree(name);
3476: MatView(B,out);
3477: PetscViewerDestroy(out);
3478: MatDestroy(B);
3479: return(0);
3480: }
3482: EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3485: PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3486: {
3487: PetscErrorCode ierr;
3488: Mat_Merge_SeqsToMPI *merge;
3489: PetscContainer container;
3492: PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);
3493: if (container) {
3494: PetscContainerGetPointer(container,(void **)&merge);
3495: PetscFree(merge->id_r);
3496: PetscFree(merge->len_s);
3497: PetscFree(merge->len_r);
3498: PetscFree(merge->bi);
3499: PetscFree(merge->bj);
3500: PetscFree(merge->buf_ri);
3501: PetscFree(merge->buf_rj);
3502: PetscFree(merge->coi);
3503: PetscFree(merge->coj);
3504: PetscFree(merge->owners_co);
3505: PetscFree(merge->rowmap.range);
3506:
3507: PetscContainerDestroy(container);
3508: PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);
3509: PetscFree(merge);
3510: }
3512: MatDestroy_MPIAIJ(A);
3513: return(0);
3514: }
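/*
   The merge routines below attach their supporting data to the parallel matrix in a
   PetscContainer composed under the name "MatMergeSeqsToMPI"; the destructor above
   retrieves it the same way. An illustrative sketch of that query pattern, assuming
   mpimat was produced by MatMerge_SeqsToMPI():

      PetscContainer      container;
      Mat_Merge_SeqsToMPI *merge;

      PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);
      if (container) {
        PetscContainerGetPointer(container,(void **)&merge);
        ... merge->bi and merge->bj now describe the merged row structure ...
      }
*/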
3516: #include src/mat/utils/freespace.h
3517: #include petscbt.h
3518: static PetscEvent logkey_seqstompinum = 0;
3521: /*@C
3522: MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
3523: matrices from each processor
3525: Collective on MPI_Comm
3527: Input Parameters:
3528: + comm - the communicator the parallel matrix will live on
3529: . seqmat - the input sequential matrix on each processor
3530: . m - number of local rows (or PETSC_DECIDE)
3531: . n - number of local columns (or PETSC_DECIDE)
3532: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3534: Output Parameter:
3535: . mpimat - the parallel matrix generated
3537: Level: advanced
3539: Notes:
3540: The dimensions of the sequential matrix on each processor MUST be the same.
3541: The input seqmat is stored in the container "MatMergeSeqsToMPI", and will be
3542: destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
3543: @*/
3544: PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3545: {
3546: PetscErrorCode ierr;
3547: MPI_Comm comm=mpimat->comm;
3548: Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
3549: PetscMPIInt size,rank,taga,*len_s;
3550: PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3551: PetscInt proc,m;
3552: PetscInt **buf_ri,**buf_rj;
3553: PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3554: PetscInt nrows,**buf_ri_k,**nextrow,**nextai;
3555: MPI_Request *s_waits,*r_waits;
3556: MPI_Status *status;
3557: MatScalar *aa=a->a,**abuf_r,*ba_i;
3558: Mat_Merge_SeqsToMPI *merge;
3559: PetscContainer container;
3560:
3562: if (!logkey_seqstompinum) {
3564: }
3567: MPI_Comm_size(comm,&size);
3568: MPI_Comm_rank(comm,&rank);
3570: PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);
3571: if (container) {
3572: PetscContainerGetPointer(container,(void **)&merge);
3573: }
3574: bi = merge->bi;
3575: bj = merge->bj;
3576: buf_ri = merge->buf_ri;
3577: buf_rj = merge->buf_rj;
3579: PetscMalloc(size*sizeof(MPI_Status),&status);
3580: owners = merge->rowmap.range;
3581: len_s = merge->len_s;
3583: /* send and recv matrix values */
3584: /*-----------------------------*/
3585: PetscObjectGetNewTag((PetscObject)mpimat,&taga);
3586: PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);
3588: PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);
3589: for (proc=0,k=0; proc<size; proc++){
3590: if (!len_s[proc]) continue;
3591: i = owners[proc];
3592: MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);
3593: k++;
3594: }
3596: if (merge->nrecv) {MPI_Waitall(merge->nrecv,r_waits,status);}
3597: if (merge->nsend) {MPI_Waitall(merge->nsend,s_waits,status);}
3598: PetscFree(status);
3600: PetscFree(s_waits);
3601: PetscFree(r_waits);
3603: /* insert mat values of mpimat */
3604: /*----------------------------*/
3605: PetscMalloc(N*sizeof(MatScalar),&ba_i);
3606: PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);
3607: nextrow = buf_ri_k + merge->nrecv;
3608: nextai = nextrow + merge->nrecv;
3610: for (k=0; k<merge->nrecv; k++){
3611: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3612: nrows = *(buf_ri_k[k]);
3613: nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */
3614: nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the i-structure of the k-th received message */
3615: }
3617: /* set values of ba */
3618: m = merge->rowmap.n;
3619: for (i=0; i<m; i++) {
3620: arow = owners[rank] + i;
3621: bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */
3622: bnzi = bi[i+1] - bi[i];
3623: PetscMemzero(ba_i,bnzi*sizeof(MatScalar));
3625: /* add local non-zero vals of this proc's seqmat into ba */
3626: anzi = ai[arow+1] - ai[arow];
3627: aj = a->j + ai[arow];
3628: aa = a->a + ai[arow];
3629: nextaj = 0;
3630: for (j=0; nextaj<anzi; j++){
3631: if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3632: ba_i[j] += aa[nextaj++];
3633: }
3634: }
3636: /* add received vals into ba */
3637: for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3638: /* i-th row */
3639: if (i == *nextrow[k]) {
3640: anzi = *(nextai[k]+1) - *nextai[k];
3641: aj = buf_rj[k] + *(nextai[k]);
3642: aa = abuf_r[k] + *(nextai[k]);
3643: nextaj = 0;
3644: for (j=0; nextaj<anzi; j++){
3645: if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3646: ba_i[j] += aa[nextaj++];
3647: }
3648: }
3649: nextrow[k]++; nextai[k]++;
3650: }
3651: }
3652: MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);
3653: }
3654: MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);
3655: MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);
3657: PetscFree(abuf_r);
3658: PetscFree(ba_i);
3659: PetscFree(buf_ri_k);
3661: return(0);
3662: }
3664: static PetscEvent logkey_seqstompisym = 0;
3667: PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3668: {
3669: PetscErrorCode ierr;
3670: Mat B_mpi;
3671: Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
3672: PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3673: PetscInt **buf_rj,**buf_ri,**buf_ri_k;
3674: PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3675: PetscInt len,proc,*dnz,*onz;
3676: PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3677: PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3678: MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits;
3679: MPI_Status *status;
3680: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
3681: PetscBT lnkbt;
3682: Mat_Merge_SeqsToMPI *merge;
3683: PetscContainer container;
3686: if (!logkey_seqstompisym) {
3688: }
3691: /* make sure it is a PETSc comm */
3692: PetscCommDuplicate(comm,&comm,PETSC_NULL);
3693: MPI_Comm_size(comm,&size);
3694: MPI_Comm_rank(comm,&rank);
3695:
3696: PetscNew(Mat_Merge_SeqsToMPI,&merge);
3697: PetscMalloc(size*sizeof(MPI_Status),&status);
3699: /* determine row ownership */
3700: /*---------------------------------------------------------*/
3701: PetscMapInitialize(comm,&merge->rowmap);
3702: merge->rowmap.n = m;
3703: merge->rowmap.N = M;
3704: merge->rowmap.bs = 1;
3705: PetscMapSetUp(&merge->rowmap);
3706: PetscMalloc(size*sizeof(PetscMPIInt),&len_si);
3707: PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);
3708:
3709: m = merge->rowmap.n;
3710: M = merge->rowmap.N;
3711: owners = merge->rowmap.range;
3713: /* determine the number of messages to send, their lengths */
3714: /*---------------------------------------------------------*/
3715: len_s = merge->len_s;
3717: len = 0; /* length of buf_si[] */
3718: merge->nsend = 0;
3719: for (proc=0; proc<size; proc++){
3720: len_si[proc] = 0;
3721: if (proc == rank){
3722: len_s[proc] = 0;
3723: } else {
3724: len_si[proc] = owners[proc+1] - owners[proc] + 1;
3725: len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros in the rows to be sent to [proc] */
3726: }
3727: if (len_s[proc]) {
3728: merge->nsend++;
3729: nrows = 0;
3730: for (i=owners[proc]; i<owners[proc+1]; i++){
3731: if (ai[i+1] > ai[i]) nrows++;
3732: }
3733: len_si[proc] = 2*(nrows+1);
3734: len += len_si[proc];
3735: }
3736: }
3738: /* determine the number and length of messages to receive for ij-structure */
3739: /*-------------------------------------------------------------------------*/
3740: PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);
3741: PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);
3743: /* post the Irecv of j-structure */
3744: /*-------------------------------*/
3745: PetscCommGetNewTag(comm,&tagj);
3746: PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);
3748: /* post the Isend of j-structure */
3749: /*--------------------------------*/
3750: PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);
3751: sj_waits = si_waits + merge->nsend;
3753: for (proc=0, k=0; proc<size; proc++){
3754: if (!len_s[proc]) continue;
3755: i = owners[proc];
3756: MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);
3757: k++;
3758: }
3760: /* receives and sends of j-structure are complete */
3761: /*------------------------------------------------*/
3762: if (merge->nrecv) {MPI_Waitall(merge->nrecv,rj_waits,status);}
3763: if (merge->nsend) {MPI_Waitall(merge->nsend,sj_waits,status);}
3764:
3765: /* send and recv i-structure */
3766: /*---------------------------*/
3767: PetscCommGetNewTag(comm,&tagi);
3768: PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);
3769:
3770: PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);
3771: buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
3772: for (proc=0,k=0; proc<size; proc++){
3773: if (!len_s[proc]) continue;
3774: /* form outgoing message for i-structure:
3775: buf_si[0]: nrows to be sent
3776: [1:nrows]: row index (global)
3777: [nrows+1:2*nrows+1]: i-structure index
3778: */
3779: /*-------------------------------------------*/
3780: nrows = len_si[proc]/2 - 1;
3781: buf_si_i = buf_si + nrows+1;
3782: buf_si[0] = nrows;
3783: buf_si_i[0] = 0;
3784: nrows = 0;
3785: for (i=owners[proc]; i<owners[proc+1]; i++){
3786: anzi = ai[i+1] - ai[i];
3787: if (anzi) {
3788: buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3789: buf_si[nrows+1] = i-owners[proc]; /* local row index */
3790: nrows++;
3791: }
3792: }
3793: MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);
3794: k++;
3795: buf_si += len_si[proc];
3796: }
3798: if (merge->nrecv) {MPI_Waitall(merge->nrecv,ri_waits,status);}
3799: if (merge->nsend) {MPI_Waitall(merge->nsend,si_waits,status);}
3801: PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);
3802: for (i=0; i<merge->nrecv; i++){
3803: PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);
3804: }
3806: PetscFree(len_si);
3807: PetscFree(len_ri);
3808: PetscFree(rj_waits);
3809: PetscFree(si_waits);
3810: PetscFree(ri_waits);
3811: PetscFree(buf_s);
3812: PetscFree(status);
3814: /* compute a local seq matrix in each processor */
3815: /*----------------------------------------------*/
3816: /* allocate bi array and free space for accumulating nonzero column info */
3817: PetscMalloc((m+1)*sizeof(PetscInt),&bi);
3818: bi[0] = 0;
3820: /* create and initialize a linked list */
3821: nlnk = N+1;
3822: PetscLLCreate(N,N,nlnk,lnk,lnkbt);
3823:
3824: /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3825: len = 0;
3826: len = ai[owners[rank+1]] - ai[owners[rank]];
3827: PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);
3828: current_space = free_space;
3830: /* determine symbolic info for each local row */
3831: PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);
3832: nextrow = buf_ri_k + merge->nrecv;
3833: nextai = nextrow + merge->nrecv;
3834: for (k=0; k<merge->nrecv; k++){
3835: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3836: nrows = *buf_ri_k[k];
3837: nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
3838: nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the i-structure of the k-th received message */
3839: }
3841: MatPreallocateInitialize(comm,m,n,dnz,onz);
3842: len = 0;
3843: for (i=0;i<m;i++) {
3844: bnzi = 0;
3845: /* add local non-zero cols of this proc's seqmat into lnk */
3846: arow = owners[rank] + i;
3847: anzi = ai[arow+1] - ai[arow];
3848: aj = a->j + ai[arow];
3849: PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);
3850: bnzi += nlnk;
3851: /* add received col data into lnk */
3852: for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3853: if (i == *nextrow[k]) { /* i-th row */
3854: anzi = *(nextai[k]+1) - *nextai[k];
3855: aj = buf_rj[k] + *nextai[k];
3856: PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);
3857: bnzi += nlnk;
3858: nextrow[k]++; nextai[k]++;
3859: }
3860: }
3861: if (len < bnzi) len = bnzi; /* =max(bnzi) */
3863: /* if free space is not available, make more free space */
3864: if (current_space->local_remaining<bnzi) {
3865: PetscFreeSpaceGet(current_space->total_array_size,&current_space);
3866: nspacedouble++;
3867: }
3868: /* copy data into free space, then initialize lnk */
3869: PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);
3870: MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);
3872: current_space->array += bnzi;
3873: current_space->local_used += bnzi;
3874: current_space->local_remaining -= bnzi;
3875:
3876: bi[i+1] = bi[i] + bnzi;
3877: }
3878:
3879: PetscFree(buf_ri_k);
3881: PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);
3882: PetscFreeSpaceContiguous(&free_space,bj);
3883: PetscLLDestroy(lnk,lnkbt);
3885: /* create symbolic parallel matrix B_mpi */
3886: /*---------------------------------------*/
3887: MatCreate(comm,&B_mpi);
3888: if (n==PETSC_DECIDE) {
3889: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);
3890: } else {
3891: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
3892: }
3893: MatSetType(B_mpi,MATMPIAIJ);
3894: MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);
3895: MatPreallocateFinalize(dnz,onz);
3897: /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3898: B_mpi->assembled = PETSC_FALSE;
3899: B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
3900: merge->bi = bi;
3901: merge->bj = bj;
3902: merge->buf_ri = buf_ri;
3903: merge->buf_rj = buf_rj;
3904: merge->coi = PETSC_NULL;
3905: merge->coj = PETSC_NULL;
3906: merge->owners_co = PETSC_NULL;
3908: /* attach the supporting struct to B_mpi for reuse */
3909: PetscContainerCreate(PETSC_COMM_SELF,&container);
3910: PetscContainerSetPointer(container,merge);
3911: PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);
3912: *mpimat = B_mpi;
3914: PetscCommDestroy(&comm);
3916: return(0);
3917: }
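/*
   Illustrative sketch of the split-phase use of the routines above: the symbolic phase is
   run once to build the parallel nonzero structure and the supporting container, after
   which the numeric phase can be repeated whenever the values (but not the pattern) of the
   local sequential matrix change. seqmat is assumed to be an assembled SeqAIJ matrix on
   every process of comm.

      Mat mpimat;

      MatMerge_SeqsToMPISymbolic(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,&mpimat);
      MatMerge_SeqsToMPINumeric(seqmat,mpimat);
      ... update the numerical values of seqmat, keeping its nonzero pattern ...
      MatMerge_SeqsToMPINumeric(seqmat,mpimat);
*/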
3919: static PetscEvent logkey_seqstompi = 0;
3922: PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3923: {
3924: PetscErrorCode ierr;
3927: if (!logkey_seqstompi) {
3929: }
3931: if (scall == MAT_INITIAL_MATRIX){
3932: MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);
3933: }
3934: MatMerge_SeqsToMPINumeric(seqmat,*mpimat);
3936: return(0);
3937: }
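/*
   Illustrative sketch of the driver above: each process assembles its own sequential
   contribution and the contributions are added into one parallel MPIAIJ matrix. The sizes
   and preallocation below are hypothetical.

      Mat seqmat,mpimat;

      MatCreateSeqAIJ(PETSC_COMM_SELF,M,N,5,PETSC_NULL,&seqmat);
      ... set values in seqmat, then MatAssemblyBegin/End(seqmat,MAT_FINAL_ASSEMBLY) ...
      MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
      ... later, with new values but the same pattern in seqmat ...
      MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
*/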
3938: static PetscEvent logkey_getlocalmat = 0;
3941: /*@C
3942: MatGetLocalMat - Creates a SeqAIJ matrix by taking all the local rows of A
3944: Not Collective
3946: Input Parameters:
3947: + A - the matrix
3948: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3950: Output Parameter:
3951: . A_loc - the local sequential matrix generated
3953: Level: developer
3955: @*/
3956: PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3957: {
3958: PetscErrorCode ierr;
3959: Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data;
3960: Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3961: PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3962: PetscScalar *aa=a->a,*ba=b->a,*ca;
3963: PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3964: PetscInt *ci,*cj,col,ncols_d,ncols_o,jo;
3967: if (!logkey_getlocalmat) {
3969: }
3971: if (scall == MAT_INITIAL_MATRIX){
3972: PetscMalloc((1+am)*sizeof(PetscInt),&ci);
3973: ci[0] = 0;
3974: for (i=0; i<am; i++){
3975: ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3976: }
3977: PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);
3978: PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);
3979: k = 0;
3980: for (i=0; i<am; i++) {
3981: ncols_o = bi[i+1] - bi[i];
3982: ncols_d = ai[i+1] - ai[i];
3983: /* off-diagonal portion of A */
3984: for (jo=0; jo<ncols_o; jo++) {
3985: col = cmap[*bj];
3986: if (col >= cstart) break;
3987: cj[k] = col; bj++;
3988: ca[k++] = *ba++;
3989: }
3990: /* diagonal portion of A */
3991: for (j=0; j<ncols_d; j++) {
3992: cj[k] = cstart + *aj++;
3993: ca[k++] = *aa++;
3994: }
3995: /* off-diagonal portion of A */
3996: for (j=jo; j<ncols_o; j++) {
3997: cj[k] = cmap[*bj++];
3998: ca[k++] = *ba++;
3999: }
4000: }
4001: /* put together the new matrix */
4002: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);
4003: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4004: /* Since these are PETSc arrays, change flags to free them as necessary. */
4005: mat = (Mat_SeqAIJ*)(*A_loc)->data;
4006: mat->free_a = PETSC_TRUE;
4007: mat->free_ij = PETSC_TRUE;
4008: mat->nonew = 0;
4009: } else if (scall == MAT_REUSE_MATRIX){
4010: mat=(Mat_SeqAIJ*)(*A_loc)->data;
4011: ci = mat->i; cj = mat->j; ca = mat->a;
4012: for (i=0; i<am; i++) {
4013: /* off-diagonal portion of A */
4014: ncols_o = bi[i+1] - bi[i];
4015: for (jo=0; jo<ncols_o; jo++) {
4016: col = cmap[*bj];
4017: if (col >= cstart) break;
4018: *ca++ = *ba++; bj++;
4019: }
4020: /* diagonal portion of A */
4021: ncols_d = ai[i+1] - ai[i];
4022: for (j=0; j<ncols_d; j++) *ca++ = *aa++;
4023: /* off-diagonal portion of A */
4024: for (j=jo; j<ncols_o; j++) {
4025: *ca++ = *ba++; bj++;
4026: }
4027: }
4028: } else {
4029: SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4030: }
4033: return(0);
4034: }
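/*
   Illustrative sketch for MatGetLocalMat(): the first call builds a sequential matrix
   holding this process's rows of A with global column numbering; a later call with
   MAT_REUSE_MATRIX only refreshes the numerical values.

      Mat A_loc;

      MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
      ... change the values of A ...
      MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
      MatDestroy(A_loc);
*/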
4036: static PetscEvent logkey_getlocalmatcondensed = 0;
4039: /*@C
4040: MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all the local rows of A and only the columns containing nonzeros
4042: Not Collective
4044: Input Parameters:
4045: + A - the matrix
4046: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4047: - row, col - index sets of rows and columns to extract (or PETSC_NULL)
4049: Output Parameter:
4050: . A_loc - the local sequential matrix generated
4052: Level: developer
4054: @*/
4055: PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4056: {
4057: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
4058: PetscErrorCode ierr;
4059: PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4060: IS isrowa,iscola;
4061: Mat *aloc;
4064: if (!logkey_getlocalmatcondensed) {
4066: }
4068: if (!row){
4069: start = A->rmap.rstart; end = A->rmap.rend;
4070: ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);
4071: } else {
4072: isrowa = *row;
4073: }
4074: if (!col){
4075: start = A->cmap.rstart;
4076: cmap = a->garray;
4077: nzA = a->A->cmap.n;
4078: nzB = a->B->cmap.n;
4079: PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);
4080: ncols = 0;
4081: for (i=0; i<nzB; i++) {
4082: if (cmap[i] < start) idx[ncols++] = cmap[i];
4083: else break;
4084: }
4085: imark = i;
4086: for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4087: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4088: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);
4089: PetscFree(idx);
4090: } else {
4091: iscola = *col;
4092: }
4093: if (scall != MAT_INITIAL_MATRIX){
4094: PetscMalloc(sizeof(Mat),&aloc);
4095: aloc[0] = *A_loc;
4096: }
4097: MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);
4098: *A_loc = aloc[0];
4099: PetscFree(aloc);
4100: if (!row){
4101: ISDestroy(isrowa);
4102: }
4103: if (!col){
4104: ISDestroy(iscola);
4105: }
4107: return(0);
4108: }
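/*
   Illustrative sketch for MatGetLocalMatCondensed(): passing PETSC_NULL for the row and
   column index sets extracts all local rows and only the columns in which this process
   has nonzeros.

      Mat A_loc;

      MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
      ...
      MatDestroy(A_loc);
*/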
4110: static PetscEvent logkey_GetBrowsOfAcols = 0;
4113: /*@C
4114: MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4116: Collective on Mat
4118: Input Parameters:
4119: + A,B - the matrices in mpiaij format
4120: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4121: - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4123: Output Parameter:
4124: + rowb, colb - index sets of rows and columns of B to extract
4125: . brstart - the row of B_seq at which B's own local rows (the next B->rmap.n rows) begin
4126: - B_seq - the sequential matrix generated
4128: Level: developer
4130: @*/
4131: PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4132: {
4133: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
4134: PetscErrorCode ierr;
4135: PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4136: IS isrowb,iscolb;
4137: Mat *bseq;
4138:
4140: if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4141: SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4142: }
4143: if (!logkey_GetBrowsOfAcols) {
4145: }
4147:
4148: if (scall == MAT_INITIAL_MATRIX){
4149: start = A->cmap.rstart;
4150: cmap = a->garray;
4151: nzA = a->A->cmap.n;
4152: nzB = a->B->cmap.n;
4153: PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);
4154: ncols = 0;
4155: for (i=0; i<nzB; i++) { /* row < local row index */
4156: if (cmap[i] < start) idx[ncols++] = cmap[i];
4157: else break;
4158: }
4159: imark = i;
4160: for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */
4161: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4162: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);
4163: PetscFree(idx);
4164: *brstart = imark;
4165: ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);
4166: } else {
4167: if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4168: isrowb = *rowb; iscolb = *colb;
4169: PetscMalloc(sizeof(Mat),&bseq);
4170: bseq[0] = *B_seq;
4171: }
4172: MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);
4173: *B_seq = bseq[0];
4174: PetscFree(bseq);
4175: if (!rowb){
4176: ISDestroy(isrowb);
4177: } else {
4178: *rowb = isrowb;
4179: }
4180: if (!colb){
4181: ISDestroy(iscolb);
4182: } else {
4183: *colb = iscolb;
4184: }
4186: return(0);
4187: }
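/*
   Illustrative sketch for MatGetBrowsOfAcols(): on the first call the row and column index
   sets of B are created and returned, so a later call with MAT_REUSE_MATRIX (assuming the
   nonzero pattern is unchanged) can skip rebuilding them.

      IS       rowb = PETSC_NULL,colb = PETSC_NULL;
      PetscInt brstart;
      Mat      B_seq;

      MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);
      ...
      MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);
      ISDestroy(rowb); ISDestroy(colb); MatDestroy(B_seq);
*/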
4189: static PetscEvent logkey_GetBrowsOfAocols = 0;
4192: /*@C
4193: MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4194: of the OFF-DIAGONAL portion of local A
4196: Collective on Mat
4198: Input Parameters:
4199: + A,B - the matrices in mpiaij format
4200: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4201: . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4202: - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4204: Output Parameter:
4205: . B_oth - the sequential matrix generated
4207: Level: developer
4209: @*/
4210: PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
4211: {
4212: VecScatter_MPI_General *gen_to,*gen_from;
4213: PetscErrorCode ierr;
4214: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
4215: Mat_SeqAIJ *b_oth;
4216: VecScatter ctx=a->Mvctx;
4217: MPI_Comm comm=ctx->comm;
4218: PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank;
4219: PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
4220: PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA;
4221: PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4222: MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4223: MPI_Status *sstatus,rstatus;
4224: PetscMPIInt jj;
4225: PetscInt *cols,sbs,rbs;
4226: PetscScalar *vals;
4229: if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4230: SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4231: }
4232: if (!logkey_GetBrowsOfAocols) {
4234: }
4236: MPI_Comm_rank(comm,&rank);
4238: gen_to = (VecScatter_MPI_General*)ctx->todata;
4239: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4240: rvalues = gen_from->values; /* holds the lengths of the rows to be received */
4241: svalues = gen_to->values; /* holds the lengths of the rows to be sent */
4242: nrecvs = gen_from->n;
4243: nsends = gen_to->n;
4245: PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);
4246: srow = gen_to->indices; /* local row index to be sent */
4247: sstarts = gen_to->starts;
4248: sprocs = gen_to->procs;
4249: sstatus = gen_to->sstatus;
4250: sbs = gen_to->bs;
4251: rstarts = gen_from->starts;
4252: rprocs = gen_from->procs;
4253: rbs = gen_from->bs;
4255: if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4256: if (scall == MAT_INITIAL_MATRIX){
4257: /* i-array */
4258: /*---------*/
4259: /* post receives */
4260: for (i=0; i<nrecvs; i++){
4261: rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4262: nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4263: MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
4264: }
4266: /* pack the outgoing message */
4267: PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);
4268: rstartsj = sstartsj + nsends +1;
4269: sstartsj[0] = 0; rstartsj[0] = 0;
4270: len = 0; /* total length of j or a array to be sent */
4271: k = 0;
4272: for (i=0; i<nsends; i++){
4273: rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4274: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4275: for (j=0; j<nrows; j++) {
4276: row = srow[k] + B->rmap.range[rank]; /* global row idx */
4277: for (l=0; l<sbs; l++){
4278: MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL); /* rowlength */
4279: rowlen[j*sbs+l] = ncols;
4280: len += ncols;
4281: MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);
4282: }
4283: k++;
4284: }
4285: MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);
4286: sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4287: }
4288: /* recvs and sends of i-array are completed */
4289: i = nrecvs;
4290: while (i--) {
4291: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
4292: }
4293: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
4295: /* allocate buffers for sending j and a arrays */
4296: PetscMalloc((len+1)*sizeof(PetscInt),&bufj);
4297: PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);
4299: /* create i-array of B_oth */
4300: PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);
4301: b_othi[0] = 0;
4302: len = 0; /* total length of j or a array to be received */
4303: k = 0;
4304: for (i=0; i<nrecvs; i++){
4305: rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4306: nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4307: for (j=0; j<nrows; j++) {
4308: b_othi[k+1] = b_othi[k] + rowlen[j];
4309: len += rowlen[j]; k++;
4310: }
4311: rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4312: }
4314: /* allocate space for j and a arrays of B_oth */
4315: PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);
4316: PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);
4318: /* j-array */
4319: /*---------*/
4320: /* post receives of j-array */
4321: for (i=0; i<nrecvs; i++){
4322: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4323: MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
4324: }
4326: /* pack the outgoing message j-array */
4327: k = 0;
4328: for (i=0; i<nsends; i++){
4329: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4330: bufJ = bufj+sstartsj[i];
4331: for (j=0; j<nrows; j++) {
4332: row = srow[k++] + B->rmap.range[rank]; /* global row idx */
4333: for (ll=0; ll<sbs; ll++){
4334: MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);
4335: for (l=0; l<ncols; l++){
4336: *bufJ++ = cols[l];
4337: }
4338: MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);
4339: }
4340: }
4341: MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);
4342: }
4344: /* recvs and sends of j-array are completed */
4345: i = nrecvs;
4346: while (i--) {
4347: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
4348: }
4349: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
4350: } else if (scall == MAT_REUSE_MATRIX){
4351: sstartsj = *startsj;
4352: rstartsj = sstartsj + nsends +1;
4353: bufa = *bufa_ptr;
4354: b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
4355: b_otha = b_oth->a;
4356: } else {
4357: SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
4358: }
4360: /* a-array */
4361: /*---------*/
4362: /* post receives of a-array */
4363: for (i=0; i<nrecvs; i++){
4364: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4365: MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
4366: }
4368: /* pack the outgoing message a-array */
4369: k = 0;
4370: for (i=0; i<nsends; i++){
4371: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4372: bufA = bufa+sstartsj[i];
4373: for (j=0; j<nrows; j++) {
4374: row = srow[k++] + B->rmap.range[rank]; /* global row idx */
4375: for (ll=0; ll<sbs; ll++){
4376: MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);
4377: for (l=0; l<ncols; l++){
4378: *bufA++ = vals[l];
4379: }
4380: MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);
4381: }
4382: }
4383: MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
4384: }
4385: /* recvs and sends of a-array are completed */
4386: i = nrecvs;
4387: while (i--) {
4388: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
4389: }
4390: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
4391: PetscFree2(rwaits,swaits);
4393: if (scall == MAT_INITIAL_MATRIX){
4394: /* put together the new matrix */
4395: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);
4397: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4398: /* Since these are PETSc arrays, change flags to free them as necessary. */
4399: b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
4400: b_oth->free_a = PETSC_TRUE;
4401: b_oth->free_ij = PETSC_TRUE;
4402: b_oth->nonew = 0;
4404: PetscFree(bufj);
4405: if (!startsj || !bufa_ptr){
4406: PetscFree(sstartsj);
4407: PetscFree(bufa);
4408: } else {
4409: *startsj = sstartsj;
4410: *bufa_ptr = bufa;
4411: }
4412: }
4414: return(0);
4415: }
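/*
   Illustrative sketch for MatGetBrowsOfAoCols(): passing addresses for startsj and bufa
   saves the communication offsets and send buffer from the first call, so a later
   MAT_REUSE_MATRIX call only exchanges the new numerical values.

      PetscInt    *startsj = PETSC_NULL;
      PetscScalar *bufa    = PETSC_NULL;
      Mat         B_oth;

      MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);
      ...
      MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);
      MatDestroy(B_oth); PetscFree(startsj); PetscFree(bufa);
*/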
4419: /*@C
4420: MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4422: Not Collective
4424: Input Parameters:
4425: . A - The matrix in mpiaij format
4427: Output Parameter:
4428: + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4429: . colmap - A map from global column index to local index into lvec
4430: - multScatter - A scatter from the argument of a matrix-vector product to lvec
4432: Level: developer
4434: @*/
4435: #if defined (PETSC_USE_CTABLE)
4436: PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4437: #else
4438: PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4439: #endif
4440: {
4441: Mat_MPIAIJ *a;
4448: a = (Mat_MPIAIJ *) A->data;
4449: if (lvec) *lvec = a->lvec;
4450: if (colmap) *colmap = a->colmap;
4451: if (multScatter) *multScatter = a->Mvctx;
4452: return(0);
4453: }
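/*
   Illustrative sketch for MatGetCommunicationStructs(); an output pointer that is not
   needed may be passed as PETSC_NULL. The type of colmap depends on PETSC_USE_CTABLE,
   matching the prototype above.

      Vec        lvec;
      VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
      PetscTable colmap;
   #else
      PetscInt   *colmap;
   #endif

      MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);
*/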
4460: /*MC
4461: MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4463: Options Database Keys:
4464: . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4466: Level: beginner
4468: .seealso: MatCreateMPIAIJ
4469: M*/
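/*
   Illustrative sketch of creating a matrix of this type (sizes and preallocation values
   are hypothetical); alternatively, MatSetFromOptions() together with -mat_type mpiaij
   selects the type at run time.

      Mat A;

      MatCreate(comm,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);
*/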
4474: PetscErrorCode MatCreate_MPIAIJ(Mat B)
4475: {
4476: Mat_MPIAIJ *b;
4478: PetscMPIInt size;
4481: MPI_Comm_size(B->comm,&size);
4483: PetscNew(Mat_MPIAIJ,&b);
4484: B->data = (void*)b;
4485: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
4486: B->factor = 0;
4487: B->rmap.bs = 1;
4488: B->assembled = PETSC_FALSE;
4489: B->mapping = 0;
4491: B->insertmode = NOT_SET_VALUES;
4492: b->size = size;
4493: MPI_Comm_rank(B->comm,&b->rank);
4495: /* build cache for off array entries formed */
4496: MatStashCreate_Private(B->comm,1,&B->stash);
4497: b->donotstash = PETSC_FALSE;
4498: b->colmap = 0;
4499: b->garray = 0;
4500: b->roworiented = PETSC_TRUE;
4502: /* stuff used for matrix vector multiply */
4503: b->lvec = PETSC_NULL;
4504: b->Mvctx = PETSC_NULL;
4506: /* stuff for MatGetRow() */
4507: b->rowindices = 0;
4508: b->rowvalues = 0;
4509: b->getrowactive = PETSC_FALSE;
4512: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4513: "MatStoreValues_MPIAIJ",
4514: MatStoreValues_MPIAIJ);
4515: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4516: "MatRetrieveValues_MPIAIJ",
4517: MatRetrieveValues_MPIAIJ);
4518: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4519: "MatGetDiagonalBlock_MPIAIJ",
4520: MatGetDiagonalBlock_MPIAIJ);
4521: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4522: "MatIsTranspose_MPIAIJ",
4523: MatIsTranspose_MPIAIJ);
4524: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4525: "MatMPIAIJSetPreallocation_MPIAIJ",
4526: MatMPIAIJSetPreallocation_MPIAIJ);
4527: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4528: "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4529: MatMPIAIJSetPreallocationCSR_MPIAIJ);
4530: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4531: "MatDiagonalScaleLocal_MPIAIJ",
4532: MatDiagonalScaleLocal_MPIAIJ);
4533: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4534: "MatConvert_MPIAIJ_MPICSRPERM",
4535: MatConvert_MPIAIJ_MPICSRPERM);
4536: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4537: "MatConvert_MPIAIJ_MPICRL",
4538: MatConvert_MPIAIJ_MPICRL);
4539: PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);
4540: return(0);
4541: }
4546: /*@C
4547: MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
4548: and "off-diagonal" part of the matrix in CSR format.
4550: Collective on MPI_Comm
4552: Input Parameters:
4553: + comm - MPI communicator
4554: . m - number of local rows (Cannot be PETSC_DECIDE)
4555: . n - This value should be the same as the local size used in creating the
4556: x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4557: it calculated if N is given). For square matrices n is almost always m.
4558: . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4559: . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4560: . i - row indices for "diagonal" portion of matrix
4561: . j - column indices
4562: . a - matrix values
4563: . oi - row indices for "off-diagonal" portion of matrix
4564: . oj - column indices
4565: - oa - matrix values
4567: Output Parameter:
4568: . mat - the matrix
4570: Level: advanced
4572: Notes:
4573: The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
4575: The i and j indices are 0 based
4576:
4577: See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4580: .keywords: matrix, aij, compressed row, sparse, parallel
4582: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4583: MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4584: @*/
4585: PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4586: PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4587: {
4589: Mat_MPIAIJ *maij;
4592: if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4593: if (i[0]) {
4594: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4595: }
4596: if (oi[0]) {
4597: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4598: }
4599: MatCreate(comm,mat);
4600: MatSetSizes(*mat,m,n,M,N);
4601: MatSetType(*mat,MATMPIAIJ);
4602: maij = (Mat_MPIAIJ*) (*mat)->data;
4603: maij->donotstash = PETSC_TRUE;
4604: (*mat)->preallocated = PETSC_TRUE;
4606: (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
4607: PetscMapSetUp(&(*mat)->rmap);
4608: PetscMapSetUp(&(*mat)->cmap);
4610: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);
4611: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);
4613: MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);
4614: MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);
4615: MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);
4616: MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);
4618: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
4619: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
4620: return(0);
4621: }
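/*
   Illustrative sketch for MatCreateMPIAIJWithSplitArrays(); the CSR arrays below are
   schematic placeholders. Because the arrays are NOT copied, they must remain valid for
   the lifetime of the matrix.

      PetscInt    i[]  = {0, ...},  j[]  = {...};       diagonal block, 0-based, i[0] = 0
      PetscScalar a[]  = {...};
      PetscInt    oi[] = {0, ...},  oj[] = {...};       off-diagonal block, 0-based, oi[0] = 0
      PetscScalar oa[] = {...};
      Mat         A;

      MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);
*/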
4623: /*
4624: Special version for direct calls from Fortran
4625: */
4626: #if defined(PETSC_HAVE_FORTRAN_CAPS)
4627: #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4628: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4629: #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4630: #endif
4632: /* Change these macros so they can be used in a void function */
4633: #undef CHKERRQ
4634: #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
4635: #undef SETERRQ2
4636: #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
4637: #undef SETERRQ
4638: #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
4643: void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4644: {
4645: Mat mat = *mmat;
4646: PetscInt m = *mm, n = *mn;
4647: InsertMode addv = *maddv;
4648: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
4649: PetscScalar value;
4652: MatPreallocated(mat);
4653: if (mat->insertmode == NOT_SET_VALUES) {
4654: mat->insertmode = addv;
4655: }
4656: #if defined(PETSC_USE_DEBUG)
4657: else if (mat->insertmode != addv) {
4658: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4659: }
4660: #endif
4661: {
4662: PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4663: PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4664: PetscTruth roworiented = aij->roworiented;
4666: /* Some Variables required in the macro */
4667: Mat A = aij->A;
4668: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
4669: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4670: PetscScalar *aa = a->a;
4671: PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4672: Mat B = aij->B;
4673: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
4674: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4675: PetscScalar *ba = b->a;
4677: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4678: PetscInt nonew = a->nonew;
4679: PetscScalar *ap1,*ap2;
4682: for (i=0; i<m; i++) {
4683: if (im[i] < 0) continue;
4684: #if defined(PETSC_USE_DEBUG)
4685: if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4686: #endif
4687: if (im[i] >= rstart && im[i] < rend) {
4688: row = im[i] - rstart;
4689: lastcol1 = -1;
4690: rp1 = aj + ai[row];
4691: ap1 = aa + ai[row];
4692: rmax1 = aimax[row];
4693: nrow1 = ailen[row];
4694: low1 = 0;
4695: high1 = nrow1;
4696: lastcol2 = -1;
4697: rp2 = bj + bi[row];
4698: ap2 = ba + bi[row];
4699: rmax2 = bimax[row];
4700: nrow2 = bilen[row];
4701: low2 = 0;
4702: high2 = nrow2;
4704: for (j=0; j<n; j++) {
4705: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4706: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4707: if (in[j] >= cstart && in[j] < cend){
4708: col = in[j] - cstart;
4709: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4710: } else if (in[j] < 0) continue;
4711: #if defined(PETSC_USE_DEBUG)
4712: else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4713: #endif
4714: else {
4715: if (mat->was_assembled) {
4716: if (!aij->colmap) {
4717: CreateColmap_MPIAIJ_Private(mat);
4718: }
4719: #if defined (PETSC_USE_CTABLE)
4720: PetscTableFind(aij->colmap,in[j]+1,&col);
4721: col--;
4722: #else
4723: col = aij->colmap[in[j]] - 1;
4724: #endif
4725: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4726: DisAssemble_MPIAIJ(mat);
4727: col = in[j];
4728: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4729: B = aij->B;
4730: b = (Mat_SeqAIJ*)B->data;
4731: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4732: rp2 = bj + bi[row];
4733: ap2 = ba + bi[row];
4734: rmax2 = bimax[row];
4735: nrow2 = bilen[row];
4736: low2 = 0;
4737: high2 = nrow2;
4738: bm = aij->B->rmap.n;
4739: ba = b->a;
4740: }
4741: } else col = in[j];
4742: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4743: }
4744: }
4745: } else {
4746: if (!aij->donotstash) {
4747: if (roworiented) {
4748: if (ignorezeroentries && v[i*n] == 0.0) continue;
4749: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
4750: } else {
4751: if (ignorezeroentries && v[i] == 0.0) continue;
4752: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
4753: }
4754: }
4755: }
4756: }}
4757: PetscFunctionReturnVoid();
4758: }