Actual source code: mpirowbs.c
1: #define PETSCMAT_DLL
3: #include "../src/mat/impls/rowbs/mpi/mpirowbs.h"
5: #define CHUNCKSIZE_LOCAL 10
9: static PetscErrorCode MatFreeRowbs_Private(Mat A,int n,int *i,PetscScalar *v)
10: {
14: if (v) {
15: #if defined(PETSC_USE_LOG)
16: int len = -n*(sizeof(int)+sizeof(PetscScalar));
17: #endif
18: PetscFree(v);
19: PetscLogObjectMemory(A,len);
20: }
21: return(0);
22: }
26: static PetscErrorCode MatMallocRowbs_Private(Mat A,int n,int **i,PetscScalar **v)
27: {
31: if (!n) {
32: *i = 0; *v = 0;
33: } else {
34: len = n*(sizeof(int) + sizeof(PetscScalar)); PetscMalloc(len,v);
35: PetscLogObjectMemory(A,len);
36: *i = (int*)(*v + n);
37: }
38: return(0);
39: }
43: PetscErrorCode MatScale_MPIRowbs(Mat inA,PetscScalar alpha)
44: {
45: Mat_MPIRowbs *a = (Mat_MPIRowbs*)inA->data;
46: BSspmat *A = a->A;
47: BSsprow *vs;
48: PetscScalar *ap;
49: int i,m = inA->rmap->n,nrow,j;
53: for (i=0; i<m; i++) {
54: vs = A->rows[i];
55: nrow = vs->length;
56: ap = vs->nz;
57: for (j=0; j<nrow; j++) {
58: ap[j] *= alpha;
59: }
60: }
61: PetscLogFlops(a->nz);
62: return(0);
63: }
65: /* ----------------------------------------------------------------- */
68: static PetscErrorCode MatCreateMPIRowbs_local(Mat A,int nz,const int nnz[])
69: {
70: Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)A->data;
72: int i,len,m = A->rmap->n,*tnnz;
73: BSspmat *bsmat;
74: BSsprow *vs;
77: PetscMalloc((m+1)*sizeof(int),&tnnz);
78: if (!nnz) {
79: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
80: if (nz <= 0) nz = 1;
81: for (i=0; i<m; i++) tnnz[i] = nz;
82: nz = nz*m;
83: } else {
84: nz = 0;
85: for (i=0; i<m; i++) {
86: if (nnz[i] <= 0) tnnz[i] = 1;
87: else tnnz[i] = nnz[i];
88: nz += tnnz[i];
89: }
90: }
92: /* Allocate BlockSolve matrix context */
93: PetscNewLog(A,BSspmat,&bsif->A);
94: bsmat = bsif->A;
95: BSset_mat_icc_storage(bsmat,PETSC_FALSE);
96: BSset_mat_symmetric(bsmat,PETSC_FALSE);
97: len = m*(sizeof(BSsprow*) + sizeof(BSsprow)); PetscMalloc(len,&bsmat->rows);
98: bsmat->num_rows = m;
99: bsmat->global_num_rows = A->rmap->N;
100: bsmat->map = bsif->bsmap;
101: vs = (BSsprow*)(bsmat->rows + m);
102: for (i=0; i<m; i++) {
103: bsmat->rows[i] = vs;
104: bsif->imax[i] = tnnz[i];
105: vs->diag_ind = -1;
106: MatMallocRowbs_Private(A,tnnz[i],&(vs->col),&(vs->nz));
107: /* put zero on diagonal */
108: /*vs->length = 1;
109: vs->col[0] = i + bsif->rstart;
110: vs->nz[0] = 0.0;*/
111: vs->length = 0;
112: vs++;
113: }
114: PetscLogObjectMemory(A,sizeof(BSspmat) + len);
115: bsif->nz = 0;
116: bsif->maxnz = nz;
117: bsif->sorted = 0;
118: bsif->roworiented = PETSC_TRUE;
119: bsif->nonew = 0;
120: bsif->bs_color_single = 0;
122: PetscFree(tnnz);
123: return(0);
124: }
128: static PetscErrorCode MatSetValues_MPIRowbs_local(Mat AA,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
129: {
130: Mat_MPIRowbs *mat = (Mat_MPIRowbs*)AA->data;
131: BSspmat *A = mat->A;
132: BSsprow *vs;
134: int *rp,k,a,b,t,ii,row,nrow,i,col,l,rmax;
135: int *imax = mat->imax,nonew = mat->nonew,sorted = mat->sorted;
136: PetscScalar *ap,value;
139: for (k=0; k<m; k++) { /* loop over added rows */
140: row = im[k];
141: if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
142: if (row >= AA->rmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,AA->rmap->n-1);
143: vs = A->rows[row];
144: ap = vs->nz; rp = vs->col;
145: rmax = imax[row]; nrow = vs->length;
146: a = 0;
147: for (l=0; l<n; l++) { /* loop over added columns */
148: if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative col: %d",in[l]);
149: if (in[l] >= AA->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],AA->cmap->N-1);
150: col = in[l]; value = *v++;
151: if (!sorted) a = 0; b = nrow;
152: while (b-a > 5) {
153: t = (b+a)/2;
154: if (rp[t] > col) b = t;
155: else a = t;
156: }
157: for (i=a; i<b; i++) {
158: if (rp[i] > col) break;
159: if (rp[i] == col) {
160: if (addv == ADD_VALUES) ap[i] += value;
161: else ap[i] = value;
162: goto noinsert;
163: }
164: }
165: if (nonew) goto noinsert;
166: if (nrow >= rmax) {
167: /* there is no extra room in row, therefore enlarge */
168: int *itemp,*iout,*iin = vs->col;
169: PetscScalar *vout,*vin = vs->nz,*vtemp;
171: /* malloc new storage space */
172: imax[row] += CHUNCKSIZE_LOCAL;
173: MatMallocRowbs_Private(AA,imax[row],&itemp,&vtemp);
174: vout = vtemp; iout = itemp;
175: for (ii=0; ii<i; ii++) {
176: vout[ii] = vin[ii];
177: iout[ii] = iin[ii];
178: }
179: vout[i] = value;
180: iout[i] = col;
181: for (ii=i+1; ii<=nrow; ii++) {
182: vout[ii] = vin[ii-1];
183: iout[ii] = iin[ii-1];
184: }
185: /* free old row storage */
186: if (rmax > 0) {
187: MatFreeRowbs_Private(AA,rmax,vs->col,vs->nz);
188: }
189: vs->col = iout; vs->nz = vout;
190: rmax = imax[row];
191: mat->maxnz += CHUNCKSIZE_LOCAL;
192: mat->reallocs++;
193: } else {
194: /* shift higher columns over to make room for newie */
195: for (ii=nrow-1; ii>=i; ii--) {
196: rp[ii+1] = rp[ii];
197: ap[ii+1] = ap[ii];
198: }
199: rp[i] = col;
200: ap[i] = value;
201: }
202: nrow++;
203: mat->nz++;
204: AA->same_nonzero = PETSC_FALSE;
205: noinsert:;
206: a = i + 1;
207: }
208: vs->length = nrow;
209: }
210: return(0);
211: }
216: static PetscErrorCode MatAssemblyBegin_MPIRowbs_local(Mat A,MatAssemblyType mode)
217: {
219: return(0);
220: }
224: static PetscErrorCode MatAssemblyEnd_MPIRowbs_local(Mat AA,MatAssemblyType mode)
225: {
226: Mat_MPIRowbs *a = (Mat_MPIRowbs*)AA->data;
227: BSspmat *A = a->A;
228: BSsprow *vs;
229: int i,j,rstart = AA->rmap->rstart;
232: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
234: /* Mark location of diagonal */
235: for (i=0; i<AA->rmap->n; i++) {
236: vs = A->rows[i];
237: for (j=0; j<vs->length; j++) {
238: if (vs->col[j] == i + rstart) {
239: vs->diag_ind = j;
240: break;
241: }
242: }
243: if (vs->diag_ind == -1) {
244: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"no diagonal entry");
245: }
246: }
247: return(0);
248: }
252: static PetscErrorCode MatZeroRows_MPIRowbs_local(Mat A,PetscInt N,const PetscInt rz[],PetscScalar diag)
253: {
254: Mat_MPIRowbs *a = (Mat_MPIRowbs*)A->data;
255: BSspmat *l = a->A;
257: int i,m = A->rmap->n - 1,col,base=A->rmap->rstart;
260: if (a->keepzeroedrows) {
261: for (i=0; i<N; i++) {
262: if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"row out of range");
263: PetscMemzero(l->rows[rz[i]]->nz,l->rows[rz[i]]->length*sizeof(PetscScalar));
264: if (diag != 0.0) {
265: col=rz[i]+base;
266: MatSetValues_MPIRowbs_local(A,1,&rz[i],1,&col,&diag,INSERT_VALUES);
267: }
268: }
269: } else {
270: if (diag != 0.0) {
271: for (i=0; i<N; i++) {
272: if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Out of range");
273: if (l->rows[rz[i]]->length > 0) { /* in case row was completely empty */
274: l->rows[rz[i]]->length = 1;
275: l->rows[rz[i]]->nz[0] = diag;
276: l->rows[rz[i]]->col[0] = A->rmap->rstart + rz[i];
277: } else {
278: col=rz[i]+base;
279: MatSetValues_MPIRowbs_local(A,1,&rz[i],1,&col,&diag,INSERT_VALUES);
280: }
281: }
282: } else {
283: for (i=0; i<N; i++) {
284: if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Out of range");
285: l->rows[rz[i]]->length = 0;
286: }
287: }
288: A->same_nonzero = PETSC_FALSE;
289: }
290: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
291: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
292: return(0);
293: }
297: static PetscErrorCode MatNorm_MPIRowbs_local(Mat A,NormType type,PetscReal *norm)
298: {
299: Mat_MPIRowbs *mat = (Mat_MPIRowbs*)A->data;
300: BSsprow *vs,**rs;
301: PetscScalar *xv;
302: PetscReal sum = 0.0;
304: int *xi,nz,i,j;
307: rs = mat->A->rows;
308: if (type == NORM_FROBENIUS) {
309: for (i=0; i<A->rmap->n; i++) {
310: vs = *rs++;
311: nz = vs->length;
312: xv = vs->nz;
313: while (nz--) {
314: #if defined(PETSC_USE_COMPLEX)
315: sum += PetscRealPart(PetscConj(*xv)*(*xv)); xv++;
316: #else
317: sum += (*xv)*(*xv); xv++;
318: #endif
319: }
320: }
321: *norm = sqrt(sum);
322: } else if (type == NORM_1) { /* max column norm */
323: PetscReal *tmp;
324: PetscMalloc(A->cmap->n*sizeof(PetscReal),&tmp);
325: PetscMemzero(tmp,A->cmap->n*sizeof(PetscReal));
326: *norm = 0.0;
327: for (i=0; i<A->rmap->n; i++) {
328: vs = *rs++;
329: nz = vs->length;
330: xi = vs->col;
331: xv = vs->nz;
332: while (nz--) {
333: tmp[*xi] += PetscAbsScalar(*xv);
334: xi++; xv++;
335: }
336: }
337: for (j=0; j<A->cmap->n; j++) {
338: if (tmp[j] > *norm) *norm = tmp[j];
339: }
340: PetscFree(tmp);
341: } else if (type == NORM_INFINITY) { /* max row norm */
342: *norm = 0.0;
343: for (i=0; i<A->rmap->n; i++) {
344: vs = *rs++;
345: nz = vs->length;
346: xv = vs->nz;
347: sum = 0.0;
348: while (nz--) {
349: sum += PetscAbsScalar(*xv); xv++;
350: }
351: if (sum > *norm) *norm = sum;
352: }
353: } else {
354: SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
355: }
356: return(0);
357: }
359: /* ----------------------------------------------------------------- */
363: PetscErrorCode MatSetValues_MPIRowbs(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode av)
364: {
365: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
367: int i,j,row,col,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
368: PetscTruth roworiented = a->roworiented;
371: /* Note: There's no need to "unscale" the matrix, since scaling is
372: confined to a->pA, and we're working with a->A here */
373: for (i=0; i<m; i++) {
374: if (im[i] < 0) continue;
375: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->rmap->N-1);
376: if (im[i] >= rstart && im[i] < rend) {
377: row = im[i] - rstart;
378: for (j=0; j<n; j++) {
379: if (in[j] < 0) continue;
380: if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->cmap->N-1);
381: if (in[j] >= 0 && in[j] < mat->cmap->N){
382: col = in[j];
383: if (roworiented) {
384: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,v+i*n+j,av);
385: } else {
386: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,v+i+j*m,av);
387: }
388: } else {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid column");}
389: }
390: } else {
391: if (!a->donotstash) {
392: if (roworiented) {
393: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
394: } else {
395: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
396: }
397: }
398: }
399: }
400: return(0);
401: }
405: PetscErrorCode MatAssemblyBegin_MPIRowbs(Mat mat,MatAssemblyType mode)
406: {
407: MPI_Comm comm = ((PetscObject)mat)->comm;
409: int nstash,reallocs;
410: InsertMode addv;
413: /* Note: There's no need to "unscale" the matrix, since scaling is
414: confined to a->pA, and we're working with a->A here */
416: /* make sure all processors are either in INSERTMODE or ADDMODE */
417: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
418: if (addv == (ADD_VALUES|INSERT_VALUES)) {
419: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some procs inserted; others added");
420: }
421: mat->insertmode = addv; /* in case this processor had no cache */
423: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
424: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
425: PetscInfo2(mat,"Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
426: return(0);
427: }
431: static PetscErrorCode MatView_MPIRowbs_ASCII(Mat mat,PetscViewer viewer)
432: {
433: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
435: int i,j;
436: PetscTruth iascii;
437: BSspmat *A = a->A;
438: BSsprow **rs = A->rows;
439: PetscViewerFormat format;
442: PetscViewerGetFormat(viewer,&format);
443: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
445: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
446: int ind_l,ind_g,clq_l,clq_g,color;
447: ind_l = BSlocal_num_inodes(a->pA);CHKERRBS(0);
448: ind_g = BSglobal_num_inodes(a->pA);CHKERRBS(0);
449: clq_l = BSlocal_num_cliques(a->pA);CHKERRBS(0);
450: clq_g = BSglobal_num_cliques(a->pA);CHKERRBS(0);
451: color = BSnum_colors(a->pA);CHKERRBS(0);
452: PetscViewerASCIIPrintf(viewer," %d global inode(s), %d global clique(s), %d color(s)\n",ind_g,clq_g,color);
453: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d local inode(s), %d local clique(s)\n",a->rank,ind_l,clq_l);
454: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
455: for (i=0; i<A->num_rows; i++) {
456: PetscViewerASCIISynchronizedPrintf(viewer,"row %d:",i+mat->rmap->rstart);
457: for (j=0; j<rs[i]->length; j++) {
458: if (rs[i]->nz[j]) {PetscViewerASCIISynchronizedPrintf(viewer," %d %g ",rs[i]->col[j],rs[i]->nz[j]);}
459: }
460: PetscViewerASCIISynchronizedPrintf(viewer,"\n");
461: }
462: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
463: SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
464: } else {
465: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
466: for (i=0; i<A->num_rows; i++) {
467: PetscViewerASCIISynchronizedPrintf(viewer,"row %d:",i+mat->rmap->rstart);
468: for (j=0; j<rs[i]->length; j++) {
469: PetscViewerASCIISynchronizedPrintf(viewer," %d %g ",rs[i]->col[j],rs[i]->nz[j]);
470: }
471: PetscViewerASCIISynchronizedPrintf(viewer,"\n");
472: }
473: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
474: }
475: PetscViewerFlush(viewer);
476: return(0);
477: }
481: static PetscErrorCode MatView_MPIRowbs_Binary(Mat mat,PetscViewer viewer)
482: {
483: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
485: PetscMPIInt rank,size,mnz;
486: PetscInt i,M,m,*sbuff,*rowlengths;
487: PetscInt *recvcts,*recvdisp,fd,*cols,maxnz,nz,j;
488: BSspmat *A = a->A;
489: BSsprow **rs = A->rows;
490: MPI_Comm comm = ((PetscObject)mat)->comm;
491: MPI_Status status;
492: PetscScalar *vals;
493: MatInfo info;
496: MPI_Comm_size(comm,&size);
497: MPI_Comm_rank(comm,&rank);
499: M = mat->rmap->N; m = mat->rmap->n;
500: /* First gather together on the first processor the lengths of
501: each row, and write them out to the file */
502: PetscMalloc(m*sizeof(int),&sbuff);
503: for (i=0; i<A->num_rows; i++) {
504: sbuff[i] = rs[i]->length;
505: }
506: MatGetInfo(mat,MAT_GLOBAL_SUM,&info);
507: if (!rank) {
508: PetscViewerBinaryGetDescriptor(viewer,&fd);
509: PetscMalloc((4+M)*sizeof(int),&rowlengths);
510: PetscMalloc(size*sizeof(int),&recvcts);
511: recvdisp = mat->rmap->range;
512: for (i=0; i<size; i++) {
513: recvcts[i] = recvdisp[i+1] - recvdisp[i];
514: }
515: /* first four elements of rowlength are the header */
516: rowlengths[0] = ((PetscObject)mat)->cookie;
517: rowlengths[1] = mat->rmap->N;
518: rowlengths[2] = mat->cmap->N;
519: rowlengths[3] = (int)info.nz_used;
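/* Sketch of the resulting binary layout, inferred from the writes below (an assumption,
   not an official file-format description):
       [cookie, M, N, nnz]  [M row lengths]  [all column indices]  [all values]
   where the header and the gathered row lengths are written once by process 0. */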
520: MPI_Gatherv(sbuff,m,MPIU_INT,rowlengths+4,recvcts,recvdisp,MPIU_INT,0,comm);
521: PetscFree(sbuff);
522: PetscBinaryWrite(fd,rowlengths,4+M,PETSC_INT,PETSC_FALSE);
523: /* count the number of nonzeros on each processor */
524: PetscMemzero(recvcts,size*sizeof(int));
525: for (i=0; i<size; i++) {
526: for (j=recvdisp[i]; j<recvdisp[i+1]; j++) {
527: recvcts[i] += rowlengths[j+4];
528: }
529: }
530: /* allocate buffer long enough to hold largest one */
531: maxnz = 0;
532: for (i=0; i<size; i++) {
533: maxnz = PetscMax(maxnz,recvcts[i]);
534: }
535: PetscFree(rowlengths);
536: PetscFree(recvcts);
537: PetscMalloc(maxnz*sizeof(int),&cols);
539: /* binary store column indices for 0th processor */
540: nz = 0;
541: for (i=0; i<A->num_rows; i++) {
542: for (j=0; j<rs[i]->length; j++) {
543: cols[nz++] = rs[i]->col[j];
544: }
545: }
546: PetscBinaryWrite(fd,cols,nz,PETSC_INT,PETSC_FALSE);
548: /* receive and store column indices for all other processors */
549: for (i=1; i<size; i++) {
550: /* should tell processor that I am now ready and to begin the send */
551: MPI_Recv(cols,maxnz,MPIU_INT,i,((PetscObject)mat)->tag,comm,&status);
552: MPI_Get_count(&status,MPIU_INT,&mnz);
553: PetscBinaryWrite(fd,cols,mnz,PETSC_INT,PETSC_FALSE);
554: }
555: PetscFree(cols);
556: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
558: /* binary store values for 0th processor */
559: nz = 0;
560: for (i=0; i<A->num_rows; i++) {
561: for (j=0; j<rs[i]->length; j++) {
562: vals[nz++] = rs[i]->nz[j];
563: }
564: }
565: PetscBinaryWrite(fd,vals,nz,PETSC_SCALAR,PETSC_FALSE);
567: /* receive and store nonzeros for all other processors */
568: for (i=1; i<size; i++) {
569: /* should tell processor that I am now ready and to begin the send */
570: MPI_Recv(vals,maxnz,MPIU_SCALAR,i,((PetscObject)mat)->tag,comm,&status);
571: MPI_Get_count(&status,MPIU_SCALAR,&nz);
572: PetscBinaryWrite(fd,vals,nz,PETSC_SCALAR,PETSC_FALSE);
573: }
574: PetscFree(vals);
575: } else {
576: MPI_Gatherv(sbuff,m,MPI_INT,0,0,0,MPI_INT,0,comm);
577: PetscFree(sbuff);
579: /* count local nonzeros */
580: nz = 0;
581: for (i=0; i<A->num_rows; i++) {
582: for (j=0; j<rs[i]->length; j++) {
583: nz++;
584: }
585: }
586: /* copy into buffer column indices */
587: PetscMalloc(nz*sizeof(int),&cols);
588: nz = 0;
589: for (i=0; i<A->num_rows; i++) {
590: for (j=0; j<rs[i]->length; j++) {
591: cols[nz++] = rs[i]->col[j];
592: }
593: }
594: /* send */ /* should wait until processor zero tells me to go */
595: MPI_Send(cols,nz,MPI_INT,0,((PetscObject)mat)->tag,comm);
596: PetscFree(cols);
598: /* copy into buffer column values */
599: PetscMalloc(nz*sizeof(PetscScalar),&vals);
600: nz = 0;
601: for (i=0; i<A->num_rows; i++) {
602: for (j=0; j<rs[i]->length; j++) {
603: vals[nz++] = rs[i]->nz[j];
604: }
605: }
606: /* send */ /* should wait until processor zero tells me to go */
607: MPI_Send(vals,nz,MPIU_SCALAR,0,((PetscObject)mat)->tag,comm);
608: PetscFree(vals);
609: }
611: return(0);
612: }
616: PetscErrorCode MatView_MPIRowbs(Mat mat,PetscViewer viewer)
617: {
618: Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)mat->data;
620: PetscTruth iascii,isbinary;
623: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
624: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
625: if (!bsif->blocksolveassembly) {
626: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
627: }
628: if (iascii) {
629: MatView_MPIRowbs_ASCII(mat,viewer);
630: } else if (isbinary) {
631: MatView_MPIRowbs_Binary(mat,viewer);
632: } else {
633: SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIRowbs matrices",((PetscObject)viewer)->type_name);
634: }
635: return(0);
636: }
637:
640: static PetscErrorCode MatAssemblyEnd_MPIRowbs_MakeSymmetric(Mat mat)
641: {
642: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
643: BSspmat *A = a->A;
644: BSsprow *vs;
645: int size,rank,M,rstart,tag,i,j,*rtable,*w1,*w3,*w4,len,proc,nrqs;
646: int msz,*pa,bsz,nrqr,**rbuf1,**sbuf1,**ptr,*tmp,*ctr,col,idx,row;
648: int ctr_j,*sbuf1_j,k;
649: PetscScalar val=0.0;
650: MPI_Comm comm;
651: MPI_Request *s_waits1,*r_waits1;
652: MPI_Status *s_status,*r_status;
655: comm = ((PetscObject)mat)->comm;
656: tag = ((PetscObject)mat)->tag;
657: size = a->size;
658: rank = a->rank;
659: M = mat->rmap->N;
660: rstart = mat->rmap->rstart;
662: PetscMalloc(M*sizeof(int),&rtable);
663: /* Create hash table for the mapping :row -> proc */
664: for (i=0,j=0; i<size; i++) {
665: len = mat->rmap->range[i+1];
666: for (; j<len; j++) {
667: rtable[j] = i;
668: }
669: }
671: /* Evaluate communication - mesg to whom, length of mesg, and buffer space
672: required. Based on this, buffers are allocated, and data copied into them. */
673: PetscMalloc(size*4*sizeof(int),&w1);/* mesg size */
674: w3 = w1 + 2*size; /* no of IS that needs to be sent to proc i */
675: w4 = w3 + size; /* temp work space used in determining w1, w3 */
676: PetscMemzero(w1,size*3*sizeof(int)); /* initialize work vector */
678: for (i=0; i<mat->rmap->n; i++) {
679: PetscMemzero(w4,size*sizeof(int)); /* initialize work vector */
680: vs = A->rows[i];
681: for (j=0; j<vs->length; j++) {
682: proc = rtable[vs->col[j]];
683: w4[proc]++;
684: }
685: for (j=0; j<size; j++) {
686: if (w4[j]) { w1[2*j] += w4[j]; w3[j]++;}
687: }
688: }
689:
690: nrqs = 0; /* number of outgoing messages */
691: msz = 0; /* total mesg length (for all proc */
692: w1[2*rank] = 0; /* no mesg sent to itself */
693: w3[rank] = 0;
694: for (i=0; i<size; i++) {
695: if (w1[2*i]) {w1[2*i+1] = 1; nrqs++;} /* there exists a message to proc i */
696: }
697: /* pa - is list of processors to communicate with */
698: PetscMalloc((nrqs+1)*sizeof(int),&pa);
699: for (i=0,j=0; i<size; i++) {
700: if (w1[2*i]) {pa[j] = i; j++;}
701: }
703: /* Each message would have a header = 1 + 2*(no of ROWS) + data */
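/* Layout sketch of one outgoing message to process j, as assembled below (an inference
   from this routine, not a documented BlockSolve format):
       sbuf1[j][0]                          number of rows that have entries for process j (w3[j])
       sbuf1[j][2k-1], sbuf1[j][2k]         global row index and entry count of the k-th such row
       sbuf1[j][1+2*w3[j] .. w1[2*j]-1]     the column indices themselves
   so the total length is w1[2*j] = 1 + 2*w3[j] + (number of off-processor column indices). */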
704: for (i=0; i<nrqs; i++) {
705: j = pa[i];
706: w1[2*j] += w1[2*j+1] + 2*w3[j];
707: msz += w1[2*j];
708: }
709:
710: /* Do a global reduction to determine how many messages to expect */
711: PetscMaxSum(comm,w1,&bsz,&nrqr);
713: /* Allocate memory for recv buffers . Prob none if nrqr = 0 ???? */
714: PetscMalloc((nrqr+1)*sizeof(int*) + nrqr*bsz*sizeof(int),&rbuf1);
715: rbuf1[0] = (int*)(rbuf1 + nrqr);
716: for (i=1; i<nrqr; ++i) rbuf1[i] = rbuf1[i-1] + bsz;
718: /* Post the receives */
719: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&r_waits1);
720: for (i=0; i<nrqr; ++i){
721: MPI_Irecv(rbuf1[i],bsz,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits1+i);
722: }
723:
724: /* Allocate Memory for outgoing messages */
725: PetscMalloc(2*size*sizeof(int*) + (size+msz)*sizeof(int),&sbuf1);
726: ptr = sbuf1 + size; /* Pointers to the data in outgoing buffers */
727: PetscMemzero(sbuf1,2*size*sizeof(int*));
728: tmp = (int*)(sbuf1 + 2*size);
729: ctr = tmp + msz;
731: {
732: int *iptr = tmp,ict = 0;
733: for (i=0; i<nrqs; i++) {
734: j = pa[i];
735: iptr += ict;
736: sbuf1[j] = iptr;
737: ict = w1[2*j];
738: }
739: }
741: /* Form the outgoing messages */
742: /* Clean up the header space */
743: for (i=0; i<nrqs; i++) {
744: j = pa[i];
745: sbuf1[j][0] = 0;
746: PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(int));
747: ptr[j] = sbuf1[j] + 2*w3[j] + 1;
748: }
750: /* Parse the matrix and copy the data into sbuf1 */
751: for (i=0; i<mat->rmap->n; i++) {
752: PetscMemzero(ctr,size*sizeof(int));
753: vs = A->rows[i];
754: for (j=0; j<vs->length; j++) {
755: col = vs->col[j];
756: proc = rtable[col];
757: if (proc != rank) { /* copy to the outgoing buffer */
758: ctr[proc]++;
759: *ptr[proc] = col;
760: ptr[proc]++;
761: } else {
762: row = col - rstart;
763: col = i + rstart;
764: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
765: }
766: }
767: /* Update the headers for the current row */
768: for (j=0; j<size; j++) { /* Can Optimise this loop by using pa[] */
769: if ((ctr_j = ctr[j])) {
770: sbuf1_j = sbuf1[j];
771: k = ++sbuf1_j[0];
772: sbuf1_j[2*k] = ctr_j;
773: sbuf1_j[2*k-1] = i + rstart;
774: }
775: }
776: }
777: /* Check Validity of the outgoing messages */
778: {
779: int sum;
780: for (i=0 ; i<nrqs ; i++) {
781: j = pa[i];
782: if (w3[j] != sbuf1[j][0]) {SETERRQ(PETSC_ERR_PLIB,"Blew it! Header[1] mismatch!\n"); }
783: }
785: for (i=0 ; i<nrqs ; i++) {
786: j = pa[i];
787: sum = 1;
788: for (k = 1; k <= w3[j]; k++) sum += sbuf1[j][2*k]+2;
789: if (sum != w1[2*j]) { SETERRQ(PETSC_ERR_PLIB,"Blew it! Header[2-n] mismatch!\n"); }
790: }
791: }
792:
793: /* Now post the sends */
794: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
795: for (i=0; i<nrqs; ++i) {
796: j = pa[i];
797: MPI_Isend(sbuf1[j],w1[2*j],MPI_INT,j,tag,comm,s_waits1+i);
798: }
799:
800: /* Receive messages*/
801: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status);
802: for (i=0; i<nrqr; ++i) {
803: MPI_Waitany(nrqr,r_waits1,&idx,r_status+i);
804: /* Process the Message */
805: {
806: int *rbuf1_i,n_row,ct1;
808: rbuf1_i = rbuf1[idx];
809: n_row = rbuf1_i[0];
810: ct1 = 2*n_row+1;
811: val = 0.0;
812: /* Optimise this later */
813: for (j=1; j<=n_row; j++) {
814: col = rbuf1_i[2*j-1];
815: for (k=0; k<rbuf1_i[2*j]; k++,ct1++) {
816: row = rbuf1_i[ct1] - rstart;
817: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
818: }
819: }
820: }
821: }
823: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status);
824: if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status);}
826: PetscFree(rtable);
827: PetscFree(w1);
828: PetscFree(pa);
829: PetscFree(rbuf1);
830: PetscFree(sbuf1);
831: PetscFree(r_waits1);
832: PetscFree(s_waits1);
833: PetscFree(r_status);
834: PetscFree(s_status);
835: return(0);
836: }
838: /*
839: This does the BlockSolve portion of the matrix assembly.
840: It is provided in a separate routine so that users can
841: operate on the matrix (using MatScale(), MatShift() etc.) after
842: the matrix has been assembled but before BlockSolve has sucked it
843: in and devoured it.
844: */
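/* A minimal calling sketch (illustration only; A, x, y and alpha are assumed to be a
   MATMPIROWBS matrix, conforming vectors and a PetscScalar):

       MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
       MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
       MatScale(A,alpha);        still acts on the PETSc-side storage a->A
       MatMult(A,x,y);           first use triggers MatAssemblyEnd_MPIRowbs_ForBlockSolve()
*/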
847: PetscErrorCode MatAssemblyEnd_MPIRowbs_ForBlockSolve(Mat mat)
848: {
849: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
851: int ldim,low,high,i;
852: PetscScalar *diag;
855: if ((mat->was_assembled) && (!mat->same_nonzero)) { /* Free the old info */
856: if (a->pA) {BSfree_par_mat(a->pA);CHKERRBS(0);}
857: if (a->comm_pA) {BSfree_comm(a->comm_pA);CHKERRBS(0);}
858: }
860: if ((!mat->same_nonzero) || (!mat->was_assembled)) {
861: /* Indicates bypassing cliques in coloring */
862: if (a->bs_color_single) {
863: BSctx_set_si(a->procinfo,100);
864: }
865: /* Form permuted matrix for efficient parallel execution */
866: a->pA = BSmain_perm(a->procinfo,a->A);CHKERRBS(0);
867: /* Set up the communication */
868: a->comm_pA = BSsetup_forward(a->pA,a->procinfo);CHKERRBS(0);
869: } else {
870: /* Repermute the matrix */
871: BSmain_reperm(a->procinfo,a->A,a->pA);CHKERRBS(0);
872: }
874: /* Symmetrically scale the matrix by the diagonal */
875: BSscale_diag(a->pA,a->pA->diag,a->procinfo);CHKERRBS(0);
877: /* Store inverse of square root of permuted diagonal scaling matrix */
878: VecGetLocalSize(a->diag,&ldim);
879: VecGetOwnershipRange(a->diag,&low,&high);
880: VecGetArray(a->diag,&diag);
881: for (i=0; i<ldim; i++) {
882: if (a->pA->scale_diag[i] != 0.0) {
883: diag[i] = 1.0/sqrt(PetscAbsScalar(a->pA->scale_diag[i]));
884: } else {
885: diag[i] = 1.0;
886: }
887: }
888: VecRestoreArray(a->diag,&diag);
889: a->assembled_icc_storage = a->A->icc_storage;
890: a->blocksolveassembly = 1;
891: mat->was_assembled = PETSC_TRUE;
892: mat->same_nonzero = PETSC_TRUE;
893: PetscInfo(mat,"Completed BlockSolve95 matrix assembly\n");
894: return(0);
895: }
899: PetscErrorCode MatAssemblyEnd_MPIRowbs(Mat mat,MatAssemblyType mode)
900: {
901: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
903: int i,n,row,col,*rows,*cols,rstart,nzcount,flg,j,ncols;
904: PetscScalar *vals,val;
905: InsertMode addv = mat->insertmode;
908: while (1) {
909: MatStashScatterGetMesg_Private(&mat->stash,&n,&rows,&cols,&vals,&flg);
910: if (!flg) break;
911:
912: for (i=0; i<n;) {
913: /* Now identify the consecutive vals belonging to the same row */
914: for (j=i,rstart=rows[j]; j<n; j++) { if (rows[j] != rstart) break; }
915: if (j < n) ncols = j-i;
916: else ncols = n-i;
917: /* Now assemble all these values with a single function call */
918: MatSetValues_MPIRowbs(mat,1,rows+i,ncols,cols+i,vals+i,addv);
919: i = j;
920: }
921: }
922: MatStashScatterEnd_Private(&mat->stash);
924: rstart = mat->rmap->rstart;
925: nzcount = a->nz; /* This is the number of nonzeros entered by the user */
926: /* BlockSolve requires that the matrix is structurally symmetric */
927: if (mode == MAT_FINAL_ASSEMBLY && !mat->structurally_symmetric) {
928: MatAssemblyEnd_MPIRowbs_MakeSymmetric(mat);
929: }
930:
931: /* BlockSolve requires that all the diagonal elements are set */
932: val = 0.0;
933: for (i=0; i<mat->rmap->n; i++) {
934: row = i; col = i + rstart;
935: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
936: }
937:
938: MatAssemblyBegin_MPIRowbs_local(mat,mode);
939: MatAssemblyEnd_MPIRowbs_local(mat,mode);
940:
941: a->blocksolveassembly = 0;
942: PetscInfo4(mat,"Matrix size: %d X %d; storage space: %d unneeded,%d used\n",mat->rmap->n,mat->cmap->n,a->maxnz-a->nz,a->nz);
943: PetscInfo2(mat,"User entered %d nonzeros, PETSc added %d\n",nzcount,a->nz-nzcount);
944: PetscInfo1(mat,"Number of mallocs during MatSetValues is %d\n",a->reallocs);
945: return(0);
946: }
950: PetscErrorCode MatZeroEntries_MPIRowbs(Mat mat)
951: {
952: Mat_MPIRowbs *l = (Mat_MPIRowbs*)mat->data;
953: BSspmat *A = l->A;
954: BSsprow *vs;
955: int i,j;
958: for (i=0; i <mat->rmap->n; i++) {
959: vs = A->rows[i];
960: for (j=0; j< vs->length; j++) vs->nz[j] = 0.0;
961: }
962: return(0);
963: }
965: /* the code does not do the diagonal entries correctly unless the
966: matrix is square and the column and row ownerships are identical.
967: This is a BUG.
968: */
972: PetscErrorCode MatZeroRows_MPIRowbs(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
973: {
974: Mat_MPIRowbs *l = (Mat_MPIRowbs*)A->data;
976: int i,*owners = A->rmap->range,size = l->size;
977: int *nprocs,j,idx,nsends;
978: int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
979: int *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,n,*source;
980: int *lens,imdex,*lrows,*values;
981: MPI_Comm comm = ((PetscObject)A)->comm;
982: MPI_Request *send_waits,*recv_waits;
983: MPI_Status recv_status,*send_status;
984: PetscTruth found;
987: /* first count number of contributors to each processor */
988: PetscMalloc(2*size*sizeof(int),&nprocs);
989: PetscMemzero(nprocs,2*size*sizeof(int));
990: PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
991: for (i=0; i<N; i++) {
992: idx = rows[i];
993: found = PETSC_FALSE;
994: for (j=0; j<size; j++) {
995: if (idx >= owners[j] && idx < owners[j+1]) {
996: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
997: }
998: }
999: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
1000: }
1001: nsends = 0; for (i=0; i<size; i++) {nsends += nprocs[2*i+1];}
1003: /* inform other processors of number of messages and max length*/
1004: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
1006: /* post receives: */
1007: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
1008: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
1009: for (i=0; i<nrecvs; i++) {
1010: MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
1011: }
1013: /* do sends:
1014: 1) starts[i] gives the starting index in svalues for stuff going to
1015: the ith processor
1016: */
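/* Small worked example with hypothetical counts: for 3 processes and
   nprocs[2*i] = {2,0,3}, the prefix sums give starts = {0,2,2}, so the two rows destined
   for process 0 are packed into svalues[0..1] and the three destined for process 2 into
   svalues[2..4]. */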
1017: PetscMalloc((N+1)*sizeof(int),&svalues);
1018: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
1019: PetscMalloc((size+1)*sizeof(int),&starts);
1020: starts[0] = 0;
1021: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1022: for (i=0; i<N; i++) {
1023: svalues[starts[owner[i]]++] = rows[i];
1024: }
1026: starts[0] = 0;
1027: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1028: count = 0;
1029: for (i=0; i<size; i++) {
1030: if (nprocs[2*i+1]) {
1031: MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
1032: }
1033: }
1034: PetscFree(starts);
1036: base = owners[rank];
1038: /* wait on receives */
1039: PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
1040: source = lens + nrecvs;
1041: count = nrecvs; slen = 0;
1042: while (count) {
1043: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1044: /* unpack receives into our local space */
1045: MPI_Get_count(&recv_status,MPI_INT,&n);
1046: source[imdex] = recv_status.MPI_SOURCE;
1047: lens[imdex] = n;
1048: slen += n;
1049: count--;
1050: }
1051: PetscFree(recv_waits);
1052:
1053: /* move the data into the send scatter */
1054: PetscMalloc((slen+1)*sizeof(int),&lrows);
1055: count = 0;
1056: for (i=0; i<nrecvs; i++) {
1057: values = rvalues + i*nmax;
1058: for (j=0; j<lens[i]; j++) {
1059: lrows[count++] = values[j] - base;
1060: }
1061: }
1062: PetscFree(rvalues);
1063: PetscFree(lens);
1064: PetscFree(owner);
1065: PetscFree(nprocs);
1066:
1067: /* actually zap the local rows */
1068: MatZeroRows_MPIRowbs_local(A,slen,lrows,diag);
1069: PetscFree(lrows);
1071: /* wait on sends */
1072: if (nsends) {
1073: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1074: MPI_Waitall(nsends,send_waits,send_status);
1075: PetscFree(send_status);
1076: }
1077: PetscFree(send_waits);
1078: PetscFree(svalues);
1080: return(0);
1081: }
1085: PetscErrorCode MatNorm_MPIRowbs(Mat mat,NormType type,PetscReal *norm)
1086: {
1087: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1088: BSsprow *vs,**rs;
1089: PetscScalar *xv;
1090: PetscReal sum = 0.0;
1092: int *xi,nz,i,j;
1095: if (a->size == 1) {
1096: MatNorm_MPIRowbs_local(mat,type,norm);
1097: } else {
1098: rs = a->A->rows;
1099: if (type == NORM_FROBENIUS) {
1100: for (i=0; i<mat->rmap->n; i++) {
1101: vs = *rs++;
1102: nz = vs->length;
1103: xv = vs->nz;
1104: while (nz--) {
1105: #if defined(PETSC_USE_COMPLEX)
1106: sum += PetscRealPart(PetscConj(*xv)*(*xv)); xv++;
1107: #else
1108: sum += (*xv)*(*xv); xv++;
1109: #endif
1110: }
1111: }
1112: MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);
1113: *norm = sqrt(*norm);
1114: } else if (type == NORM_1) { /* max column norm */
1115: PetscReal *tmp,*tmp2;
1116: PetscMalloc(mat->cmap->n*sizeof(PetscReal),&tmp);
1117: PetscMalloc(mat->cmap->n*sizeof(PetscReal),&tmp2);
1118: PetscMemzero(tmp,mat->cmap->n*sizeof(PetscReal));
1119: *norm = 0.0;
1120: for (i=0; i<mat->rmap->n; i++) {
1121: vs = *rs++;
1122: nz = vs->length;
1123: xi = vs->col;
1124: xv = vs->nz;
1125: while (nz--) {
1126: tmp[*xi] += PetscAbsScalar(*xv);
1127: xi++; xv++;
1128: }
1129: }
1130: MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);
1131: for (j=0; j<mat->cmap->n; j++) {
1132: if (tmp2[j] > *norm) *norm = tmp2[j];
1133: }
1134: PetscFree(tmp);
1135: PetscFree(tmp2);
1136: } else if (type == NORM_INFINITY) { /* max row norm */
1137: PetscReal ntemp = 0.0;
1138: for (i=0; i<mat->rmap->n; i++) {
1139: vs = *rs++;
1140: nz = vs->length;
1141: xv = vs->nz;
1142: sum = 0.0;
1143: while (nz--) {
1144: sum += PetscAbsScalar(*xv); xv++;
1145: }
1146: if (sum > ntemp) ntemp = sum;
1147: }
1148: MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);
1149: } else {
1150: SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1151: }
1152: }
1153: return(0);
1154: }
1158: PetscErrorCode MatMult_MPIRowbs(Mat mat,Vec xx,Vec yy)
1159: {
1160: Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)mat->data;
1161: BSprocinfo *bspinfo = bsif->procinfo;
1162: PetscScalar *xxa,*xworka,*yya;
1166: if (!bsif->blocksolveassembly) {
1167: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1168: }
1170: /* Permute and apply diagonal scaling: [ xwork = D^{1/2} * x ] */
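/* Recall from MatAssemblyEnd_MPIRowbs_ForBlockSolve() that a->diag holds
   1/sqrt(|scale_diag|), i.e. D^{-1/2}, so the VecPointwiseDivide() below multiplies the
   permuted vector by D^{1/2}. */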
1171: if (!bsif->vecs_permscale) {
1172: VecGetArray(bsif->xwork,&xworka);
1173: VecGetArray(xx,&xxa);
1174: BSperm_dvec(xxa,xworka,bsif->pA->perm);CHKERRBS(0);
1175: VecRestoreArray(bsif->xwork,&xworka);
1176: VecRestoreArray(xx,&xxa);
1177: VecPointwiseDivide(xx,bsif->xwork,bsif->diag);
1178: }
1180: VecGetArray(xx,&xxa);
1181: VecGetArray(yy,&yya);
1182: /* Do lower triangular multiplication: [ y = L * xwork ] */
1183: if (bspinfo->single) {
1184: BSforward1(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1185: } else {
1186: BSforward(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1187: }
1188:
1189: /* Do upper triangular multiplication: [ y = y + L^{T} * xwork ] */
1190: if (mat->symmetric) {
1191: if (bspinfo->single){
1192: BSbackward1(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1193: } else {
1194: BSbackward(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1195: }
1196: }
1197: /* not needed for ILU version since forward does it all */
1198: VecRestoreArray(xx,&xxa);
1199: VecRestoreArray(yy,&yya);
1201: /* Apply diagonal scaling to vector: [ y = D^{1/2} * y ] */
1202: if (!bsif->vecs_permscale) {
1203: VecGetArray(bsif->xwork,&xworka);
1204: VecGetArray(xx,&xxa);
1205: BSiperm_dvec(xworka,xxa,bsif->pA->perm);CHKERRBS(0);
1206: VecRestoreArray(bsif->xwork,&xworka);
1207: VecRestoreArray(xx,&xxa);
1208: VecPointwiseDivide(bsif->xwork,yy,bsif->diag);
1209: VecGetArray(bsif->xwork,&xworka);
1210: VecGetArray(yy,&yya);
1211: BSiperm_dvec(xworka,yya,bsif->pA->perm);CHKERRBS(0);
1212: VecRestoreArray(bsif->xwork,&xworka);
1213: VecRestoreArray(yy,&yya);
1214: }
1215: PetscLogFlops(2*bsif->nz - mat->cmap->n);
1217: return(0);
1218: }
1222: PetscErrorCode MatMultAdd_MPIRowbs(Mat mat,Vec xx,Vec yy,Vec zz)
1223: {
1225: PetscScalar one = 1.0;
1228: (*mat->ops->mult)(mat,xx,zz);
1229: VecAXPY(zz,one,yy);
1230: return(0);
1231: }
1235: PetscErrorCode MatGetInfo_MPIRowbs(Mat A,MatInfoType flag,MatInfo *info)
1236: {
1237: Mat_MPIRowbs *mat = (Mat_MPIRowbs*)A->data;
1238: PetscReal isend[5],irecv[5];
1242: info->block_size = 1.0;
1243: info->mallocs = (double)mat->reallocs;
1244: isend[0] = mat->nz; isend[1] = mat->maxnz; isend[2] = mat->maxnz - mat->nz;
1245: isend[3] = ((PetscObject)A)->mem; isend[4] = info->mallocs;
1247: if (flag == MAT_LOCAL) {
1248: info->nz_used = isend[0];
1249: info->nz_allocated = isend[1];
1250: info->nz_unneeded = isend[2];
1251: info->memory = isend[3];
1252: info->mallocs = isend[4];
1253: } else if (flag == MAT_GLOBAL_MAX) {
1254: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);
1255: info->nz_used = irecv[0];
1256: info->nz_allocated = irecv[1];
1257: info->nz_unneeded = irecv[2];
1258: info->memory = irecv[3];
1259: info->mallocs = irecv[4];
1260: } else if (flag == MAT_GLOBAL_SUM) {
1261: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);
1262: info->nz_used = irecv[0];
1263: info->nz_allocated = irecv[1];
1264: info->nz_unneeded = irecv[2];
1265: info->memory = irecv[3];
1266: info->mallocs = irecv[4];
1267: }
1268: return(0);
1269: }
1273: PetscErrorCode MatGetDiagonal_MPIRowbs(Mat mat,Vec v)
1274: {
1275: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1276: BSsprow **rs = a->A->rows;
1278: int i,n;
1279: PetscScalar *x,zero = 0.0;
1282: if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1283: if (!a->blocksolveassembly) {
1284: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1285: }
1287: VecSet(v,zero);
1288: VecGetLocalSize(v,&n);
1289: if (n != mat->rmap->n) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
1290: VecGetArray(v,&x);
1291: for (i=0; i<mat->rmap->n; i++) {
1292: x[i] = rs[i]->nz[rs[i]->diag_ind];
1293: }
1294: VecRestoreArray(v,&x);
1295: return(0);
1296: }
1300: PetscErrorCode MatDestroy_MPIRowbs(Mat mat)
1301: {
1302: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1303: BSspmat *A = a->A;
1304: BSsprow *vs;
1306: int i;
1309: #if defined(PETSC_USE_LOG)
1310: PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->rmap->N,mat->cmap->N);
1311: #endif
1312: MatStashDestroy_Private(&mat->stash);
1313: if (a->bsmap) {
1314: PetscFree(a->bsmap->vlocal2global);
1315: PetscFree(a->bsmap->vglobal2local);
1316: if (a->bsmap->vglobal2proc) (*a->bsmap->free_g2p)(a->bsmap->vglobal2proc);
1317: PetscFree(a->bsmap);
1318: }
1320: if (A) {
1321: for (i=0; i<mat->rmap->n; i++) {
1322: vs = A->rows[i];
1323: MatFreeRowbs_Private(mat,vs->length,vs->col,vs->nz);
1324: }
1325: /* Note: A->map = a->bsmap is freed above */
1326: PetscFree(A->rows);
1327: PetscFree(A);
1328: }
1329: if (a->procinfo) {BSfree_ctx(a->procinfo);CHKERRBS(0);}
1330: if (a->diag) {VecDestroy(a->diag);}
1331: if (a->xwork) {VecDestroy(a->xwork);}
1332: if (a->pA) {BSfree_par_mat(a->pA);CHKERRBS(0);}
1333: if (a->fpA) {BSfree_copy_par_mat(a->fpA);CHKERRBS(0);}
1334: if (a->comm_pA) {BSfree_comm(a->comm_pA);CHKERRBS(0);}
1335: if (a->comm_fpA) {BSfree_comm(a->comm_fpA);CHKERRBS(0);}
1336: PetscFree(a->imax);
1337: MPI_Comm_free(&(a->comm_mpirowbs));
1338: PetscFree(a);
1340: PetscObjectChangeTypeName((PetscObject)mat,0);
1341: PetscObjectComposeFunction((PetscObject)mat,"MatMPIRowbsSetPreallocation_C","",PETSC_NULL);
1342: return(0);
1343: }
1347: PetscErrorCode MatSetOption_MPIRowbs(Mat A,MatOption op,PetscTruth flg)
1348: {
1349: Mat_MPIRowbs *a = (Mat_MPIRowbs*)A->data;
1353: switch (op) {
1354: case MAT_ROW_ORIENTED:
1355: a->roworiented = flg;
1356: break;
1357: case MAT_NEW_NONZERO_LOCATIONS:
1358: a->nonew = (flg ? 0 : 1);
1359: break;
1360: case MAT_USE_INODES:
1361: a->bs_color_single = (flg ? 0 : 1);
1362: break;
1363: case MAT_NEW_DIAGONALS:
1364: case MAT_NEW_NONZERO_LOCATION_ERR:
1365: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1366: case MAT_USE_HASH_TABLE:
1367: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1368: break;
1369: case MAT_IGNORE_OFF_PROC_ENTRIES:
1370: a->donotstash = flg;
1371: break;
1372: case MAT_KEEP_ZEROED_ROWS:
1373: a->keepzeroedrows = flg;
1374: break;
1375: case MAT_SYMMETRIC:
1376: BSset_mat_symmetric(a->A,PETSC_TRUE);CHKERRBS(0);
1377: break;
1378: case MAT_STRUCTURALLY_SYMMETRIC:
1379: case MAT_HERMITIAN:
1380: case MAT_SYMMETRY_ETERNAL:
1381: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1382: break;
1383: default:
1384: SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1385: break;
1386: }
1387: return(0);
1388: }
1392: PetscErrorCode MatGetRow_MPIRowbs(Mat AA,int row,int *nz,int **idx,PetscScalar **v)
1393: {
1394: Mat_MPIRowbs *mat = (Mat_MPIRowbs*)AA->data;
1395: BSspmat *A = mat->A;
1396: BSsprow *rs;
1397:
1399: if (row < AA->rmap->rstart || row >= AA->rmap->rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1401: rs = A->rows[row - AA->rmap->rstart];
1402: *nz = rs->length;
1403: if (v) *v = rs->nz;
1404: if (idx) *idx = rs->col;
1405: return(0);
1406: }
1410: PetscErrorCode MatRestoreRow_MPIRowbs(Mat A,int row,int *nz,int **idx,PetscScalar **v)
1411: {
1413: return(0);
1414: }
1416: /* ------------------------------------------------------------------ */
1420: PetscErrorCode MatSetUpPreallocation_MPIRowbs(Mat A)
1421: {
1425: MatMPIRowbsSetPreallocation(A,PETSC_DEFAULT,0);
1426: return(0);
1427: }
1429: /* -------------------------------------------------------------------*/
1430: static struct _MatOps MatOps_Values = {MatSetValues_MPIRowbs,
1431: MatGetRow_MPIRowbs,
1432: MatRestoreRow_MPIRowbs,
1433: MatMult_MPIRowbs,
1434: /* 4*/ MatMultAdd_MPIRowbs,
1435: MatMult_MPIRowbs,
1436: MatMultAdd_MPIRowbs,
1437: 0,
1438: 0,
1439: 0,
1440: /*10*/ 0,
1441: 0,
1442: 0,
1443: 0,
1444: 0,
1445: /*15*/ MatGetInfo_MPIRowbs,
1446: 0,
1447: MatGetDiagonal_MPIRowbs,
1448: 0,
1449: MatNorm_MPIRowbs,
1450: /*20*/ MatAssemblyBegin_MPIRowbs,
1451: MatAssemblyEnd_MPIRowbs,
1452: 0,
1453: MatSetOption_MPIRowbs,
1454: MatZeroEntries_MPIRowbs,
1455: /*25*/ MatZeroRows_MPIRowbs,
1456: 0,
1457: 0,
1458: 0,
1459: 0,
1460: /*30*/ MatSetUpPreallocation_MPIRowbs,
1461: 0,
1462: 0,
1463: 0,
1464: 0,
1465: /*35*/ 0,
1466: 0,
1467: 0,
1468: 0,
1469: 0,
1470: /*40*/ 0,
1471: MatGetSubMatrices_MPIRowbs,
1472: 0,
1473: 0,
1474: 0,
1475: /*45*/ 0,
1476: MatScale_MPIRowbs,
1477: 0,
1478: 0,
1479: 0,
1480: /*50*/ 0,
1481: 0,
1482: 0,
1483: 0,
1484: 0,
1485: /*55*/ 0,
1486: 0,
1487: 0,
1488: 0,
1489: 0,
1490: /*60*/ MatGetSubMatrix_MPIRowbs,
1491: MatDestroy_MPIRowbs,
1492: MatView_MPIRowbs,
1493: 0,
1494: MatUseScaledForm_MPIRowbs,
1495: /*65*/ MatScaleSystem_MPIRowbs,
1496: MatUnScaleSystem_MPIRowbs,
1497: 0,
1498: 0,
1499: 0,
1500: /*70*/ 0,
1501: 0,
1502: 0,
1503: 0,
1504: 0,
1505: /*75*/ 0,
1506: 0,
1507: 0,
1508: 0,
1509: 0,
1510: /*80*/ 0,
1511: 0,
1512: 0,
1513: 0,
1514: MatLoad_MPIRowbs,
1515: /*85*/ 0,
1516: 0,
1517: 0,
1518: 0,
1519: 0,
1520: /*90*/ 0,
1521: 0,
1522: 0,
1523: 0,
1524: 0,
1525: /*95*/ 0,
1526: 0,
1527: 0,
1528: 0};
1530: /* ------------------------------------------------------------------- */
1535: PetscErrorCode MatMPIRowbsSetPreallocation_MPIRowbs(Mat mat,int nz,const int nnz[])
1536: {
1540: mat->preallocated = PETSC_TRUE;
1541: MatCreateMPIRowbs_local(mat,nz,nnz);
1542: return(0);
1543: }
1546: /*MC
1547: MATMPIROWBS - MATMPIROWBS = "mpirowbs" - A matrix type providing ILU and ICC for distributed sparse matrices for use
1548: with the external package BlockSolve95. If BlockSolve95 is installed (see the manual for instructions
1549: on how to declare the existence of external packages), a matrix type can be constructed which invokes
1550: BlockSolve95 preconditioners and solvers.
1552: Options Database Keys:
1553: . -mat_type mpirowbs - sets the matrix type to "mpirowbs" during a call to MatSetFromOptions()
1555: Level: beginner
1557: .seealso: MatCreateMPIRowbs
1558: M*/
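/* A minimal creation sketch (assumes BlockSolve95 is installed; comm, m and M are a
   communicator and the local/global row sizes):

       Mat A;
       MatCreate(comm,&A);
       MatSetSizes(A,m,m,M,M);
       MatSetType(A,MATMPIROWBS);                      or -mat_type mpirowbs with MatSetFromOptions(A)
       MatMPIRowbsSetPreallocation(A,5,PETSC_NULL);    5 expected nonzeros per row (illustrative)
*/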
1563: PetscErrorCode MatCreate_MPIRowbs(Mat A)
1564: {
1565: Mat_MPIRowbs *a;
1566: BSmapping *bsmap;
1567: BSoff_map *bsoff;
1569: int *offset,m,M;
1570: PetscTruth flg1,flg3;
1571: BSprocinfo *bspinfo;
1572: MPI_Comm comm;
1573:
1575: comm = ((PetscObject)A)->comm;
1577: PetscNewLog(A,Mat_MPIRowbs,&a);
1578: A->data = (void*)a;
1579: PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));
1580: A->factor = 0;
1581: A->mapping = 0;
1582: a->vecs_permscale = PETSC_FALSE;
1583: A->insertmode = NOT_SET_VALUES;
1584: a->blocksolveassembly = 0;
1585: a->keepzeroedrows = PETSC_FALSE;
1587: MPI_Comm_rank(comm,&a->rank);
1588: MPI_Comm_size(comm,&a->size);
1591: PetscMapSetBlockSize(A->rmap,1);
1592: PetscMapSetBlockSize(A->cmap,1);
1593: PetscMapSetUp(A->rmap);
1594: PetscMapSetUp(A->cmap);
1595: m = A->rmap->n;
1596: M = A->rmap->N;
1598: PetscMalloc((A->rmap->n+1)*sizeof(int),&a->imax);
1599: a->reallocs = 0;
1601: /* build cache for off array entries formed */
1602: MatStashCreate_Private(((PetscObject)A)->comm,1,&A->stash);
1603: a->donotstash = PETSC_FALSE;
1605: /* Initialize BlockSolve information */
1606: a->A = 0;
1607: a->pA = 0;
1608: a->comm_pA = 0;
1609: a->fpA = 0;
1610: a->comm_fpA = 0;
1611: a->alpha = 1.0;
1612: a->ierr = 0;
1613: a->failures = 0;
1614: MPI_Comm_dup(((PetscObject)A)->comm,&(a->comm_mpirowbs));
1615: VecCreateMPI(((PetscObject)A)->comm,A->rmap->n,A->rmap->N,&(a->diag));
1616: VecDuplicate(a->diag,&(a->xwork));
1617: PetscLogObjectParent(A,a->diag); PetscLogObjectParent(A,a->xwork);
1618: PetscLogObjectMemory(A,(A->rmap->n+1)*sizeof(PetscScalar));
1619: bspinfo = BScreate_ctx();CHKERRBS(0);
1620: a->procinfo = bspinfo;
1621: BSctx_set_id(bspinfo,a->rank);CHKERRBS(0);
1622: BSctx_set_np(bspinfo,a->size);CHKERRBS(0);
1623: BSctx_set_ps(bspinfo,a->comm_mpirowbs);CHKERRBS(0);
1624: BSctx_set_cs(bspinfo,INT_MAX);CHKERRBS(0);
1625: BSctx_set_is(bspinfo,INT_MAX);CHKERRBS(0);
1626: BSctx_set_ct(bspinfo,IDO);CHKERRBS(0);
1627: #if defined(PETSC_USE_DEBUG)
1628: BSctx_set_err(bspinfo,1);CHKERRBS(0); /* BS error checking */
1629: #endif
1630: BSctx_set_rt(bspinfo,1);CHKERRBS(0);
1631: #if defined (PETSC_USE_INFO)
1632: PetscOptionsHasName(PETSC_NULL,"-info",&flg1);
1633: if (flg1) {
1634: BSctx_set_pr(bspinfo,1);CHKERRBS(0);
1635: }
1636: #endif
1637: PetscOptionsBegin(((PetscObject)A)->comm,PETSC_NULL,"Options for MPIROWBS matrix","Mat");
1638: PetscOptionsTruth("-pc_factor_factorpointwise","Do not optimize for inodes (slow)",PETSC_NULL,PETSC_FALSE,&flg1,PETSC_NULL);
1639: PetscOptionsTruth("-mat_rowbs_no_inode","Do not optimize for inodes (slow)",PETSC_NULL,PETSC_FALSE,&flg3,PETSC_NULL);
1640: PetscOptionsEnd();
1641: if (flg1 || flg3) {
1642: BSctx_set_si(bspinfo,1);CHKERRBS(0);
1643: } else {
1644: BSctx_set_si(bspinfo,0);CHKERRBS(0);
1645: }
1646: #if defined(PETSC_USE_LOG)
1647: MLOG_INIT(); /* Initialize logging */
1648: #endif
1650: /* Compute global offsets */
1651: offset = &A->rmap->rstart;
1653: PetscNewLog(A,BSmapping,&a->bsmap);
1654: bsmap = a->bsmap;
1655: PetscMalloc(sizeof(int),&bsmap->vlocal2global);
1656: *((int*)bsmap->vlocal2global) = (*offset);
1657: bsmap->flocal2global = BSloc2glob;
1658: bsmap->free_l2g = 0;
1659: PetscMalloc(sizeof(int),&bsmap->vglobal2local);
1660: *((int*)bsmap->vglobal2local) = (*offset);
1661: bsmap->fglobal2local = BSglob2loc;
1662: bsmap->free_g2l = 0;
1663: bsoff = BSmake_off_map(*offset,bspinfo,A->rmap->N);
1664: bsmap->vglobal2proc = (void*)bsoff;
1665: bsmap->fglobal2proc = BSglob2proc;
1666: bsmap->free_g2p = (void(*)(void*)) BSfree_off_map;
1667: PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMPIRowbsSetPreallocation_C",
1668: "MatMPIRowbsSetPreallocation_MPIRowbs",
1669: MatMPIRowbsSetPreallocation_MPIRowbs);
1670: PetscObjectChangeTypeName((PetscObject)A,MATMPIROWBS);
1671: return(0);
1672: }
1677: /* @
1678: MatMPIRowbsSetPreallocation - Sets the number of expected nonzeros
1679: per row in the matrix.
1681: Input Parameter:
1682: + mat - matrix
1683: . nz - maximum expected for any row
1684: - nnz - number expected in each row
1686: Note:
1687: This routine is valid only for matrices stored in the MATMPIROWBS
1688: format.
1689: @ */
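/* Usage sketch (values are illustrative): either give a single estimate for every local row,
       MatMPIRowbsSetPreallocation(A,5,PETSC_NULL);
   or supply per-row estimates, in which case nz is ignored,
       MatMPIRowbsSetPreallocation(A,0,nnz);           nnz[i] = expected nonzeros in local row i
*/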
1690: PetscErrorCode MatMPIRowbsSetPreallocation(Mat mat,int nz,const int nnz[])
1691: {
1692: PetscErrorCode ierr,(*f)(Mat,int,const int[]);
1695: PetscObjectQueryFunction((PetscObject)mat,"MatMPIRowbsSetPreallocation_C",(void (**)(void))&f);
1696: if (f) {
1697: (*f)(mat,nz,nnz);
1698: }
1699: return(0);
1700: }
1702: /* --------------- extra BlockSolve-specific routines -------------- */
1705: /* @
1706: MatGetBSProcinfo - Gets the BlockSolve BSprocinfo context, which the
1707: user can then manipulate to alter the default parameters.
1709: Input Parameter:
1710: mat - matrix
1712: Output Parameter:
1713: procinfo - processor information context
1715: Note:
1716: This routine is valid only for matrices stored in the MATMPIROWBS
1717: format.
1718: @ */
1719: PetscErrorCode MatGetBSProcinfo(Mat mat,BSprocinfo *procinfo)
1720: {
1721: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1722: PetscTruth ismpirowbs;
1726: PetscTypeCompare((PetscObject)mat,MATMPIROWBS,&ismpirowbs);
1727: if (!ismpirowbs) SETERRQ(PETSC_ERR_ARG_WRONG,"For MATMPIROWBS matrix type");
1728: procinfo = a->procinfo;
1729: return(0);
1730: }
1734: PetscErrorCode MatLoad_MPIRowbs(PetscViewer viewer,const MatType type,Mat *newmat)
1735: {
1736: Mat_MPIRowbs *a;
1737: BSspmat *A;
1738: BSsprow **rs;
1739: Mat mat;
1741: int i,nz,j,rstart,rend,fd,*ourlens,*sndcounts = 0,*procsnz;
1742: int header[4],rank,size,*rowlengths = 0,M,m,*rowners,maxnz,*cols;
1743: PetscScalar *vals;
1744: MPI_Comm comm = ((PetscObject)viewer)->comm;
1745: MPI_Status status;
1748: MPI_Comm_size(comm,&size);
1749: MPI_Comm_rank(comm,&rank);
1750: if (!rank) {
1751: PetscViewerBinaryGetDescriptor(viewer,&fd);
1752: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1753: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not matrix object");
1754: if (header[3] < 0) {
1755: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIRowbs");
1756: }
1757: }
1759: MPI_Bcast(header+1,3,MPI_INT,0,comm);
1760: M = header[1];
1762: /* determine ownership of all rows */
1763: m = M/size + ((M % size) > rank);
1764: PetscMalloc((size+2)*sizeof(int),&rowners);
1765: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1766: rowners[0] = 0;
1767: for (i=2; i<=size; i++) {
1768: rowners[i] += rowners[i-1];
1769: }
1770: rstart = rowners[rank];
1771: rend = rowners[rank+1];
1773: /* distribute row lengths to all processors */
1774: PetscMalloc((rend-rstart)*sizeof(int),&ourlens);
1775: if (!rank) {
1776: PetscMalloc(M*sizeof(int),&rowlengths);
1777: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1778: PetscMalloc(size*sizeof(int),&sndcounts);
1779: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1780: MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1781: PetscFree(sndcounts);
1782: } else {
1783: MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1784: }
1786: /* create our matrix */
1787: MatCreate(comm,newmat);
1788: MatSetSizes(*newmat,m,m,M,M);
1789: MatSetType(*newmat,type);
1790: MatMPIRowbsSetPreallocation(*newmat,0,ourlens);
1791: mat = *newmat;
1792: PetscFree(ourlens);
1794: a = (Mat_MPIRowbs*)mat->data;
1795: A = a->A;
1796: rs = A->rows;
1798: if (!rank) {
1799: /* calculate the number of nonzeros on each processor */
1800: PetscMalloc(size*sizeof(int),&procsnz);
1801: PetscMemzero(procsnz,size*sizeof(int));
1802: for (i=0; i<size; i++) {
1803: for (j=rowners[i]; j< rowners[i+1]; j++) {
1804: procsnz[i] += rowlengths[j];
1805: }
1806: }
1807: PetscFree(rowlengths);
1809: /* determine max buffer needed and allocate it */
1810: maxnz = 0;
1811: for (i=0; i<size; i++) {
1812: maxnz = PetscMax(maxnz,procsnz[i]);
1813: }
1814: PetscMalloc(maxnz*sizeof(int),&cols);
1816: /* read in my part of the matrix column indices */
1817: nz = procsnz[0];
1818: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1819:
1820: /* insert it into my part of matrix */
1821: nz = 0;
1822: for (i=0; i<A->num_rows; i++) {
1823: for (j=0; j<a->imax[i]; j++) {
1824: rs[i]->col[j] = cols[nz++];
1825: }
1826: rs[i]->length = a->imax[i];
1827: }
1828: /* read in parts for all other processors */
1829: for (i=1; i<size; i++) {
1830: nz = procsnz[i];
1831: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1832: MPI_Send(cols,nz,MPI_INT,i,((PetscObject)mat)->tag,comm);
1833: }
1834: PetscFree(cols);
1835: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
1837: /* read in my part of the matrix numerical values */
1838: nz = procsnz[0];
1839: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1840:
1841: /* insert it into my part of matrix */
1842: nz = 0;
1843: for (i=0; i<A->num_rows; i++) {
1844: for (j=0; j<a->imax[i]; j++) {
1845: rs[i]->nz[j] = vals[nz++];
1846: }
1847: }
1848: /* read in parts for all other processors */
1849: for (i=1; i<size; i++) {
1850: nz = procsnz[i];
1851: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1852: MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)mat)->tag,comm);
1853: }
1854: PetscFree(vals);
1855: PetscFree(procsnz);
1856: } else {
1857: /* determine buffer space needed for message */
1858: nz = 0;
1859: for (i=0; i<A->num_rows; i++) {
1860: nz += a->imax[i];
1861: }
1862: PetscMalloc(nz*sizeof(int),&cols);
1864: /* receive message of column indices*/
1865: MPI_Recv(cols,nz,MPI_INT,0,((PetscObject)mat)->tag,comm,&status);
1866: MPI_Get_count(&status,MPI_INT,&maxnz);
1867: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong");
1869: /* insert it into my part of matrix */
1870: nz = 0;
1871: for (i=0; i<A->num_rows; i++) {
1872: for (j=0; j<a->imax[i]; j++) {
1873: rs[i]->col[j] = cols[nz++];
1874: }
1875: rs[i]->length = a->imax[i];
1876: }
1877: PetscFree(cols);
1878: PetscMalloc(nz*sizeof(PetscScalar),&vals);
1880: /* receive message of values*/
1881: MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)mat)->tag,comm,&status);
1882: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1883: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong");
1885: /* insert it into my part of matrix */
1886: nz = 0;
1887: for (i=0; i<A->num_rows; i++) {
1888: for (j=0; j<a->imax[i]; j++) {
1889: rs[i]->nz[j] = vals[nz++];
1890: }
1891: rs[i]->length = a->imax[i];
1892: }
1893: PetscFree(vals);
1894: }
1895: PetscFree(rowners);
1896: a->nz = a->maxnz;
1897: MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
1898: MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
1899: return(0);
1900: }
1902: /*
1903: Special destroy and view routines for factored matrices
1904: */
1907: static PetscErrorCode MatDestroy_MPIRowbs_Factored(Mat mat)
1908: {
1910: #if defined(PETSC_USE_LOG)
1911: PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->rmap->N,mat->cmap->N);
1912: #endif
1913: return(0);
1914: }
1918: static PetscErrorCode MatView_MPIRowbs_Factored(Mat mat,PetscViewer viewer)
1919: {
1923: MatView((Mat) mat->data,viewer);
1924: return(0);
1925: }
1929: PetscErrorCode MatIncompleteCholeskyFactorSymbolic_MPIRowbs(Mat newmat,Mat mat,IS isrow,const MatFactorInfo *info)
1930: {
1931: /* Note: f is not currently used in BlockSolve */
1932: Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;
1934: PetscTruth idn;
1937: if (isrow) {
1938: ISIdentity(isrow,&idn);
1939: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
1940: }
1942: if (!mat->symmetric) {
1943: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"To use incomplete Cholesky \n\
1944: preconditioning with a MATMPIROWBS matrix you must declare it to be \n\
1945: symmetric using the option MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE)");
1946: }
1948: /* If the icc_storage flag wasn't set before the last blocksolveassembly, */
1949: /* we must completely redo the assembly as a different storage format is required. */
1950: if (mbs->blocksolveassembly && !mbs->assembled_icc_storage) {
1951: mat->same_nonzero = PETSC_FALSE;
1952: mbs->blocksolveassembly = 0;
1953: }
1955: if (!mbs->blocksolveassembly) {
1956: BSset_mat_icc_storage(mbs->A,PETSC_TRUE);CHKERRBS(0);
1957: BSset_mat_symmetric(mbs->A,PETSC_TRUE);CHKERRBS(0);
1958: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1959: }
1961: /* Copy permuted matrix */
1962: if (mbs->fpA) {BSfree_copy_par_mat(mbs->fpA);CHKERRBS(0);}
1963: mbs->fpA = BScopy_par_mat(mbs->pA);CHKERRBS(0);
1965: /* Set up the communication for factorization */
1966: if (mbs->comm_fpA) {BSfree_comm(mbs->comm_fpA);CHKERRBS(0);}
1967: mbs->comm_fpA = BSsetup_factor(mbs->fpA,mbs->procinfo);CHKERRBS(0);
1969: /*
1970: Create a new Mat structure to hold the "factored" matrix,
1971: note this merely contains a pointer to the original matrix, since
1972: the original matrix contains the factor information.
1973: */
1974: PetscHeaderCreate(newmat,_p_Mat,struct _MatOps,MAT_COOKIE,-1,"Mat",((PetscObject)mat)->comm,MatDestroy,MatView);
1976: newmat->data = (void*)mat;
1977: PetscMemcpy(newmat->ops,&MatOps_Values,sizeof(struct _MatOps));
1978: newmat->ops->destroy = MatDestroy_MPIRowbs_Factored;
1979: newmat->ops->view = MatView_MPIRowbs_Factored;
1980: newmat->factor = 1;
1981: newmat->preallocated = PETSC_TRUE;
1982: PetscMapCopy(((PetscObject)mat)->comm,mat->rmap,newmat->rmap);
1983: PetscMapCopy(((PetscObject)mat)->comm,mat->cmap,newmat->cmap);
1985: PetscStrallocpy(MATMPIROWBS,&((PetscObject)newmat)->type_name);
1986: newmat->ops->lufactornumeric = MatLUFactorNumeric_MPIRowbs;
1987: return(0);
1988: }
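/*
   Hedged usage note (not part of the original source): as the error message above
   indicates, incomplete Cholesky with MATMPIROWBS requires the symmetry options to
   be set BEFORE assembly; a minimal sketch, assuming the matrix A really is symmetric:

     MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);
     MatSetOption(A,MAT_SYMMETRY_ETERNAL,PETSC_TRUE);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

   Otherwise the code above must redo the BlockSolve assembly with ICC storage enabled.
*/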
1992: PetscErrorCode MatILUFactorSymbolic_MPIRowbs(Mat newmat,Mat mat,IS isrow,IS iscol,const MatFactorInfo* info)
1993: {
1994: Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;
1996: PetscTruth idn;
1999:   if (info->levels) SETERRQ(PETSC_ERR_SUP,"BlockSolve95 ILU only supports 0 levels of fill");
2000: if (isrow) {
2001: ISIdentity(isrow,&idn);
2002: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
2003: }
2004: if (iscol) {
2005: ISIdentity(iscol,&idn);
2006: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
2007: }
2009: if (!mbs->blocksolveassembly) {
2010: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
2011: }
2012:
2013: /* if (mat->symmetric) { */
2014: /* SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"To use ILU preconditioner with \n\ */
2015: /* MatCreateMPIRowbs() matrix you CANNOT declare it to be a symmetric matrix\n\ */
2016: /* using the option MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE)"); */
2017: /* } */
2019: /* Copy permuted matrix */
2020: if (mbs->fpA) {BSfree_copy_par_mat(mbs->fpA);CHKERRBS(0);}
2021: mbs->fpA = BScopy_par_mat(mbs->pA);CHKERRBS(0);
2023: /* Set up the communication for factorization */
2024: if (mbs->comm_fpA) {BSfree_comm(mbs->comm_fpA);CHKERRBS(0);}
2025: mbs->comm_fpA = BSsetup_factor(mbs->fpA,mbs->procinfo);CHKERRBS(0);
2027:   /*
2028:      Create a new Mat structure to hold the "factored" matrix;
2029:      note that this merely contains a pointer to the original matrix, since
2030:      the original matrix contains the factor information.
2031:   */
2032: PetscHeaderCreate(newmat,_p_Mat,struct _MatOps,MAT_COOKIE,-1,"Mat",((PetscObject)mat)->comm,MatDestroy,MatView);
2034: newmat->data = (void*)mat;
2035: PetscMemcpy(newmat->ops,&MatOps_Values,sizeof(struct _MatOps));
2036: newmat->ops->destroy = MatDestroy_MPIRowbs_Factored;
2037: newmat->ops->view = MatView_MPIRowbs_Factored;
2038: newmat->factor = 1;
2039: newmat->preallocated = PETSC_TRUE;
2041: PetscMapCopy(((PetscObject)mat)->comm,mat->rmap,newmat->rmap);
2042: PetscMapCopy(((PetscObject)mat)->comm,mat->cmap,newmat->cmap);
2044: PetscStrallocpy(MATMPIROWBS,&((PetscObject)newmat)->type_name);
2046: newmat->ops->lufactornumeric = MatLUFactorNumeric_MPIRowbs;
2048: return(0);
2049: }
2053: /*@C
2054: MatCreateMPIRowbs - Creates a sparse parallel matrix in the MATMPIROWBS
2055: format. This format is intended primarily as an interface for BlockSolve95.
2057: Collective on MPI_Comm
2059: Input Parameters:
2060: + comm - MPI communicator
2061: . m - number of local rows (or PETSC_DECIDE to have it calculated)
2062: . M - number of global rows (or PETSC_DECIDE to have it calculated)
2063: . nz - number of nonzeros per row (same for all local rows)
2064: - nnz - number of nonzeros per row (possibly different for each row).
2066: Output Parameter:
2067: . newA - the matrix
2069: Notes:
2070:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2071:    then it must be used on all processors that share the object for that argument.
2073: The user MUST specify either the local or global matrix dimensions
2074: (possibly both).
2076: Specify the preallocated storage with either nz or nnz (not both). Set
2077: nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2078: allocation.
2080: Notes:
2081: By default, the matrix is assumed to be nonsymmetric; the user can
2082: take advantage of special optimizations for symmetric matrices by calling
2083: $ MatSetOption(mat,MAT_SYMMETRIC,PETSC_TRUE)
2084: $ MatSetOption(mat,MAT_SYMMETRY_ETERNAL,PETSC_TRUE)
2085: BEFORE calling the routine MatAssemblyBegin().
2087: Internally, the MATMPIROWBS format inserts zero elements to the
2088: matrix if necessary, so that nonsymmetric matrices are considered
2089: to be symmetric in terms of their sparsity structure; this format
2090: is required for use of the parallel communication routines within
2091: BlockSolve95. In particular, if the matrix element A[i,j] exists,
2092: then PETSc will internally allocate a 0 value for the element
2093: A[j,i] during MatAssemblyEnd() if the user has not already set
2094: a value for the matrix element A[j,i].
2096: Options Database Keys:
2097: . -mat_rowbs_no_inode - Do not use inodes.
2099: Level: intermediate
2100:
2101: .keywords: matrix, row, symmetric, sparse, parallel, BlockSolve
2103: .seealso: MatCreate(), MatSetValues()
2104: @*/
2105: PetscErrorCode MatCreateMPIRowbs(MPI_Comm comm,int m,int M,int nz,const int nnz[],Mat *newA)
2106: {
2108:
2110: MatCreate(comm,newA);
2111: MatSetSizes(*newA,m,m,M,M);
2112: MatSetType(*newA,MATMPIROWBS);
2113: MatMPIRowbsSetPreallocation(*newA,nz,nnz);
2114: return(0);
2115: }
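/*
   Hedged usage sketch (not part of the original source): a minimal example of how
   MatCreateMPIRowbs() above might be called; the global size 100, the estimate of
   5 nonzeros per row, and the single inserted entry are illustrative assumptions.

     Mat         A;
     int         i = 0, j = 0;
     PetscScalar v = 1.0;
     MatCreateMPIRowbs(PETSC_COMM_WORLD,PETSC_DECIDE,100,5,PETSC_NULL,&A);
     MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     MatDestroy(A);
*/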
2118: /* -------------------------------------------------------------------------*/
2120: #include ../src/mat/impls/aij/seq/aij.h
2121: #include ../src/mat/impls/aij/mpi/mpiaij.h
2125: PetscErrorCode MatGetSubMatrices_MPIRowbs(Mat C,int ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
2126: {
2128: int nmax,nstages_local,nstages,i,pos,max_no;
2132: /* Allocate memory to hold all the submatrices */
2133: if (scall != MAT_REUSE_MATRIX) {
2134: PetscMalloc((ismax+1)*sizeof(Mat),submat);
2135: }
2136:
2137: /* Determine the number of stages through which submatrices are done */
2138: nmax = 20*1000000 / (C->cmap->N * sizeof(int));
2139: if (!nmax) nmax = 1;
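  /* Note (added comment): nmax caps the number of index sets handled per stage; since each
     IS later needs a C->cmap->N-integer column map, 20*1000000/(N*sizeof(int)) keeps the
     per-stage integer workspace near 20 MB (intent inferred from the formula above). */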
2140: nstages_local = ismax/nmax + ((ismax % nmax)?1:0);
2142: /* Make sure every processor loops through the nstages */
2143: MPI_Allreduce(&nstages_local,&nstages,1,MPI_INT,MPI_MAX,((PetscObject)C)->comm);
2145: for (i=0,pos=0; i<nstages; i++) {
2146: if (pos+nmax <= ismax) max_no = nmax;
2147: else if (pos == ismax) max_no = 0;
2148: else max_no = ismax-pos;
2149: MatGetSubMatrices_MPIRowbs_Local(C,max_no,isrow+pos,iscol+pos,scall,*submat+pos);
2150: pos += max_no;
2151: }
2152: return(0);
2153: }
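/*
   Hedged usage sketch (not part of the original source): the routine above is normally
   reached through the user-level MatGetSubMatrices() call; the strided index sets below
   are illustrative assumptions.

     IS  rows,cols;
     Mat *submats;
     ISCreateStride(PETSC_COMM_SELF,10,0,1,&rows);
     ISCreateStride(PETSC_COMM_SELF,10,0,1,&cols);
     MatGetSubMatrices(C,1,&rows,&cols,MAT_INITIAL_MATRIX,&submats);
*/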
2154: /* -------------------------------------------------------------------------*/
2155: /* For now MatGetSubMatrices_MPIRowbs_Local gets SeqAIJ submatrices of the input
2156:    matrix and preserves the zeros that come from structural symmetry
2157: */
2160: PetscErrorCode MatGetSubMatrices_MPIRowbs_Local(Mat C,int ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats)
2161: {
2162: Mat_MPIRowbs *c = (Mat_MPIRowbs *)(C->data);
2163: BSspmat *A = c->A;
2164: Mat_SeqAIJ *mat;
2166: const int **irow,**icol,*irow_i;
2167: int *nrow,*ncol,*w1,*w2,*w3,*w4,*rtable,start,end,size;
2168: int **sbuf1,**sbuf2,rank,m,i,j,k,l,ct1,ct2,**rbuf1,row,proc;
2169: int nrqs,msz,**ptr,idx,*req_size,*ctr,*pa,*tmp,tcol,nrqr;
2170: int **rbuf3,*req_source,**sbuf_aj,**rbuf2,max1,max2,**rmap;
2171: int **cmap,**lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax;
2172: int len,ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*cmap_i,*lens_i;
2173: int *rmap_i,tag0,tag1,tag2,tag3;
2174: MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3;
2175: MPI_Request *r_waits4,*s_waits3,*s_waits4;
2176: MPI_Status *r_status1,*r_status2,*s_status1,*s_status3,*s_status2;
2177: MPI_Status *r_status3,*r_status4,*s_status4;
2178: MPI_Comm comm;
2179: FLOAT **rbuf4,**sbuf_aa,*vals,*sbuf_aa_i;
2180: PetscScalar *mat_a;
2181: PetscTruth sorted;
2182: int *onodes1,*olengths1;
2185: comm = ((PetscObject)C)->comm;
2186: tag0 = ((PetscObject)C)->tag;
2187: size = c->size;
2188: rank = c->rank;
2189: m = C->rmap->N;
2190:
2191: /* Get some new tags to keep the communication clean */
2192: PetscObjectGetNewTag((PetscObject)C,&tag1);
2193: PetscObjectGetNewTag((PetscObject)C,&tag2);
2194: PetscObjectGetNewTag((PetscObject)C,&tag3);
2196: /* Check if the col indices are sorted */
2197: for (i=0; i<ismax; i++) {
2198: ISSorted(isrow[i],&sorted);
2199: if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted");
2200: ISSorted(iscol[i],&sorted);
2201: /* if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted"); */
2202: }
2204: PetscMalloc((2*ismax+1)*(sizeof(int*)+ sizeof(int)) + (m+1)*sizeof(int),&irow);
2205: icol = irow + ismax;
2206: nrow = (int*)(icol + ismax);
2207: ncol = nrow + ismax;
2208: rtable = ncol + ismax;
2210: for (i=0; i<ismax; i++) {
2211: ISGetIndices(isrow[i],&irow[i]);
2212: ISGetIndices(iscol[i],&icol[i]);
2213: ISGetLocalSize(isrow[i],&nrow[i]);
2214: ISGetLocalSize(iscol[i],&ncol[i]);
2215: }
2217: /* Create hash table for the mapping :row -> proc*/
2218: for (i=0,j=0; i<size; i++) {
2219: jmax = C->rmap->range[i+1];
2220: for (; j<jmax; j++) {
2221: rtable[j] = i;
2222: }
2223: }
2225:   /* evaluate communication - message to whom, length of message, and buffer space
2226:      required. Based on this, buffers are allocated, and data copied into them */
2227: PetscMalloc(size*4*sizeof(int),&w1); /* mesg size */
2228: w2 = w1 + size; /* if w2[i] marked, then a message to proc i*/
2229: w3 = w2 + size; /* no of IS that needs to be sent to proc i */
2230: w4 = w3 + size; /* temp work space used in determining w1, w2, w3 */
2231: PetscMemzero(w1,size*3*sizeof(int)); /* initialize work vector*/
2232: for (i=0; i<ismax; i++) {
2233: PetscMemzero(w4,size*sizeof(int)); /* initialize work vector*/
2234: jmax = nrow[i];
2235: irow_i = irow[i];
2236: for (j=0; j<jmax; j++) {
2237: row = irow_i[j];
2238: proc = rtable[row];
2239: w4[proc]++;
2240: }
2241: for (j=0; j<size; j++) {
2242: if (w4[j]) { w1[j] += w4[j]; w3[j]++;}
2243: }
2244: }
2245:
2246: nrqs = 0; /* no of outgoing messages */
2247: msz = 0; /* total mesg length (for all procs) */
2248: w1[rank] = 0; /* no mesg sent to self */
2249: w3[rank] = 0;
2250: for (i=0; i<size; i++) {
2251: if (w1[i]) { w2[i] = 1; nrqs++;} /* there exists a message to proc i */
2252: }
2253: PetscMalloc((nrqs+1)*sizeof(int),&pa); /*(proc -array)*/
2254: for (i=0,j=0; i<size; i++) {
2255: if (w1[i]) { pa[j] = i; j++; }
2256: }
2258: /* Each message would have a header = 1 + 2*(no of IS) + data */
2259: for (i=0; i<nrqs; i++) {
2260: j = pa[i];
2261: w1[j] += w2[j] + 2* w3[j];
2262: msz += w1[j];
2263: }
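  /* Illustrative layout of sbuf1[j] (assumed example): if two index sets request 3 and 1
     rows from proc j, the buffer is [2, 0,3, 1,1, r0,r1,r2, r3], i.e. a count, then
     (IS index, #rows) pairs, then the requested global row numbers; its length matches
     w1[j] = data + 1 + 2*(no of IS) computed above. */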
2265:   /* Determine the number of messages to expect, their lengths, and the from-ids */
2266: PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
2267: PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);
2269: /* Now post the Irecvs corresponding to these messages */
2270: PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);
2271:
2272: PetscFree(onodes1);
2273: PetscFree(olengths1);
2274:
2275: /* Allocate Memory for outgoing messages */
2276: PetscMalloc(2*size*sizeof(int*) + 2*msz*sizeof(int) + size*sizeof(int),&sbuf1);
2277: ptr = sbuf1 + size; /* Pointers to the data in outgoing buffers */
2278: PetscMemzero(sbuf1,2*size*sizeof(int*));
2279: /* allocate memory for outgoing data + buf to receive the first reply */
2280: tmp = (int*)(ptr + size);
2281: ctr = tmp + 2*msz;
2283: {
2284: int *iptr = tmp,ict = 0;
2285: for (i=0; i<nrqs; i++) {
2286: j = pa[i];
2287: iptr += ict;
2288: sbuf1[j] = iptr;
2289: ict = w1[j];
2290: }
2291: }
2293: /* Form the outgoing messages */
2294: /* Initialize the header space */
2295: for (i=0; i<nrqs; i++) {
2296: j = pa[i];
2297: sbuf1[j][0] = 0;
2298: PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(int));
2299: ptr[j] = sbuf1[j] + 2*w3[j] + 1;
2300: }
2301:
2302: /* Parse the isrow and copy data into outbuf */
2303: for (i=0; i<ismax; i++) {
2304: PetscMemzero(ctr,size*sizeof(int));
2305: irow_i = irow[i];
2306: jmax = nrow[i];
2307: for (j=0; j<jmax; j++) { /* parse the indices of each IS */
2308: row = irow_i[j];
2309: proc = rtable[row];
2310: if (proc != rank) { /* copy to the outgoing buf*/
2311: ctr[proc]++;
2312: *ptr[proc] = row;
2313: ptr[proc]++;
2314: }
2315: }
2316: /* Update the headers for the current IS */
2317: for (j=0; j<size; j++) { /* Can Optimise this loop too */
2318: if ((ctr_j = ctr[j])) {
2319: sbuf1_j = sbuf1[j];
2320: k = ++sbuf1_j[0];
2321: sbuf1_j[2*k] = ctr_j;
2322: sbuf1_j[2*k-1] = i;
2323: }
2324: }
2325: }
2327: /* Now post the sends */
2328: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
2329: for (i=0; i<nrqs; ++i) {
2330: j = pa[i];
2331: MPI_Isend(sbuf1[j],w1[j],MPI_INT,j,tag0,comm,s_waits1+i);
2332: }
2334: /* Post Receives to capture the buffer size */
2335: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);
2336: PetscMalloc((nrqs+1)*sizeof(int*),&rbuf2);
2337: rbuf2[0] = tmp + msz;
2338: for (i=1; i<nrqs; ++i) {
2339: rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]];
2340: }
2341: for (i=0; i<nrqs; ++i) {
2342: j = pa[i];
2343: MPI_Irecv(rbuf2[i],w1[j],MPI_INT,j,tag1,comm,r_waits2+i);
2344: }
2346: /* Send to other procs the buf size they should allocate */
2347:
2349: /* Receive messages*/
2350: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
2351: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);
2352: PetscMalloc(2*nrqr*sizeof(int) + (nrqr+1)*sizeof(int*),&sbuf2);
2353: req_size = (int*)(sbuf2 + nrqr);
2354: req_source = req_size + nrqr;
2355:
2356: {
2357: BSsprow **sAi = A->rows;
2358: int id,rstart = C->rmap->rstart;
2359: int *sbuf2_i;
2361: for (i=0; i<nrqr; ++i) {
2362: MPI_Waitany(nrqr,r_waits1,&idx,r_status1+i);
2363: req_size[idx] = 0;
2364: rbuf1_i = rbuf1[idx];
2365: start = 2*rbuf1_i[0] + 1;
2366: MPI_Get_count(r_status1+i,MPI_INT,&end);
2367: PetscMalloc((end+1)*sizeof(int),&sbuf2[idx]);
2368: sbuf2_i = sbuf2[idx];
2369: for (j=start; j<end; j++) {
2370: id = rbuf1_i[j] - rstart;
2371: ncols = (sAi[id])->length;
2372: sbuf2_i[j] = ncols;
2373: req_size[idx] += ncols;
2374: }
2375: req_source[idx] = r_status1[i].MPI_SOURCE;
2376: /* form the header */
2377: sbuf2_i[0] = req_size[idx];
2378: for (j=1; j<start; j++) { sbuf2_i[j] = rbuf1_i[j]; }
2379: MPI_Isend(sbuf2_i,end,MPI_INT,req_source[idx],tag1,comm,s_waits2+i);
2380: }
2381: }
2382: PetscFree(r_status1);
2383: PetscFree(r_waits1);
2385: /* recv buffer sizes */
2386: /* Receive messages*/
2387:
2388: PetscMalloc((nrqs+1)*sizeof(int*),&rbuf3);
2389: PetscMalloc((nrqs+1)*sizeof(FLOAT *),&rbuf4);
2390: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);
2391: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);
2392: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);
2394: for (i=0; i<nrqs; ++i) {
2395: MPI_Waitany(nrqs,r_waits2,&idx,r_status2+i);
2396: PetscMalloc((rbuf2[idx][0]+1)*sizeof(int),&rbuf3[idx]);
2397: PetscMalloc((rbuf2[idx][0]+1)*sizeof(FLOAT),&rbuf4[idx]);
2398: MPI_Irecv(rbuf3[idx],rbuf2[idx][0],MPI_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idx);
2399: MPI_Irecv(rbuf4[idx],rbuf2[idx][0],MPIU_SCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idx);
2400: }
2401: PetscFree(r_status2);
2402: PetscFree(r_waits2);
2403:
2404: /* Wait on sends1 and sends2 */
2405: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);
2406: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);
2408: if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status1);}
2409: if (nrqr) {MPI_Waitall(nrqr,s_waits2,s_status2);}
2410: PetscFree(s_status1);
2411: PetscFree(s_status2);
2412: PetscFree(s_waits1);
2413: PetscFree(s_waits2);
2415: /* Now allocate buffers for a->j, and send them off */
2416: PetscMalloc((nrqr+1)*sizeof(int*),&sbuf_aj);
2417: for (i=0,j=0; i<nrqr; i++) j += req_size[i];
2418: PetscMalloc((j+1)*sizeof(int),&sbuf_aj[0]);
2419: for (i=1; i<nrqr; i++) sbuf_aj[i] = sbuf_aj[i-1] + req_size[i-1];
2420:
2421: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);
2422: {
2423: BSsprow *brow;
2424: int *Acol;
2425: int rstart = C->rmap->rstart;
2427: for (i=0; i<nrqr; i++) {
2428: rbuf1_i = rbuf1[i];
2429: sbuf_aj_i = sbuf_aj[i];
2430: ct1 = 2*rbuf1_i[0] + 1;
2431: ct2 = 0;
2432: for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
2433: kmax = rbuf1[i][2*j];
2434: for (k=0; k<kmax; k++,ct1++) {
2435: brow = A->rows[rbuf1_i[ct1] - rstart];
2436: ncols = brow->length;
2437: Acol = brow->col;
2438: /* load the column indices for this row into cols*/
2439: cols = sbuf_aj_i + ct2;
2440: PetscMemcpy(cols,Acol,ncols*sizeof(int));
2441:         /*for (l=0; l<ncols;l++) cols[l]=Acol[l]; */ /* unclear whether column mappings should be applied here */
2443: ct2 += ncols;
2444: }
2445: }
2446: MPI_Isend(sbuf_aj_i,req_size[i],MPI_INT,req_source[i],tag2,comm,s_waits3+i);
2447: }
2448: }
2449: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);
2450: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);
2452: /* Allocate buffers for a->a, and send them off */
2453: PetscMalloc((nrqr+1)*sizeof(FLOAT*),&sbuf_aa);
2454: for (i=0,j=0; i<nrqr; i++) j += req_size[i];
2455: PetscMalloc((j+1)*sizeof(FLOAT),&sbuf_aa[0]);
2456: for (i=1; i<nrqr; i++) sbuf_aa[i] = sbuf_aa[i-1] + req_size[i-1];
2457:
2458: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);
2459: {
2460: BSsprow *brow;
2461: FLOAT *Aval;
2462: int rstart = C->rmap->rstart;
2463:
2464: for (i=0; i<nrqr; i++) {
2465: rbuf1_i = rbuf1[i];
2466: sbuf_aa_i = sbuf_aa[i];
2467: ct1 = 2*rbuf1_i[0]+1;
2468: ct2 = 0;
2469: for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
2470: kmax = rbuf1_i[2*j];
2471: for (k=0; k<kmax; k++,ct1++) {
2472: brow = A->rows[rbuf1_i[ct1] - rstart];
2473: ncols = brow->length;
2474: Aval = brow->nz;
2475: /* load the column values for this row into vals*/
2476: vals = sbuf_aa_i+ct2;
2477: PetscMemcpy(vals,Aval,ncols*sizeof(FLOAT));
2478: ct2 += ncols;
2479: }
2480: }
2481: MPI_Isend(sbuf_aa_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);
2482: }
2483: }
2484: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);
2485: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);
2486: PetscFree(rbuf1);
2488: /* Form the matrix */
2489: /* create col map */
2490: {
2491: int *icol_i;
2492:
2493: PetscMalloc((1+ismax)*sizeof(int*)+ ismax*C->cmap->N*sizeof(int),&cmap);
2494: cmap[0] = (int*)(cmap + ismax);
2495: PetscMemzero(cmap[0],(1+ismax*C->cmap->N)*sizeof(int));
2496: for (i=1; i<ismax; i++) { cmap[i] = cmap[i-1] + C->cmap->N; }
2497: for (i=0; i<ismax; i++) {
2498: jmax = ncol[i];
2499: icol_i = icol[i];
2500: cmap_i = cmap[i];
2501: for (j=0; j<jmax; j++) {
2502: cmap_i[icol_i[j]] = j+1;
2503: }
2504: }
2505: }
2507: /* Create lens which is required for MatCreate... */
2508: for (i=0,j=0; i<ismax; i++) { j += nrow[i]; }
2509: PetscMalloc((1+ismax)*sizeof(int*)+ j*sizeof(int),&lens);
2510: lens[0] = (int*)(lens + ismax);
2511: PetscMemzero(lens[0],j*sizeof(int));
2512: for (i=1; i<ismax; i++) { lens[i] = lens[i-1] + nrow[i-1]; }
2513:
2514: /* Update lens from local data */
2515: { BSsprow *Arow;
2516: for (i=0; i<ismax; i++) {
2517: jmax = nrow[i];
2518: cmap_i = cmap[i];
2519: irow_i = irow[i];
2520: lens_i = lens[i];
2521: for (j=0; j<jmax; j++) {
2522: row = irow_i[j];
2523: proc = rtable[row];
2524: if (proc == rank) {
2525: Arow=A->rows[row-C->rmap->rstart];
2526: ncols=Arow->length;
2527: cols=Arow->col;
2528: for (k=0; k<ncols; k++) {
2529: if (cmap_i[cols[k]]) { lens_i[j]++;}
2530: }
2531: }
2532: }
2533: }
2534: }
2535:
2536: /* Create row map*/
2537: PetscMalloc((1+ismax)*sizeof(int*)+ ismax*C->rmap->N*sizeof(int),&rmap);
2538: rmap[0] = (int*)(rmap + ismax);
2539: PetscMemzero(rmap[0],ismax*C->rmap->N*sizeof(int));
2540: for (i=1; i<ismax; i++) { rmap[i] = rmap[i-1] + C->rmap->N;}
2541: for (i=0; i<ismax; i++) {
2542: rmap_i = rmap[i];
2543: irow_i = irow[i];
2544: jmax = nrow[i];
2545: for (j=0; j<jmax; j++) {
2546: rmap_i[irow_i[j]] = j;
2547: }
2548: }
2549:
2550: /* Update lens from offproc data */
2551: {
2552: int *rbuf2_i,*rbuf3_i,*sbuf1_i;
2554: for (tmp2=0; tmp2<nrqs; tmp2++) {
2555: MPI_Waitany(nrqs,r_waits3,&i,r_status3+tmp2);
2556: idx = pa[i];
2557: sbuf1_i = sbuf1[idx];
2558: jmax = sbuf1_i[0];
2559: ct1 = 2*jmax+1;
2560: ct2 = 0;
2561: rbuf2_i = rbuf2[i];
2562: rbuf3_i = rbuf3[i];
2563: for (j=1; j<=jmax; j++) {
2564: is_no = sbuf1_i[2*j-1];
2565: max1 = sbuf1_i[2*j];
2566: lens_i = lens[is_no];
2567: cmap_i = cmap[is_no];
2568: rmap_i = rmap[is_no];
2569: for (k=0; k<max1; k++,ct1++) {
2570:         row = rmap_i[sbuf1_i[ct1]]; /* row index in the new submatrix */
2571: max2 = rbuf2_i[ct1];
2572: for (l=0; l<max2; l++,ct2++) {
2573: if (cmap_i[rbuf3_i[ct2]]) {
2574: lens_i[row]++;
2575: }
2576: }
2577: }
2578: }
2579: }
2580: }
2581: PetscFree(r_status3);
2582: PetscFree(r_waits3);
2583: if (nrqr) {MPI_Waitall(nrqr,s_waits3,s_status3);}
2584: PetscFree(s_status3);
2585: PetscFree(s_waits3);
2587: /* Create the submatrices */
2588: if (scall == MAT_REUSE_MATRIX) {
2589: PetscTruth same;
2590:
2591: /*
2592:        Assumes the new rows are the same length as the old rows, hence a potential bug!
2593: */
2594: for (i=0; i<ismax; i++) {
2595: PetscTypeCompare((PetscObject)(submats[i]),MATSEQAIJ,&same);
2596: if (!same) {
2597: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong type");
2598: }
2599: mat = (Mat_SeqAIJ*)(submats[i]->data);
2600: if ((submats[i]->rmap->n != nrow[i]) || (submats[i]->cmap->n != ncol[i])) {
2601: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
2602: }
2603: PetscMemcmp(mat->ilen,lens[i],submats[i]->rmap->n*sizeof(int),&same);
2604: if (!same) {
2605: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
2606: }
2607:       /* Initialize the matrix as if empty */
2608: PetscMemzero(mat->ilen,submats[i]->rmap->n*sizeof(int));
2609: submats[i]->factor = C->factor;
2610: }
2611: } else {
2612: for (i=0; i<ismax; i++) {
2613: /* Here we want to explicitly generate SeqAIJ matrices */
2614: MatCreate(PETSC_COMM_SELF,submats+i);
2615: MatSetSizes(submats[i],nrow[i],ncol[i],nrow[i],ncol[i]);
2616: MatSetType(submats[i],MATSEQAIJ);
2617: MatSeqAIJSetPreallocation(submats[i],0,lens[i]);
2618: }
2619: }
2621: /* Assemble the matrices */
2622: /* First assemble the local rows */
2623: {
2624: int ilen_row,*imat_ilen,*imat_j,*imat_i,old_row;
2625: PetscScalar *imat_a;
2626: BSsprow *Arow;
2627:
2628: for (i=0; i<ismax; i++) {
2629: mat = (Mat_SeqAIJ*)submats[i]->data;
2630: imat_ilen = mat->ilen;
2631: imat_j = mat->j;
2632: imat_i = mat->i;
2633: imat_a = mat->a;
2634: cmap_i = cmap[i];
2635: rmap_i = rmap[i];
2636: irow_i = irow[i];
2637: jmax = nrow[i];
2638: for (j=0; j<jmax; j++) {
2639: row = irow_i[j];
2640: proc = rtable[row];
2641: if (proc == rank) {
2642: old_row = row;
2643: row = rmap_i[row];
2644: ilen_row = imat_ilen[row];
2645:
2646: Arow=A->rows[old_row-C->rmap->rstart];
2647: ncols=Arow->length;
2648: cols=Arow->col;
2649: vals=Arow->nz;
2650:
2651: mat_i = imat_i[row];
2652: mat_a = imat_a + mat_i;
2653: mat_j = imat_j + mat_i;
2654: for (k=0; k<ncols; k++) {
2655: if ((tcol = cmap_i[cols[k]])) {
2656: *mat_j++ = tcol - 1;
2657: *mat_a++ = (PetscScalar)vals[k];
2658: ilen_row++;
2659: }
2660: }
2661: imat_ilen[row] = ilen_row;
2662: }
2663: }
2664: }
2665: }
2667: /* Now assemble the off proc rows*/
2668: {
2669: int *sbuf1_i,*rbuf2_i,*rbuf3_i,*imat_ilen,ilen;
2670: int *imat_j,*imat_i;
2671: PetscScalar *imat_a;
2672: FLOAT *rbuf4_i;
2673:
2674: for (tmp2=0; tmp2<nrqs; tmp2++) {
2675: MPI_Waitany(nrqs,r_waits4,&i,r_status4+tmp2);
2676: idx = pa[i];
2677: sbuf1_i = sbuf1[idx];
2678: jmax = sbuf1_i[0];
2679: ct1 = 2*jmax + 1;
2680: ct2 = 0;
2681: rbuf2_i = rbuf2[i];
2682: rbuf3_i = rbuf3[i];
2683: rbuf4_i = rbuf4[i];
2684: for (j=1; j<=jmax; j++) {
2685: is_no = sbuf1_i[2*j-1];
2686: rmap_i = rmap[is_no];
2687: cmap_i = cmap[is_no];
2688: mat = (Mat_SeqAIJ*)submats[is_no]->data;
2689: imat_ilen = mat->ilen;
2690: imat_j = mat->j;
2691: imat_i = mat->i;
2692: imat_a = mat->a;
2693: max1 = sbuf1_i[2*j];
2694: for (k=0; k<max1; k++,ct1++) {
2695: row = sbuf1_i[ct1];
2696: row = rmap_i[row];
2697: ilen = imat_ilen[row];
2698: mat_i = imat_i[row];
2699: mat_a = imat_a + mat_i;
2700: mat_j = imat_j + mat_i;
2701: max2 = rbuf2_i[ct1];
2702: for (l=0; l<max2; l++,ct2++) {
2703: if ((tcol = cmap_i[rbuf3_i[ct2]])) {
2704: *mat_j++ = tcol - 1;
2705: *mat_a++ = (PetscScalar)rbuf4_i[ct2];
2706: ilen++;
2707: }
2708: }
2709: imat_ilen[row] = ilen;
2710: }
2711: }
2712: }
2713: }
2714: PetscFree(r_status4);
2715: PetscFree(r_waits4);
2716: if (nrqr) {MPI_Waitall(nrqr,s_waits4,s_status4);}
2717: PetscFree(s_waits4);
2718: PetscFree(s_status4);
2720: /* Restore the indices */
2721: for (i=0; i<ismax; i++) {
2722: ISRestoreIndices(isrow[i],irow+i);
2723: ISRestoreIndices(iscol[i],icol+i);
2724: }
2726: /* Destroy allocated memory */
2727: PetscFree(irow);
2728: PetscFree(w1);
2729: PetscFree(pa);
2731: PetscFree(sbuf1);
2732: PetscFree(rbuf2);
2733: for (i=0; i<nrqr; ++i) {
2734: PetscFree(sbuf2[i]);
2735: }
2736: for (i=0; i<nrqs; ++i) {
2737: PetscFree(rbuf3[i]);
2738: PetscFree(rbuf4[i]);
2739: }
2741: PetscFree(sbuf2);
2742: PetscFree(rbuf3);
2743: PetscFree(rbuf4);
2744: PetscFree(sbuf_aj[0]);
2745: PetscFree(sbuf_aj);
2746: PetscFree(sbuf_aa[0]);
2747: PetscFree(sbuf_aa);
2748:
2749: PetscFree(cmap);
2750: PetscFree(rmap);
2751: PetscFree(lens);
2753: for (i=0; i<ismax; i++) {
2754: MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);
2755: MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);
2756: }
2757: return(0);
2758: }
2760: /*
2761:    This can be optimized by sending only the nonzeros whose columns lie in the iscol IS,
2762:    i.e. by prebuilding the submatrix (including its A,B partitioning) on the sending side.
2763: */
2766: #include ../src/vec/is/impls/general/general.h
2767: PetscErrorCode MatGetSubMatrix_MPIRowbs(Mat C,IS isrow,IS iscol,int csize,MatReuse scall,Mat *submat)
2768: {
2769: Mat_MPIRowbs *c = (Mat_MPIRowbs*)C->data;
2770: BSspmat *A = c->A;
2771: BSsprow *Arow;
2772:   Mat_SeqAIJ *matA,*matB; /* on-proc and off-proc parts of submat */
2773: Mat_MPIAIJ *mat; /* submat->data */
2775: const int *irow,*icol;
2776: int nrow,ncol,*rtable,size,rank,tag0,tag1,tag2,tag3;
2777: int *w1,*w2,*pa,nrqs,nrqr,msz,row_t;
2778: int i,j,k,l,len,jmax,proc,idx;
2779: int **sbuf1,**sbuf2,**rbuf1,**rbuf2,*req_size,**sbuf3,**rbuf3;
2780:   FLOAT **rbuf4,**sbuf4; /* FLOAT comes from the BlockSolve95 library */
2782: int *cmap,*rmap,nlocal,*o_nz,*d_nz,cstart,cend;
2783: int *req_source;
2784: int ncols_t;
2785:
2786:
2787: MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3;
2788: MPI_Request *r_waits4,*s_waits3,*s_waits4;
2789:
2790: MPI_Status *r_status1,*r_status2,*s_status1,*s_status3,*s_status2;
2791: MPI_Status *r_status3,*r_status4,*s_status4;
2792: MPI_Comm comm;
2796: comm = ((PetscObject)C)->comm;
2797: tag0 = ((PetscObject)C)->tag;
2798: size = c->size;
2799: rank = c->rank;
2801: if (size==1) {
2802: if (scall == MAT_REUSE_MATRIX) {
2803: ierr=MatGetSubMatrices(C,1,&isrow,&iscol,MAT_REUSE_MATRIX,&submat);
2804: return(0);
2805: } else {
2806: Mat *newsubmat;
2807:
2808: ierr=MatGetSubMatrices(C,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&newsubmat);
2809: *submat=*newsubmat;
2810: ierr=PetscFree(newsubmat);
2811: return(0);
2812: }
2813: }
2814:
2815: /* Get some new tags to keep the communication clean */
2816: PetscObjectGetNewTag((PetscObject)C,&tag1);
2817: PetscObjectGetNewTag((PetscObject)C,&tag2);
2818: PetscObjectGetNewTag((PetscObject)C,&tag3);
2820:   /* Check that the index sets are non-null and that the indices are sorted */
2821:   {PetscTruth sorted;
2822:    if (!isrow) SETERRQ(PETSC_ERR_ARG_SIZ,"Null or empty ISrow");
2823:    if (!iscol) SETERRQ(PETSC_ERR_ARG_SIZ,"Null or empty IScol");
2824:    ISSorted(isrow,&sorted);
2825:    if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted");
2826:    ISSorted(iscol,&sorted);
2827:    if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted");
2828:   }
2829:
2830:   ISGetIndices(isrow,&irow);
2831:   ISGetIndices(iscol,&icol);
2832:   ISGetLocalSize(isrow,&nrow);
2833:   ISGetLocalSize(iscol,&ncol);
2835:
2836:
2837: PetscMalloc((C->rmap->N+1)*sizeof(int),&rtable);
2838: /* Create hash table for the mapping :row -> proc*/
2839: for (i=0,j=0; i<size; i++) {
2840: jmax = C->rmap->range[i+1];
2841: for (; j<jmax; j++) {
2842: rtable[j] = i;
2843: }
2844: }
2846:   /* evaluate communication - message to whom, length of message, and buffer space
2847:      required. Based on this, buffers are allocated, and data copied into them */
2848: PetscMalloc(size*2*sizeof(int),&w1); /* mesg size */
2849: w2 = w1 + size; /* if w2[i] marked, then a message to proc i*/
2850: PetscMemzero(w1,size*2*sizeof(int)); /* initialize work vector*/
2851: for (j=0; j<nrow; j++) {
2852: row_t = irow[j];
2853: proc = rtable[row_t];
2854: w1[proc]++;
2855: }
2856: nrqs = 0; /* no of outgoing messages */
2857: msz = 0; /* total mesg length (for all procs) */
2858: w1[rank] = 0; /* no mesg sent to self */
2859: for (i=0; i<size; i++) {
2860: if (w1[i]) { w2[i] = 1; nrqs++;} /* there exists a message to proc i */
2861: }
2862:
2863: PetscMalloc((nrqs+1)*sizeof(int),&pa); /*(proc -array)*/
2864: for (i=0,j=0; i<size; i++) {
2865: if (w1[i]) {
2866: pa[j++] = i;
2867: w1[i]++; /* header for return data */
2868: msz+=w1[i];
2869: }
2870: }
2871:
2872: {int *onodes1,*olengths1;
2873:   /* Determine the number of messages to expect, their lengths, and the from-ids */
2874: PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
2875: PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);
2876: /* Now post the Irecvs corresponding to these messages */
2877: PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);
2878: PetscFree(onodes1);
2879: PetscFree(olengths1);
2880: }
2881:
2882: { int **ptr,*iptr,*tmp;
2883: /* Allocate Memory for outgoing messages */
2884: PetscMalloc(2*size*sizeof(int*) + msz*sizeof(int),&sbuf1);
2885: ptr = sbuf1 + size; /* Pointers to the data in outgoing buffers */
2886: PetscMemzero(sbuf1,2*size*sizeof(int*));
2887: /* allocate memory for outgoing data + buf to receive the first reply */
2888: tmp = (int*)(ptr + size);
2890: for (i=0,iptr=tmp; i<nrqs; i++) {
2891: j = pa[i];
2892: sbuf1[j] = iptr;
2893: iptr += w1[j];
2894: }
2896: /* Form the outgoing messages */
2897: for (i=0; i<nrqs; i++) {
2898: j = pa[i];
2899: sbuf1[j][0] = 0; /*header */
2900: ptr[j] = sbuf1[j] + 1;
2901: }
2902:
2903: /* Parse the isrow and copy data into outbuf */
2904: for (j=0; j<nrow; j++) {
2905: row_t = irow[j];
2906: proc = rtable[row_t];
2907: if (proc != rank) { /* copy to the outgoing buf*/
2908: sbuf1[proc][0]++;
2909: *ptr[proc] = row_t;
2910: ptr[proc]++;
2911: }
2912: }
2913: } /* block */
2915: /* Now post the sends */
2916:
2917:   /* structure of sbuf1[i]/rbuf1[i]: 1 (number of requested rows) followed by the
2918:    * global numbers of the requested (non-local) rows */
2920: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
2921: for (i=0; i<nrqs; ++i) {
2922: j = pa[i];
2923: MPI_Isend(sbuf1[j],w1[j],MPI_INT,j,tag0,comm,s_waits1+i);
2924: }
2926: /* Post Receives to capture the buffer size */
2927: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);
2928: PetscMalloc((nrqs+1)*sizeof(int*),&rbuf2);
2929: PetscMalloc(msz*sizeof(int)+1,&(rbuf2[0]));
2930: for (i=1; i<nrqs; ++i) {
2931: rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]];
2932: }
2933: for (i=0; i<nrqs; ++i) {
2934: j = pa[i];
2935: MPI_Irecv(rbuf2[i],w1[j],MPI_INT,j,tag1,comm,r_waits2+i);
2936: }
2938:   /* Send to other procs the buffer size they should allocate */
2939:   /* structure of sbuf2[i]/rbuf2[i]: 1 (total size to allocate) followed by the
2940:    * length of each requested row */
2942: /* Receive messages*/
2943: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
2944: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);
2945: PetscMalloc(2*nrqr*sizeof(int) + (nrqr+1)*sizeof(int*),&sbuf2);
2946: req_size = (int*)(sbuf2 + nrqr);
2947: req_source = req_size + nrqr;
2948:
2949: {
2950: BSsprow **sAi = A->rows;
2951: int id,rstart = C->rmap->rstart;
2952: int *sbuf2_i,*rbuf1_i,end;
2954: for (i=0; i<nrqr; ++i) {
2955: MPI_Waitany(nrqr,r_waits1,&idx,r_status1+i);
2956: req_size[idx] = 0;
2957: rbuf1_i = rbuf1[idx];
2958: MPI_Get_count(r_status1+i,MPI_INT,&end);
2959: PetscMalloc((end+1)*sizeof(int),&sbuf2[idx]);
2960: sbuf2_i = sbuf2[idx];
2961: for (j=1; j<end; j++) {
2962: id = rbuf1_i[j] - rstart;
2963: ncols_t = (sAi[id])->length;
2964: sbuf2_i[j] = ncols_t;
2965: req_size[idx] += ncols_t;
2966: }
2967: req_source[idx] = r_status1[i].MPI_SOURCE;
2968: /* form the header */
2969: sbuf2_i[0] = req_size[idx];
2970: MPI_Isend(sbuf2_i,end,MPI_INT,req_source[idx],tag1,comm,s_waits2+i);
2971: }
2972: }
2973: PetscFree(r_status1);
2974: PetscFree(r_waits1);
2976: /* recv buffer sizes */
2977: /* Receive messages*/
2978:
2979: PetscMalloc((nrqs+1)*sizeof(int*),&rbuf3);
2980: PetscMalloc((nrqs+1)*sizeof(FLOAT*),&rbuf4);
2981: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);
2982: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);
2983: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);
2985: for (i=0; i<nrqs; ++i) {
2986: MPI_Waitany(nrqs,r_waits2,&idx,r_status2+i);
2987: PetscMalloc((rbuf2[idx][0]+1)*sizeof(int),&rbuf3[idx]);
2988: PetscMalloc((rbuf2[idx][0]+1)*sizeof(FLOAT),&rbuf4[idx]);
2989: MPI_Irecv(rbuf3[idx],rbuf2[idx][0],MPI_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idx);
2990: MPI_Irecv(rbuf4[idx],rbuf2[idx][0],MPIU_SCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idx);
2991: }
2992: PetscFree(r_status2);
2993: PetscFree(r_waits2);
2994:
2995: /* Wait on sends1 and sends2 */
2996: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);
2997: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);
2999: if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status1);}
3000: if (nrqr) {MPI_Waitall(nrqr,s_waits2,s_status2);}
3001: PetscFree(s_status1);
3002: PetscFree(s_status2);
3003: PetscFree(s_waits1);
3004: PetscFree(s_waits2);
3006: /* Now allocate buffers for a->j, and send them off */
3007:   /* structure of sbuf3[i]/rbuf3[i] and sbuf4[i]/rbuf4[i]: req_size[i] entries (the columns,
3008:    * resp. values, of all requested rows; the row sizes were sent in rbuf2; values are of FLOAT type) */
3009:
3010: PetscMalloc((nrqr+1)*sizeof(int*),&sbuf3);
3011: for (i=0,j=0; i<nrqr; i++) j += req_size[i];
3012: PetscMalloc((j+1)*sizeof(int),&sbuf3[0]);
3013: for (i=1; i<nrqr; i++) sbuf3[i] = sbuf3[i-1] + req_size[i-1];
3014:
3015: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);
3016: {
3017: int *Acol,*rbuf1_i,*sbuf3_i,rqrow,noutcols,kmax,*cols,ncols;
3018: int rstart = C->rmap->rstart;
3020: for (i=0; i<nrqr; i++) {
3021: rbuf1_i = rbuf1[i];
3022: sbuf3_i = sbuf3[i];
3023: noutcols = 0;
3024: kmax = rbuf1_i[0]; /* num. of req. rows */
3025: for (k=0,rqrow=1; k<kmax; k++,rqrow++) {
3026: Arow = A->rows[rbuf1_i[rqrow] - rstart];
3027: ncols = Arow->length;
3028: Acol = Arow->col;
3029: /* load the column indices for this row into cols*/
3030: cols = sbuf3_i + noutcols;
3031: PetscMemcpy(cols,Acol,ncols*sizeof(int));
3032:         /*for (l=0; l<ncols;l++) cols[l]=Acol[l]; */ /* unclear whether column mappings should be applied here */
3033: noutcols += ncols;
3034: }
3035: MPI_Isend(sbuf3_i,req_size[i],MPI_INT,req_source[i],tag2,comm,s_waits3+i);
3036: }
3037: }
3038: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);
3039: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);
3041: /* Allocate buffers for a->a, and send them off */
3042:   /* can be optimized by combining with the previous block */
3043: PetscMalloc((nrqr+1)*sizeof(FLOAT*),&sbuf4);
3044: for (i=0,j=0; i<nrqr; i++) j += req_size[i];
3045: PetscMalloc((j+1)*sizeof(FLOAT),&sbuf4[0]);
3046: for (i=1; i<nrqr; i++) sbuf4[i] = sbuf4[i-1] + req_size[i-1];
3047:
3048: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);
3049: {
3050: FLOAT *Aval,*vals,*sbuf4_i;
3051: int rstart = C->rmap->rstart,*rbuf1_i,rqrow,noutvals,kmax,ncols;
3052:
3053:
3054: for (i=0; i<nrqr; i++) {
3055: rbuf1_i = rbuf1[i];
3056: sbuf4_i = sbuf4[i];
3057: rqrow = 1;
3058: noutvals = 0;
3059: kmax = rbuf1_i[0]; /* num of req. rows */
3060: for (k=0; k<kmax; k++,rqrow++) {
3061: Arow = A->rows[rbuf1_i[rqrow] - rstart];
3062: ncols = Arow->length;
3063: Aval = Arow->nz;
3064: /* load the column values for this row into vals*/
3065: vals = sbuf4_i+noutvals;
3066: PetscMemcpy(vals,Aval,ncols*sizeof(FLOAT));
3067: noutvals += ncols;
3068: }
3069: MPI_Isend(sbuf4_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);
3070: }
3071: }
3072: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);
3073: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);
3074: PetscFree(rbuf1);
3076: /* Form the matrix */
3078: /* create col map */
3079: PetscMalloc(C->cmap->N*sizeof(int),&cmap);
3080: PetscMemzero(cmap,C->cmap->N*sizeof(int));
3081: for (j=0; j<ncol; j++) {
3082: cmap[icol[j]] = j+1;
3083: }
3084:
3085:   /* Create row map (a global row map may eventually be needed, but a local one suffices here) */
3086: PetscMalloc(C->rmap->N*sizeof(int),&rmap);
3087: PetscMemzero(rmap,C->rmap->N*sizeof(int));
3088: for (j=0; j<nrow; j++) {
3089: rmap[irow[j]] = j;
3090: }
3092: /*
3093: Determine the number of non-zeros in the diagonal and off-diagonal
3094: portions of the matrix in order to do correct preallocation
3095: */
3097: /* first get start and end of "diagonal" columns */
3098: if (csize == PETSC_DECIDE) {
3099: nlocal = ncol/size + ((ncol % size) > rank);
3100: } else {
3101: nlocal = csize;
3102: }
3103: {
3104: int ncols,*cols,olen,dlen,thecol;
3105: int *rbuf2_i,*rbuf3_i,*sbuf1_i,row,kmax,cidx;
3106:
3107: MPI_Scan(&nlocal,&cend,1,MPI_INT,MPI_SUM,comm);
3108: cstart = cend - nlocal;
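    /* Note (added comment): MPI_Scan returns the inclusive prefix sum of nlocal over the ranks,
       so this process owns the "diagonal" columns cstart+1..cend in the 1-based numbering used
       by cmap below.  Illustrative example (assumed values): with 3 processes and nlocal = 4,3,3
       the scan gives cend = 4,7,10 and hence cstart = 0,4,7. */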
3109: if (rank == size - 1 && cend != ncol) {
3110: SETERRQ(PETSC_ERR_ARG_SIZ,"Local column sizes do not add up to total number of columns");
3111: }
3113: PetscMalloc((2*nrow+1)*sizeof(int),&d_nz);
3114: o_nz = d_nz + nrow;
3115:
3116: /* Update lens from local data */
3117: for (j=0; j<nrow; j++) {
3118: row = irow[j];
3119: proc = rtable[row];
3120: if (proc == rank) {
3121: Arow=A->rows[row-C->rmap->rstart];
3122: ncols=Arow->length;
3123: cols=Arow->col;
3124: olen=dlen=0;
3125: for (k=0; k<ncols; k++) {
3126: if ((thecol=cmap[cols[k]])) {
3127: if (cstart<thecol && thecol<=cend) dlen++; /* thecol is from 1 */
3128: else olen++;
3129: }
3130: }
3131: o_nz[j]=olen;
3132: d_nz[j]=dlen;
3133: } else d_nz[j]=o_nz[j]=0;
3134: }
3135:     /* Update lens from off-proc data and complete the waits */
3136:     /* this will be much simpler once only the appropriate columns are sent */
3137: for (j=0; j<nrqs;j++) {
3138: MPI_Waitany(nrqs,r_waits3,&i,r_status3+j);
3139: proc = pa[i];
3140: sbuf1_i = sbuf1[proc];
3141: cidx = 0;
3142: rbuf2_i = rbuf2[i];
3143: rbuf3_i = rbuf3[i];
3144: kmax = sbuf1_i[0]; /*num of rq. rows*/
3145: for (k=1; k<=kmax; k++) {
3146:         row = rmap[sbuf1_i[k]]; /* row index in the new submatrix */
3147: for (l=0; l<rbuf2_i[k]; l++,cidx++) {
3148: if ((thecol=cmap[rbuf3_i[cidx]])) {
3149:
3150: if (cstart<thecol && thecol<=cend) d_nz[row]++; /* thecol is from 1 */
3151: else o_nz[row]++;
3152: }
3153: }
3154: }
3155: }
3156: }
3157: PetscFree(r_status3);
3158: PetscFree(r_waits3);
3159: if (nrqr) {MPI_Waitall(nrqr,s_waits3,s_status3);}
3160: PetscFree(s_status3);
3161: PetscFree(s_waits3);
3163: if (scall == MAT_INITIAL_MATRIX) {
3164: MatCreate(comm,submat);
3165: MatSetSizes(*submat,nrow,nlocal,PETSC_DECIDE,ncol);
3166: MatSetType(*submat,((PetscObject)C)->type_name);
3167: MatMPIAIJSetPreallocation(*submat,0,d_nz,0,o_nz);
3168: mat=(Mat_MPIAIJ *)((*submat)->data);
3169: matA=(Mat_SeqAIJ *)(mat->A->data);
3170: matB=(Mat_SeqAIJ *)(mat->B->data);
3171:
3172: } else {
3173: PetscTruth same;
3174:     /* The following safety checks could optionally be dropped for debugged user
3175:      * programs, but there is no PETSc option to switch such tests off; similarly,
3176:      * the counting of o_nz,d_nz could be dropped for the REUSE
3177:      * case */
3178:
3179: PetscTypeCompare((PetscObject)(*submat),MATMPIAIJ,&same);
3180: if (!same) {
3181: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong type");
3182: }
3183: if (((*submat)->rmap->n != nrow) || ((*submat)->cmap->N != ncol)) {
3184: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
3185: }
3186: mat=(Mat_MPIAIJ *)((*submat)->data);
3187: matA=(Mat_SeqAIJ *)(mat->A->data);
3188: matB=(Mat_SeqAIJ *)(mat->B->data);
3189: PetscMemcmp(matA->ilen,d_nz,nrow*sizeof(int),&same);
3190: if (!same) {
3191: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
3192: }
3193: PetscMemcmp(matB->ilen,o_nz,nrow*sizeof(int),&same);
3194: if (!same) {
3195: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
3196: }
3197:     /* Initialize the matrix as if empty */
3198: PetscMemzero(matA->ilen,nrow*sizeof(int));
3199: PetscMemzero(matB->ilen,nrow*sizeof(int));
3200:     /* Perhaps MatZeroEntries would be better here - check exactly what it does; all
3201:      * possibly stale information must be deleted */
3202:     /*submats[i]->factor = C->factor; ??? if the factor is to be kept the same then some
3203:      * factor information must be copied - but where is it stored? */
3204: (*submat)->was_assembled=PETSC_FALSE;
3205: (*submat)->assembled=PETSC_FALSE;
3206:
3207: }
3208: PetscFree(d_nz);
3210: /* Assemble the matrix */
3211: /* First assemble from local rows */
3212: {
3213: int i_row,oldrow,row,ncols,*cols,*matA_j,*matB_j,ilenA,ilenB,tcol;
3214: FLOAT *vals;
3215: PetscScalar *matA_a,*matB_a;
3216:
3217: for (j=0; j<nrow; j++) {
3218: oldrow = irow[j];
3219: proc = rtable[oldrow];
3220: if (proc == rank) {
3221: row = rmap[oldrow];
3222:
3223: Arow = A->rows[oldrow-C->rmap->rstart];
3224: ncols = Arow->length;
3225: cols = Arow->col;
3226: vals = Arow->nz;
3227:
3228: i_row = matA->i[row];
3229: matA_a = matA->a + i_row;
3230: matA_j = matA->j + i_row;
3231: i_row = matB->i[row];
3232: matB_a = matB->a + i_row;
3233: matB_j = matB->j + i_row;
3234: for (k=0,ilenA=0,ilenB=0; k<ncols; k++) {
3235: if ((tcol = cmap[cols[k]])) {
3236: if (tcol<=cstart) {
3237: *matB_j++ = tcol-1;
3238: *matB_a++ = vals[k];
3239: ilenB++;
3240: } else if (tcol<=cend) {
3241: *matA_j++ = (tcol-1)-cstart;
3242: *matA_a++ = (PetscScalar)(vals[k]);
3243: ilenA++;
3244: } else {
3245: *matB_j++ = tcol-1;
3246: *matB_a++ = vals[k];
3247: ilenB++;
3248: }
3249: }
3250: }
3251: matA->ilen[row]=ilenA;
3252: matB->ilen[row]=ilenB;
3253:
3254: }
3255: }
3256: }
3258: /* Now assemble the off proc rows*/
3259: {
3260: int *sbuf1_i,*rbuf2_i,*rbuf3_i,cidx,kmax,row,i_row;
3261: int *matA_j,*matB_j,lmax,tcol,ilenA,ilenB;
3262: PetscScalar *matA_a,*matB_a;
3263: FLOAT *rbuf4_i;
3265: for (j=0; j<nrqs; j++) {
3266: MPI_Waitany(nrqs,r_waits4,&i,r_status4+j);
3267: proc = pa[i];
3268: sbuf1_i = sbuf1[proc];
3269:
3270: cidx = 0;
3271: rbuf2_i = rbuf2[i];
3272: rbuf3_i = rbuf3[i];
3273: rbuf4_i = rbuf4[i];
3274: kmax = sbuf1_i[0];
3275: for (k=1; k<=kmax; k++) {
3276: row = rmap[sbuf1_i[k]];
3277:
3278: i_row = matA->i[row];
3279: matA_a = matA->a + i_row;
3280: matA_j = matA->j + i_row;
3281: i_row = matB->i[row];
3282: matB_a = matB->a + i_row;
3283: matB_j = matB->j + i_row;
3284:
3285: lmax = rbuf2_i[k];
3286: for (l=0,ilenA=0,ilenB=0; l<lmax; l++,cidx++) {
3287: if ((tcol = cmap[rbuf3_i[cidx]])) {
3288: if (tcol<=cstart) {
3289: *matB_j++ = tcol-1;
3290:             *matB_a++ = (PetscScalar)(rbuf4_i[cidx]);
3291: ilenB++;
3292: } else if (tcol<=cend) {
3293: *matA_j++ = (tcol-1)-cstart;
3294: *matA_a++ = (PetscScalar)(rbuf4_i[cidx]);
3295: ilenA++;
3296: } else {
3297: *matB_j++ = tcol-1;
3298: *matB_a++ = (PetscScalar)(rbuf4_i[cidx]);
3299: ilenB++;
3300: }
3301: }
3302: }
3303: matA->ilen[row]=ilenA;
3304: matB->ilen[row]=ilenB;
3305: }
3306: }
3307: }
3309: PetscFree(r_status4);
3310: PetscFree(r_waits4);
3311: if (nrqr) {MPI_Waitall(nrqr,s_waits4,s_status4);}
3312: PetscFree(s_waits4);
3313: PetscFree(s_status4);
3315: /* Restore the indices */
3316: ISRestoreIndices(isrow,&irow);
3317: ISRestoreIndices(iscol,&icol);
3319: /* Destroy allocated memory */
3320: PetscFree(rtable);
3321: PetscFree(w1);
3322: PetscFree(pa);
3324: PetscFree(sbuf1);
3325: PetscFree(rbuf2[0]);
3326: PetscFree(rbuf2);
3327: for (i=0; i<nrqr; ++i) {
3328: PetscFree(sbuf2[i]);
3329: }
3330: for (i=0; i<nrqs; ++i) {
3331: PetscFree(rbuf3[i]);
3332: PetscFree(rbuf4[i]);
3333: }
3335: PetscFree(sbuf2);
3336: PetscFree(rbuf3);
3337: PetscFree(rbuf4);
3338: PetscFree(sbuf3[0]);
3339: PetscFree(sbuf3);
3340: PetscFree(sbuf4[0]);
3341: PetscFree(sbuf4);
3342:
3343: PetscFree(cmap);
3344: PetscFree(rmap);
3347: MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);
3348: MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);
3351: return(0);
3352: }