Actual source code: greedy.c
petsc-3.5.4 2015-05-23
1: #include <petsc-private/matimpl.h> /*I "petscmat.h" I*/
2: #include <../src/mat/impls/aij/seq/aij.h>
3: #include <../src/mat/impls/aij/mpi/mpiaij.h>
4: #include <petscsf.h>
6: typedef struct {
7: PetscReal dummy;
8: } MC_Greedy;
12: PetscErrorCode MatColoringDestroy_Greedy(MatColoring mc)
13: {
17: PetscFree(mc->data);
18: return(0);
19: }
23: PETSC_EXTERN PetscErrorCode GreedyColoringLocalDistanceOne_Private(MatColoring mc,PetscReal *wts,PetscInt *lperm,ISColoringValue *colors)
24: {
25: PetscInt i,j,k,s,e,n,no,nd,nd_global,n_global,idx,ncols,maxcolors,masksize,ccol,*mask;
26: PetscErrorCode ierr;
27: Mat m=mc->mat;
28: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)m->data;
29: Mat md=NULL,mo=NULL;
30: const PetscInt *md_i,*mo_i,*md_j,*mo_j;
31: PetscBool isMPIAIJ,isSEQAIJ;
32: ISColoringValue pcol;
33: const PetscInt *cidx;
34: PetscInt *lcolors,*ocolors;
35: PetscReal *owts=NULL;
36: PetscSF sf;
37: PetscLayout layout;
40: MatGetSize(m,&n_global,NULL);
41: MatGetOwnershipRange(m,&s,&e);
42: n=e-s;
43: masksize=20;
44: nd_global = 0;
45: /* get the matrix communication structures */
46: PetscObjectTypeCompare((PetscObject)m, MATMPIAIJ, &isMPIAIJ);
47: PetscObjectTypeCompare((PetscObject)m, MATSEQAIJ, &isSEQAIJ);
48: if (isMPIAIJ) {
49: /* get the CSR data for on and off diagonal portions of m */
50: Mat_SeqAIJ *dseq;
51: Mat_SeqAIJ *oseq;
52: md=aij->A;
53: dseq = (Mat_SeqAIJ*)md->data;
54: mo=aij->B;
55: oseq = (Mat_SeqAIJ*)mo->data;
56: md_i = dseq->i;
57: md_j = dseq->j;
58: mo_i = oseq->i;
59: mo_j = oseq->j;
60: } else if (isSEQAIJ) {
61: /* get the CSR data for m */
62: Mat_SeqAIJ *dseq;
63: /* no off-processor nodes */
64: md=m;
65: dseq = (Mat_SeqAIJ*)md->data;
66: mo=NULL;
67: no=0;
68: md_i = dseq->i;
69: md_j = dseq->j;
70: mo_i = NULL;
71: mo_j = NULL;
72: } else {
73: SETERRQ(PetscObjectComm((PetscObject)mc),PETSC_ERR_ARG_WRONG,"Matrix must be AIJ for greedy coloring");
74: }
75: MatColoringGetMaxColors(mc,&maxcolors);
76: if (mo) {
77: VecGetSize(aij->lvec,&no);
78: PetscMalloc2(no,&ocolors,no,&owts);
79: for(i=0;i<no;i++) {
80: ocolors[i]=maxcolors;
81: }
82: }
84: PetscMalloc1(masksize,&mask);
85: PetscMalloc1(n,&lcolors);
86: for(i=0;i<n;i++) {
87: lcolors[i]=maxcolors;
88: }
89: for (i=0;i<masksize;i++) {
90: mask[i]=-1;
91: }
92: if (mo) {
93: /* transfer neighbor weights */
94: PetscSFCreate(PetscObjectComm((PetscObject)m),&sf);
95: MatGetLayouts(m,&layout,NULL);
96: PetscSFSetGraphLayout(sf,layout,no,NULL,PETSC_COPY_VALUES,aij->garray);
97: PetscSFBcastBegin(sf,MPIU_REAL,wts,owts);
98: PetscSFBcastEnd(sf,MPIU_REAL,wts,owts);
99: }
100: while (nd_global < n_global) {
101: nd=n;
102: /* assign lowest possible color to each local vertex */
103: PetscLogEventBegin(Mat_Coloring_Local,mc,0,0,0);
104: for (i=0;i<n;i++) {
105: idx=lperm[i];
106: if (lcolors[idx] == maxcolors) {
107: ncols = md_i[idx+1]-md_i[idx];
108: cidx = &(md_j[md_i[idx]]);
109: for (j=0;j<ncols;j++) {
110: if (lcolors[cidx[j]] != maxcolors) {
111: ccol=lcolors[cidx[j]];
112: if (ccol>=masksize) {
113: PetscInt *newmask;
114: PetscMalloc1(masksize*2,&newmask);
115: for(k=0;k<2*masksize;k++) {
116: newmask[k]=-1;
117: }
118: for(k=0;k<masksize;k++) {
119: newmask[k]=mask[k];
120: }
121: PetscFree(mask);
122: mask=newmask;
123: masksize*=2;
124: }
125: mask[ccol]=idx;
126: }
127: }
128: if (mo) {
129: ncols = mo_i[idx+1]-mo_i[idx];
130: cidx = &(mo_j[mo_i[idx]]);
131: for (j=0;j<ncols;j++) {
132: if (ocolors[cidx[j]] != maxcolors) {
133: ccol=ocolors[cidx[j]];
134: if (ccol>=masksize) {
135: PetscInt *newmask;
136: PetscMalloc1(masksize*2,&newmask);
137: for(k=0;k<2*masksize;k++) {
138: newmask[k]=-1;
139: }
140: for(k=0;k<masksize;k++) {
141: newmask[k]=mask[k];
142: }
143: PetscFree(mask);
144: mask=newmask;
145: masksize*=2;
146: }
147: mask[ccol]=idx;
148: }
149: }
150: }
151: for (j=0;j<masksize;j++) {
152: if (mask[j]!=idx) {
153: break;
154: }
155: }
156: pcol=j;
157: if (pcol>maxcolors)pcol=maxcolors;
158: lcolors[idx]=pcol;
159: }
160: }
161: PetscLogEventEnd(Mat_Coloring_Local,mc,0,0,0);
162: if (mo) {
163: /* transfer neighbor colors */
164: PetscLogEventBegin(Mat_Coloring_Comm,mc,0,0,0);
165: PetscSFBcastBegin(sf,MPIU_INT,lcolors,ocolors);
166: PetscSFBcastEnd(sf,MPIU_INT,lcolors,ocolors);
167: /* check for conflicts -- this is merely checking if any adjacent off-processor rows have the same color and marking the ones that are lower weight locally for changing */
168: for (i=0;i<n;i++) {
169: ncols = mo_i[i+1]-mo_i[i];
170: cidx = &(mo_j[mo_i[i]]);
171: for (j=0;j<ncols;j++) {
172: /* in the case of conflicts, the highest weight one stays and the others go */
173: if ((ocolors[cidx[j]] == lcolors[i]) && (owts[cidx[j]] > wts[i]) && lcolors[i] < maxcolors) {
174: lcolors[i]=maxcolors;
175: nd--;
176: }
177: }
178: }
179: nd_global=0;
180: }
181: MPI_Allreduce(&nd,&nd_global,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mc));
182: }
183: for (i=0;i<n;i++) {
184: colors[i] = (ISColoringValue)lcolors[i];
185: }
186: PetscFree(mask);
187: PetscFree(lcolors);
188: if (mo) {
189: PetscFree2(ocolors,owts);
190: PetscSFDestroy(&sf);
191: }
192: return(0);
193: }
199: PETSC_EXTERN PetscErrorCode GreedyColoringLocalDistanceTwo_Private(MatColoring mc,PetscReal *wts,PetscInt *lperm,ISColoringValue *colors)
200: {
201: PetscInt i,j,k,l,s,e,n,nd,nd_global,n_global,idx,ncols,maxcolors,mcol,mcol_global,nd1cols,*mask,masksize,*d1cols,*bad,*badnext,nbad,badsize,ccol,no,cbad;
202: PetscErrorCode ierr;
203: Mat m=mc->mat;
204: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)m->data;
205: Mat md=NULL,mo=NULL;
206: const PetscInt *md_i,*mo_i,*md_j,*mo_j;
207: PetscBool isMPIAIJ,isSEQAIJ;
208: PetscInt pcol,*dcolors,*ocolors;
209: ISColoringValue *badidx;
210: const PetscInt *cidx;
211: PetscReal *owts,*colorweights;
212: PetscInt *oconf,*conf;
213: PetscSF sf;
214: PetscLayout layout;
217: MatGetSize(m,&n_global,NULL);
218: MatGetOwnershipRange(m,&s,&e);
219: n=e-s;
220: nd_global = 0;
221: /* get the matrix communication structures */
222: PetscObjectTypeCompare((PetscObject)m, MATMPIAIJ, &isMPIAIJ);
223: PetscObjectTypeCompare((PetscObject)m, MATSEQAIJ, &isSEQAIJ);
224: if (isMPIAIJ) {
225: Mat_SeqAIJ *dseq;
226: Mat_SeqAIJ *oseq;
227: md=aij->A;
228: dseq = (Mat_SeqAIJ*)md->data;
229: mo=aij->B;
230: oseq = (Mat_SeqAIJ*)mo->data;
231: md_i = dseq->i;
232: md_j = dseq->j;
233: mo_i = oseq->i;
234: mo_j = oseq->j;
235: } else if (isSEQAIJ) {
236: Mat_SeqAIJ *dseq;
237: /* no off-processor nodes */
238: md=m;
239: dseq = (Mat_SeqAIJ*)md->data;
240: md_i = dseq->i;
241: md_j = dseq->j;
242: mo_i = NULL;
243: mo_j = NULL;
244: } else {
245: SETERRQ(PetscObjectComm((PetscObject)mc),PETSC_ERR_ARG_WRONG,"Matrix must be AIJ for greedy coloring");
246: }
247: /* create the vectors and communication structures if necessary */
248: no=0;
249: if (mo) {
250: VecGetLocalSize(aij->lvec,&no);
251: PetscSFCreate(PetscObjectComm((PetscObject)m),&sf);
252: MatGetLayouts(m,&layout,NULL);
253: PetscSFSetGraphLayout(sf,layout,no,NULL,PETSC_COPY_VALUES,aij->garray);
254: }
255: MatColoringGetMaxColors(mc,&maxcolors);
256: masksize=n;
257: nbad=0;
258: badsize=n;
259: PetscMalloc1(masksize,&mask);
260: PetscMalloc4(n,&d1cols,n,&dcolors,n,&conf,n,&bad);
261: PetscMalloc2(badsize,&badidx,badsize,&badnext);
262: for(i=0;i<masksize;i++) {
263: mask[i]=-1;
264: }
265: for (i=0;i<n;i++) {
266: dcolors[i]=maxcolors;
267: bad[i]=-1;
268: }
269: for (i=0;i<badsize;i++) {
270: badnext[i]=-1;
271: }
272: if (mo) {
273: PetscMalloc3(no,&owts,no,&oconf,no,&ocolors);
274: PetscSFBcastBegin(sf,MPIU_REAL,wts,owts);
275: PetscSFBcastEnd(sf,MPIU_REAL,wts,owts);
276: for (i=0;i<no;i++) {
277: ocolors[i]=maxcolors;
278: }
279: } else { /* Appease overzealous -Wmaybe-initialized */
280: owts = NULL;
281: oconf = NULL;
282: ocolors = NULL;
283: }
284: mcol=0;
285: while (nd_global < n_global) {
286: nd=n;
287: /* assign lowest possible color to each local vertex */
288: mcol_global=0;
289: PetscLogEventBegin(Mat_Coloring_Local,mc,0,0,0);
290: for (i=0;i<n;i++) {
291: idx=lperm[i];
292: if (dcolors[idx] == maxcolors) {
293: /* entries in bad */
294: cbad=bad[idx];
295: while (cbad>=0) {
296: ccol=badidx[cbad];
297: if (ccol>=masksize) {
298: PetscInt *newmask;
299: PetscMalloc1(masksize*2,&newmask);
300: for(k=0;k<2*masksize;k++) {
301: newmask[k]=-1;
302: }
303: for(k=0;k<masksize;k++) {
304: newmask[k]=mask[k];
305: }
306: PetscFree(mask);
307: mask=newmask;
308: masksize*=2;
309: }
310: mask[ccol]=idx;
311: cbad=badnext[cbad];
312: }
313: /* diagonal distance-one rows */
314: nd1cols=0;
315: ncols = md_i[idx+1]-md_i[idx];
316: cidx = &(md_j[md_i[idx]]);
317: for (j=0;j<ncols;j++) {
318: d1cols[nd1cols] = cidx[j];
319: nd1cols++;
320: ccol=dcolors[cidx[j]];
321: if (ccol != maxcolors) {
322: if (ccol>=masksize) {
323: PetscInt *newmask;
324: PetscMalloc1(masksize*2,&newmask);
325: for(k=0;k<2*masksize;k++) {
326: newmask[k]=-1;
327: }
328: for(k=0;k<masksize;k++) {
329: newmask[k]=mask[k];
330: }
331: PetscFree(mask);
332: mask=newmask;
333: masksize*=2;
334: }
335: mask[ccol]=idx;
336: }
337: }
338: /* off-diagonal distance-one rows */
339: if (mo) {
340: ncols = mo_i[idx+1]-mo_i[idx];
341: cidx = &(mo_j[mo_i[idx]]);
342: for (j=0;j<ncols;j++) {
343: ccol=dcolors[cidx[j]];
344: if (ccol != maxcolors) {
345: if (ccol>=masksize) {
346: PetscInt *newmask;
347: PetscMalloc1(masksize*2,&newmask);
348: for(k=0;k<2*masksize;k++) {
349: newmask[k]=-1;
350: }
351: for(k=0;k<masksize;k++) {
352: newmask[k]=mask[k];
353: }
354: PetscFree(mask);
355: mask=newmask;
356: masksize*=2;
357: }
358: mask[ccol]=idx;
359: }
360: }
361: }
362: /* diagonal distance-two rows */
363: for (j=0;j<nd1cols;j++) {
364: ncols = md_i[d1cols[j]+1]-md_i[d1cols[j]];
365: cidx = &(md_j[md_i[d1cols[j]]]);
366: for (l=0;l<ncols;l++) {
367: ccol=dcolors[cidx[l]];
368: if (ccol != maxcolors) {
369: if (ccol>=masksize) {
370: PetscInt *newmask;
371: PetscMalloc1(masksize*2,&newmask);
372: for(k=0;k<2*masksize;k++) {
373: newmask[k]=-1;
374: }
375: for(k=0;k<masksize;k++) {
376: newmask[k]=mask[k];
377: }
378: PetscFree(mask);
379: mask=newmask;
380: masksize*=2;
381: }
382: mask[ccol]=idx;
383: }
384: }
385: }
386: /* off-diagonal distance-two rows */
387: if (mo) {
388: for (j=0;j<nd1cols;j++) {
389: ncols = mo_i[d1cols[j]+1]-mo_i[d1cols[j]];
390: cidx = &(mo_j[mo_i[d1cols[j]]]);
391: for (l=0;l<ncols;l++) {
392: ccol=ocolors[cidx[l]];
393: if (ccol != maxcolors) {
394: if (ccol>=masksize) {
395: PetscInt *newmask;
396: PetscMalloc1(masksize*2,&newmask);
397: for(k=0;k<2*masksize;k++) {
398: newmask[k]=-1;
399: }
400: for(k=0;k<masksize;k++) {
401: newmask[k]=mask[k];
402: }
403: PetscFree(mask);
404: mask=newmask;
405: masksize*=2;
406: }
407: mask[ccol]=idx;
408: }
409: }
410: }
411: }
412: /* assign this one the lowest color possible by seeing if there's a gap in the sequence of sorted neighbor colors */
413: pcol=0;
414: for (j=0;j<masksize;j++) {
415: if (mask[j]!=idx) {
416: break;
417: }
418: }
419: pcol=j;
420: if (pcol>maxcolors) pcol=maxcolors;
421: dcolors[idx]=pcol;
422: if (pcol>mcol) mcol=pcol;
423: }
424: }
425: PetscLogEventEnd(Mat_Coloring_Local,mc,0,0,0);
426: if (mo) {
427: /* transfer neighbor colors */
428: PetscSFBcastBegin(sf,MPIU_INT,dcolors,ocolors);
429: PetscSFBcastEnd(sf,MPIU_INT,dcolors,ocolors);
430: /* find the maximum color assigned locally and allocate a mask */
431: MPI_Allreduce(&mcol,&mcol_global,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)mc));
432: PetscMalloc1(mcol_global+1,&colorweights);
433: /* check for conflicts */
434: for (i=0;i<n;i++) {
435: conf[i]=PETSC_FALSE;
436: }
437: for (i=0;i<no;i++) {
438: oconf[i]=PETSC_FALSE;
439: }
440: for (i=0;i<n;i++) {
441: ncols = mo_i[i+1]-mo_i[i];
442: cidx = &(mo_j[mo_i[i]]);
443: if (ncols > 0) {
444: /* fill in the mask */
445: for (j=0;j<mcol_global+1;j++) {
446: colorweights[j]=0;
447: }
448: colorweights[dcolors[i]]=wts[i];
449: /* fill in the off-diagonal part of the mask */
450: for (j=0;j<ncols;j++) {
451: ccol=ocolors[cidx[j]];
452: if (ccol < maxcolors) {
453: if (colorweights[ccol] < owts[cidx[j]]) {
454: colorweights[ccol] = owts[cidx[j]];
455: }
456: }
457: }
458: /* fill in the on-diagonal part of the mask */
459: ncols = md_i[i+1]-md_i[i];
460: cidx = &(md_j[md_i[i]]);
461: for (j=0;j<ncols;j++) {
462: ccol=dcolors[cidx[j]];
463: if (ccol < maxcolors) {
464: if (colorweights[ccol] < wts[cidx[j]]) {
465: colorweights[ccol] = wts[cidx[j]];
466: }
467: }
468: }
469: /* go back through and set up on and off-diagonal conflict vectors */
470: ncols = md_i[i+1]-md_i[i];
471: cidx = &(md_j[md_i[i]]);
472: for (j=0;j<ncols;j++) {
473: ccol=dcolors[cidx[j]];
474: if (ccol < maxcolors) {
475: if (colorweights[ccol] > wts[cidx[j]]) {
476: conf[cidx[j]]=PETSC_TRUE;
477: }
478: }
479: }
480: ncols = mo_i[i+1]-mo_i[i];
481: cidx = &(mo_j[mo_i[i]]);
482: for (j=0;j<ncols;j++) {
483: ccol=ocolors[cidx[j]];
484: if (ccol < maxcolors) {
485: if (colorweights[ccol] > owts[cidx[j]]) {
486: oconf[cidx[j]]=PETSC_TRUE;
487: }
488: }
489: }
490: }
491: }
492: nd_global=0;
493: PetscFree(colorweights);
494: PetscLogEventBegin(Mat_Coloring_Comm,mc,0,0,0);
495: PetscSFReduceBegin(sf,MPIU_INT,oconf,conf,MPIU_SUM);
496: PetscSFReduceEnd(sf,MPIU_INT,oconf,conf,MPIU_SUM);
497: PetscLogEventEnd(Mat_Coloring_Comm,mc,0,0,0);
498: /* go through and unset local colors that have conflicts */
499: for (i=0;i<n;i++) {
500: if (conf[i]>0) {
501: /* push this color onto the bad stack */
502: badidx[nbad]=dcolors[i];
503: badnext[nbad]=bad[i];
504: bad[i]=nbad;
505: nbad++;
506: if (nbad>=badsize) {
507: PetscInt *newbadnext;
508: ISColoringValue *newbadidx;
509: PetscMalloc2(badsize*2,&newbadidx,badsize*2,&newbadnext);
510: for(k=0;k<2*badsize;k++) {
511: newbadnext[k]=-1;
512: }
513: for(k=0;k<badsize;k++) {
514: newbadidx[k]=badidx[k];
515: newbadnext[k]=badnext[k];
516: }
517: PetscFree2(badidx,badnext);
518: badidx=newbadidx;
519: badnext=newbadnext;
520: badsize*=2;
521: }
522: dcolors[i] = maxcolors;
523: nd--;
524: }
525: }
526: }
527: MPI_Allreduce(&nd,&nd_global,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mc));
528: }
529: if (mo) {
530: PetscSFDestroy(&sf);
531: PetscFree3(owts,oconf,ocolors);
532: }
533: for (i=0;i<n;i++) {
534: colors[i]=dcolors[i];
535: }
536: PetscFree(mask);
537: PetscFree4(d1cols,dcolors,conf,bad);
538: PetscFree2(badidx,badnext);
539: return(0);
540: }
544: PETSC_EXTERN PetscErrorCode MatColoringApply_Greedy(MatColoring mc,ISColoring *iscoloring)
545: {
546: /* MC_Greedy *gr=(MC_Greedy*)mc->data; */
547: PetscErrorCode ierr;
548: PetscInt finalcolor,finalcolor_global;
549: ISColoringValue *colors;
550: PetscInt ncolstotal,ncols;
551: PetscReal *wts;
552: PetscInt i,*lperm;
555: MatGetSize(mc->mat,NULL,&ncolstotal);
556: MatGetLocalSize(mc->mat,NULL,&ncols);
557: if (!mc->user_weights) {
558: MatColoringCreateWeights(mc,&wts,&lperm);
559: } else {
560: wts = mc->user_weights;
561: lperm = mc->user_lperm;
562: }
563: PetscMalloc1(ncols,&colors);
564: if (mc->dist == 1) {
565: GreedyColoringLocalDistanceOne_Private(mc,wts,lperm,colors);
566: } else if (mc->dist == 2) {
567: GreedyColoringLocalDistanceTwo_Private(mc,wts,lperm,colors);
568: } else {
569: SETERRQ(PetscObjectComm((PetscObject)mc),PETSC_ERR_ARG_OUTOFRANGE,"Only distance 1 and distance 2 supported by MatColoringGreedy");
570: }
571: finalcolor=0;
572: for (i=0;i<ncols;i++) {
573: if (colors[i] > finalcolor) finalcolor=colors[i];
574: }
575: finalcolor_global=0;
576: MPI_Allreduce(&finalcolor,&finalcolor_global,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)mc));
577: PetscLogEventBegin(Mat_Coloring_ISCreate,mc,0,0,0);
578: ISColoringCreate(PetscObjectComm((PetscObject)mc),finalcolor_global+1,ncols,colors,iscoloring);
579: PetscLogEventEnd(Mat_Coloring_ISCreate,mc,0,0,0);
580: if (!mc->user_weights) {
581: PetscFree(wts);
582: PetscFree(lperm);
583: }
584: return(0);
585: }
589: /*MC
590: MATCOLORINGGREEDY - Greedy-with-conflict correction based Matrix Coloring for distance 1 and 2.
592: Level: beginner
594: Notes:
596: These algorithms proceed in two phases -- local coloring and conflict resolution. The local coloring
597: tentatively colors all vertices at the distance required given what's known of the global coloring. Then,
598: the updated colors are transferred to different processors at distance one. In the distance one case, each
599: vertex with nonlocal neighbors is then checked to see if it conforms, with the vertex being
600: marked for recoloring if its lower weight than its same colored neighbor. In the distance two case,
601: each boundary vertex's immediate star is checked for validity of the coloring. Lower-weight conflict
602: vertices are marked, and then the conflicts are gathered back on owning processors. In both cases
603: this is done until each column has received a valid color.
605: References:
607: Bozdag et al. "A Parallel Distance-2 Graph Coloring Algorithm for Distributed Memory Computers"
608: HPCC'05 Proceedings of the First international conference on High Performance Computing and Communications
609: Pages 796--806
611: .seealso: MatColoringCreate(), MatColoring, MatColoringSetType()
612: M*/
613: PETSC_EXTERN PetscErrorCode MatColoringCreate_Greedy(MatColoring mc)
614: {
615: MC_Greedy *gr;
619: PetscNewLog(mc,&gr);
620: mc->data = gr;
621: mc->ops->apply = MatColoringApply_Greedy;
622: mc->ops->view = NULL;
623: mc->ops->destroy = MatColoringDestroy_Greedy;
624: mc->ops->setfromoptions = NULL;
625: return(0);
626: }