Actual source code: baij.c

petsc-master 2015-06-01
Report Typos and Errors
  2: /*
  3:     Defines the basic matrix operations for the BAIJ (compressed row)
  4:   matrix storage format.
  5: */
  6: #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
  7: #include <petscblaslapack.h>
  8: #include <petsc/private/kernels/blockinvert.h>
  9: #include <petsc/private/kernels/blockmatmult.h>

 13: PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
 14: {
 15:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
 17:   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
 18:   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
 19:   PetscReal      shift = 0.0;

 22:   if (a->idiagvalid) {
 23:     if (values) *values = a->idiag;
 24:     return(0);
 25:   }
 26:   MatMarkDiagonal_SeqBAIJ(A);
 27:   diag_offset = a->diag;
 28:   if (!a->idiag) {
 29:     PetscMalloc1(2*bs2*mbs,&a->idiag);
 30:     PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));
 31:   }
 32:   diag  = a->idiag;
 33:   mdiag = a->idiag+bs2*mbs;
 34:   if (values) *values = a->idiag;
 35:   /* factor and invert each block */
 36:   switch (bs) {
 37:   case 1:
 38:     for (i=0; i<mbs; i++) {
 39:       odiag    = v + 1*diag_offset[i];
 40:       diag[0]  = odiag[0];
 41:       mdiag[0] = odiag[0];
 42:       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
 43:       diag    += 1;
 44:       mdiag   += 1;
 45:     }
 46:     break;
 47:   case 2:
 48:     for (i=0; i<mbs; i++) {
 49:       odiag    = v + 4*diag_offset[i];
 50:       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 51:       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 52:       PetscKernel_A_gets_inverse_A_2(diag,shift);
 53:       diag    += 4;
 54:       mdiag   += 4;
 55:     }
 56:     break;
 57:   case 3:
 58:     for (i=0; i<mbs; i++) {
 59:       odiag    = v + 9*diag_offset[i];
 60:       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 61:       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
 62:       diag[8]  = odiag[8];
 63:       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 64:       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
 65:       mdiag[8] = odiag[8];
 66:       PetscKernel_A_gets_inverse_A_3(diag,shift);
 67:       diag    += 9;
 68:       mdiag   += 9;
 69:     }
 70:     break;
 71:   case 4:
 72:     for (i=0; i<mbs; i++) {
 73:       odiag  = v + 16*diag_offset[i];
 74:       PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
 75:       PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
 76:       PetscKernel_A_gets_inverse_A_4(diag,shift);
 77:       diag  += 16;
 78:       mdiag += 16;
 79:     }
 80:     break;
 81:   case 5:
 82:     for (i=0; i<mbs; i++) {
 83:       odiag  = v + 25*diag_offset[i];
 84:       PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
 85:       PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
 86:       PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);
 87:       diag  += 25;
 88:       mdiag += 25;
 89:     }
 90:     break;
 91:   case 6:
 92:     for (i=0; i<mbs; i++) {
 93:       odiag  = v + 36*diag_offset[i];
 94:       PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));
 95:       PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));
 96:       PetscKernel_A_gets_inverse_A_6(diag,shift);
 97:       diag  += 36;
 98:       mdiag += 36;
 99:     }
100:     break;
101:   case 7:
102:     for (i=0; i<mbs; i++) {
103:       odiag  = v + 49*diag_offset[i];
104:       PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));
105:       PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));
106:       PetscKernel_A_gets_inverse_A_7(diag,shift);
107:       diag  += 49;
108:       mdiag += 49;
109:     }
110:     break;
111:   default:
112:     PetscMalloc2(bs,&v_work,bs,&v_pivots);
113:     for (i=0; i<mbs; i++) {
114:       odiag  = v + bs2*diag_offset[i];
115:       PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));
116:       PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));
117:       PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);
118:       diag  += bs2;
119:       mdiag += bs2;
120:     }
121:     PetscFree2(v_work,v_pivots);
122:   }
123:   a->idiagvalid = PETSC_TRUE;
124:   return(0);
125: }

129: PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130: {
131:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
132:   PetscScalar       *x,*work,*w,*workt,*t;
133:   const MatScalar   *v,*aa = a->a, *idiag;
134:   const PetscScalar *b,*xb;
135:   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
136:   PetscErrorCode    ierr;
137:   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
138:   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;

141:   if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
142:   its = its*lits;
143:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
144:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
145:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
146:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
147:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");

149:   if (!a->idiagvalid) {MatInvertBlockDiagonal(A,NULL);}

151:   if (!m) return(0);
152:   diag  = a->diag;
153:   idiag = a->idiag;
154:   k    = PetscMax(A->rmap->n,A->cmap->n);
155:   if (!a->mult_work) {
156:     PetscMalloc1(k+1,&a->mult_work);
157:   }
158:   if (!a->sor_workt) {
159:     PetscMalloc1(k,&a->sor_workt);
160:   }
161:   if (!a->sor_work) {
162:     PetscMalloc1(bs,&a->sor_work);
163:   }
164:   work = a->mult_work;
165:   t    = a->sor_workt;
166:   w    = a->sor_work;

168:   VecGetArray(xx,&x);
169:   VecGetArrayRead(bb,&b);

171:   if (flag & SOR_ZERO_INITIAL_GUESS) {
172:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
173:       switch (bs) {
174:       case 1:
175:         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
176:         t[0] = b[0];
177:         i2     = 1;
178:         idiag += 1;
179:         for (i=1; i<m; i++) {
180:           v  = aa + ai[i];
181:           vi = aj + ai[i];
182:           nz = diag[i] - ai[i];
183:           s[0] = b[i2];
184:           for (j=0; j<nz; j++) {
185:             xw[0] = x[vi[j]];
186:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
187:           }
188:           t[i2] = s[0];
189:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
190:           x[i2]  = xw[0];
191:           idiag += 1;
192:           i2    += 1;
193:         }
194:         break;
195:       case 2:
196:         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
197:         t[0] = b[0]; t[1] = b[1];
198:         i2     = 2;
199:         idiag += 4;
200:         for (i=1; i<m; i++) {
201:           v  = aa + 4*ai[i];
202:           vi = aj + ai[i];
203:           nz = diag[i] - ai[i];
204:           s[0] = b[i2]; s[1] = b[i2+1];
205:           for (j=0; j<nz; j++) {
206:             idx = 2*vi[j];
207:             it  = 4*j;
208:             xw[0] = x[idx]; xw[1] = x[1+idx];
209:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
210:           }
211:           t[i2] = s[0]; t[i2+1] = s[1];
212:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
213:           x[i2]   = xw[0]; x[i2+1] = xw[1];
214:           idiag  += 4;
215:           i2     += 2;
216:         }
217:         break;
218:       case 3:
219:         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
220:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
221:         i2     = 3;
222:         idiag += 9;
223:         for (i=1; i<m; i++) {
224:           v  = aa + 9*ai[i];
225:           vi = aj + ai[i];
226:           nz = diag[i] - ai[i];
227:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
228:           while (nz--) {
229:             idx = 3*(*vi++);
230:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
231:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
232:             v  += 9;
233:           }
234:           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
235:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
236:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
237:           idiag  += 9;
238:           i2     += 3;
239:         }
240:         break;
241:       case 4:
242:         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
243:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
244:         i2     = 4;
245:         idiag += 16;
246:         for (i=1; i<m; i++) {
247:           v  = aa + 16*ai[i];
248:           vi = aj + ai[i];
249:           nz = diag[i] - ai[i];
250:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
251:           while (nz--) {
252:             idx = 4*(*vi++);
253:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
254:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
255:             v  += 16;
256:           }
257:           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
258:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
259:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
260:           idiag  += 16;
261:           i2     += 4;
262:         }
263:         break;
264:       case 5:
265:         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
266:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
267:         i2     = 5;
268:         idiag += 25;
269:         for (i=1; i<m; i++) {
270:           v  = aa + 25*ai[i];
271:           vi = aj + ai[i];
272:           nz = diag[i] - ai[i];
273:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
274:           while (nz--) {
275:             idx = 5*(*vi++);
276:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
277:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
278:             v  += 25;
279:           }
280:           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
281:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
282:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
283:           idiag  += 25;
284:           i2     += 5;
285:         }
286:         break;
287:       case 6:
288:         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
289:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
290:         i2     = 6;
291:         idiag += 36;
292:         for (i=1; i<m; i++) {
293:           v  = aa + 36*ai[i];
294:           vi = aj + ai[i];
295:           nz = diag[i] - ai[i];
296:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
297:           while (nz--) {
298:             idx = 6*(*vi++);
299:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
300:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
301:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
302:             v  += 36;
303:           }
304:           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
305:           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
306:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
307:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
308:           idiag  += 36;
309:           i2     += 6;
310:         }
311:         break;
312:       case 7:
313:         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
314:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
315:         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
316:         i2     = 7;
317:         idiag += 49;
318:         for (i=1; i<m; i++) {
319:           v  = aa + 49*ai[i];
320:           vi = aj + ai[i];
321:           nz = diag[i] - ai[i];
322:           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
323:           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
324:           while (nz--) {
325:             idx = 7*(*vi++);
326:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
327:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
328:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
329:             v  += 49;
330:           }
331:           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
332:           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
333:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
334:           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
335:           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
336:           idiag  += 49;
337:           i2     += 7;
338:         }
339:         break;
340:       default:
341:         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
342:         PetscMemcpy(t,b,bs*sizeof(PetscScalar));
343:         i2     = bs;
344:         idiag += bs2;
345:         for (i=1; i<m; i++) {
346:           v  = aa + bs2*ai[i];
347:           vi = aj + ai[i];
348:           nz = diag[i] - ai[i];

350:           PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
351:           /* copy all rows of x that are needed into contiguous space */
352:           workt = work;
353:           for (j=0; j<nz; j++) {
354:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
355:             workt += bs;
356:           }
357:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
358:           PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));
359:           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);

361:           idiag += bs2;
362:           i2    += bs;
363:         }
364:         break;
365:       }
366:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
367:       PetscLogFlops(1.0*bs2*a->nz);
368:       xb = t;
369:     }
370:     else xb = b;
371:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
372:       idiag = a->idiag+bs2*(a->mbs-1);
373:       i2 = bs * (m-1);
374:       switch (bs) {
375:       case 1:
376:         s[0]  = xb[i2];
377:         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
378:         x[i2] = xw[0];
379:         i2   -= 1;
380:         for (i=m-2; i>=0; i--) {
381:           v  = aa + (diag[i]+1);
382:           vi = aj + diag[i] + 1;
383:           nz = ai[i+1] - diag[i] - 1;
384:           s[0] = xb[i2];
385:           for (j=0; j<nz; j++) {
386:             xw[0] = x[vi[j]];
387:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
388:           }
389:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
390:           x[i2]  = xw[0];
391:           idiag -= 1;
392:           i2    -= 1;
393:         }
394:         break;
395:       case 2:
396:         s[0]  = xb[i2]; s[1] = xb[i2+1];
397:         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
398:         x[i2] = xw[0]; x[i2+1] = xw[1];
399:         i2    -= 2;
400:         idiag -= 4;
401:         for (i=m-2; i>=0; i--) {
402:           v  = aa + 4*(diag[i] + 1);
403:           vi = aj + diag[i] + 1;
404:           nz = ai[i+1] - diag[i] - 1;
405:           s[0] = xb[i2]; s[1] = xb[i2+1];
406:           for (j=0; j<nz; j++) {
407:             idx = 2*vi[j];
408:             it  = 4*j;
409:             xw[0] = x[idx]; xw[1] = x[1+idx];
410:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
411:           }
412:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
413:           x[i2]   = xw[0]; x[i2+1] = xw[1];
414:           idiag  -= 4;
415:           i2     -= 2;
416:         }
417:         break;
418:       case 3:
419:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
420:         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
421:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
422:         i2    -= 3;
423:         idiag -= 9;
424:         for (i=m-2; i>=0; i--) {
425:           v  = aa + 9*(diag[i]+1);
426:           vi = aj + diag[i] + 1;
427:           nz = ai[i+1] - diag[i] - 1;
428:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
429:           while (nz--) {
430:             idx = 3*(*vi++);
431:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
432:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
433:             v  += 9;
434:           }
435:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
436:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
437:           idiag  -= 9;
438:           i2     -= 3;
439:         }
440:         break;
441:       case 4:
442:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
443:         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
444:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
445:         i2    -= 4;
446:         idiag -= 16;
447:         for (i=m-2; i>=0; i--) {
448:           v  = aa + 16*(diag[i]+1);
449:           vi = aj + diag[i] + 1;
450:           nz = ai[i+1] - diag[i] - 1;
451:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
452:           while (nz--) {
453:             idx = 4*(*vi++);
454:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
455:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
456:             v  += 16;
457:           }
458:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
459:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
460:           idiag  -= 16;
461:           i2     -= 4;
462:         }
463:         break;
464:       case 5:
465:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
466:         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
467:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
468:         i2    -= 5;
469:         idiag -= 25;
470:         for (i=m-2; i>=0; i--) {
471:           v  = aa + 25*(diag[i]+1);
472:           vi = aj + diag[i] + 1;
473:           nz = ai[i+1] - diag[i] - 1;
474:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
475:           while (nz--) {
476:             idx = 5*(*vi++);
477:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
478:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
479:             v  += 25;
480:           }
481:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
482:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
483:           idiag  -= 25;
484:           i2     -= 5;
485:         }
486:         break;
487:       case 6:
488:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
489:         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
490:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
491:         i2    -= 6;
492:         idiag -= 36;
493:         for (i=m-2; i>=0; i--) {
494:           v  = aa + 36*(diag[i]+1);
495:           vi = aj + diag[i] + 1;
496:           nz = ai[i+1] - diag[i] - 1;
497:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
498:           while (nz--) {
499:             idx = 6*(*vi++);
500:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
501:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
502:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
503:             v  += 36;
504:           }
505:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
506:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
507:           idiag  -= 36;
508:           i2     -= 6;
509:         }
510:         break;
511:       case 7:
512:         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
513:         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
514:         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
515:         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
516:         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
517:         i2    -= 7;
518:         idiag -= 49;
519:         for (i=m-2; i>=0; i--) {
520:           v  = aa + 49*(diag[i]+1);
521:           vi = aj + diag[i] + 1;
522:           nz = ai[i+1] - diag[i] - 1;
523:           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
524:           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
525:           while (nz--) {
526:             idx = 7*(*vi++);
527:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
528:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
529:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
530:             v  += 49;
531:           }
532:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
533:           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
534:           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
535:           idiag  -= 49;
536:           i2     -= 7;
537:         }
538:         break;
539:       default:
540:         PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
541:         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
542:         i2    -= bs;
543:         idiag -= bs2;
544:         for (i=m-2; i>=0; i--) {
545:           v  = aa + bs2*(diag[i]+1);
546:           vi = aj + diag[i] + 1;
547:           nz = ai[i+1] - diag[i] - 1;

549:           PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
550:           /* copy all rows of x that are needed into contiguous space */
551:           workt = work;
552:           for (j=0; j<nz; j++) {
553:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
554:             workt += bs;
555:           }
556:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
557:           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);

559:           idiag -= bs2;
560:           i2    -= bs;
561:         }
562:         break;
563:       }
564:       PetscLogFlops(1.0*bs2*(a->nz));
565:     }
566:     its--;
567:   }
568:   while (its--) {
569:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
570:       idiag = a->idiag;
571:       i2 = 0;
572:       switch (bs) {
573:       case 1:
574:         for (i=0; i<m; i++) {
575:           v  = aa + ai[i];
576:           vi = aj + ai[i];
577:           nz = ai[i+1] - ai[i];
578:           s[0] = b[i2];
579:           for (j=0; j<nz; j++) {
580:             xw[0] = x[vi[j]];
581:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
582:           }
583:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
584:           x[i2] += xw[0];
585:           idiag += 1;
586:           i2    += 1;
587:         }
588:         break;
589:       case 2:
590:         for (i=0; i<m; i++) {
591:           v  = aa + 4*ai[i];
592:           vi = aj + ai[i];
593:           nz = ai[i+1] - ai[i];
594:           s[0] = b[i2]; s[1] = b[i2+1];
595:           for (j=0; j<nz; j++) {
596:             idx = 2*vi[j];
597:             it  = 4*j;
598:             xw[0] = x[idx]; xw[1] = x[1+idx];
599:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
600:           }
601:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
602:           x[i2]  += xw[0]; x[i2+1] += xw[1];
603:           idiag  += 4;
604:           i2     += 2;
605:         }
606:         break;
607:       case 3:
608:         for (i=0; i<m; i++) {
609:           v  = aa + 9*ai[i];
610:           vi = aj + ai[i];
611:           nz = ai[i+1] - ai[i];
612:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
613:           while (nz--) {
614:             idx = 3*(*vi++);
615:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
616:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
617:             v  += 9;
618:           }
619:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
620:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
621:           idiag  += 9;
622:           i2     += 3;
623:         }
624:         break;
625:       case 4:
626:         for (i=0; i<m; i++) {
627:           v  = aa + 16*ai[i];
628:           vi = aj + ai[i];
629:           nz = ai[i+1] - ai[i];
630:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
631:           while (nz--) {
632:             idx = 4*(*vi++);
633:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
634:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
635:             v  += 16;
636:           }
637:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
638:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
639:           idiag  += 16;
640:           i2     += 4;
641:         }
642:         break;
643:       case 5:
644:         for (i=0; i<m; i++) {
645:           v  = aa + 25*ai[i];
646:           vi = aj + ai[i];
647:           nz = ai[i+1] - ai[i];
648:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
649:           while (nz--) {
650:             idx = 5*(*vi++);
651:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
652:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
653:             v  += 25;
654:           }
655:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
656:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
657:           idiag  += 25;
658:           i2     += 5;
659:         }
660:         break;
661:       case 6:
662:         for (i=0; i<m; i++) {
663:           v  = aa + 36*ai[i];
664:           vi = aj + ai[i];
665:           nz = ai[i+1] - ai[i];
666:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
667:           while (nz--) {
668:             idx = 6*(*vi++);
669:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
670:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
671:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
672:             v  += 36;
673:           }
674:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
675:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
676:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
677:           idiag  += 36;
678:           i2     += 6;
679:         }
680:         break;
681:       case 7:
682:         for (i=0; i<m; i++) {
683:           v  = aa + 49*ai[i];
684:           vi = aj + ai[i];
685:           nz = ai[i+1] - ai[i];
686:           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
687:           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
688:           while (nz--) {
689:             idx = 7*(*vi++);
690:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
691:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
692:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
693:             v  += 49;
694:           }
695:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
696:           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
697:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
698:           idiag  += 49;
699:           i2     += 7;
700:         }
701:         break;
702:       default:
703:         for (i=0; i<m; i++) {
704:           v  = aa + bs2*ai[i];
705:           vi = aj + ai[i];
706:           nz = ai[i+1] - ai[i];

708:           PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
709:           /* copy all rows of x that are needed into contiguous space */
710:           workt = work;
711:           for (j=0; j<nz; j++) {
712:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
713:             workt += bs;
714:           }
715:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
716:           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);

718:           idiag += bs2;
719:           i2    += bs;
720:         }
721:         break;
722:       }
723:       PetscLogFlops(2.0*bs2*a->nz);
724:     }
725:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
726:       idiag = a->idiag+bs2*(a->mbs-1);
727:       i2 = bs * (m-1);
728:       switch (bs) {
729:       case 1:
730:         for (i=m-1; i>=0; i--) {
731:           v  = aa + ai[i];
732:           vi = aj + ai[i];
733:           nz = ai[i+1] - ai[i];
734:           s[0] = b[i2];
735:           for (j=0; j<nz; j++) {
736:             xw[0] = x[vi[j]];
737:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
738:           }
739:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
740:           x[i2] += xw[0];
741:           idiag -= 1;
742:           i2    -= 1;
743:         }
744:         break;
745:       case 2:
746:         for (i=m-1; i>=0; i--) {
747:           v  = aa + 4*ai[i];
748:           vi = aj + ai[i];
749:           nz = ai[i+1] - ai[i];
750:           s[0] = b[i2]; s[1] = b[i2+1];
751:           for (j=0; j<nz; j++) {
752:             idx = 2*vi[j];
753:             it  = 4*j;
754:             xw[0] = x[idx]; xw[1] = x[1+idx];
755:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
756:           }
757:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
758:           x[i2]  += xw[0]; x[i2+1] += xw[1];
759:           idiag  -= 4;
760:           i2     -= 2;
761:         }
762:         break;
763:       case 3:
764:         for (i=m-1; i>=0; i--) {
765:           v  = aa + 9*ai[i];
766:           vi = aj + ai[i];
767:           nz = ai[i+1] - ai[i];
768:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
769:           while (nz--) {
770:             idx = 3*(*vi++);
771:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
772:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
773:             v  += 9;
774:           }
775:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
776:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
777:           idiag  -= 9;
778:           i2     -= 3;
779:         }
780:         break;
781:       case 4:
782:         for (i=m-1; i>=0; i--) {
783:           v  = aa + 16*ai[i];
784:           vi = aj + ai[i];
785:           nz = ai[i+1] - ai[i];
786:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
787:           while (nz--) {
788:             idx = 4*(*vi++);
789:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
790:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
791:             v  += 16;
792:           }
793:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
794:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
795:           idiag  -= 16;
796:           i2     -= 4;
797:         }
798:         break;
799:       case 5:
800:         for (i=m-1; i>=0; i--) {
801:           v  = aa + 25*ai[i];
802:           vi = aj + ai[i];
803:           nz = ai[i+1] - ai[i];
804:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
805:           while (nz--) {
806:             idx = 5*(*vi++);
807:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
808:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
809:             v  += 25;
810:           }
811:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
812:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
813:           idiag  -= 25;
814:           i2     -= 5;
815:         }
816:         break;
817:       case 6:
818:         for (i=m-1; i>=0; i--) {
819:           v  = aa + 36*ai[i];
820:           vi = aj + ai[i];
821:           nz = ai[i+1] - ai[i];
822:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
823:           while (nz--) {
824:             idx = 6*(*vi++);
825:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
826:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
827:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
828:             v  += 36;
829:           }
830:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
831:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
832:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
833:           idiag  -= 36;
834:           i2     -= 6;
835:         }
836:         break;
837:       case 7:
838:         for (i=m-1; i>=0; i--) {
839:           v  = aa + 49*ai[i];
840:           vi = aj + ai[i];
841:           nz = ai[i+1] - ai[i];
842:           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
843:           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
844:           while (nz--) {
845:             idx = 7*(*vi++);
846:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
847:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
848:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
849:             v  += 49;
850:           }
851:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
852:           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
853:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
854:           idiag  -= 49;
855:           i2     -= 7;
856:         }
857:         break;
858:       default:
859:         for (i=m-1; i>=0; i--) {
860:           v  = aa + bs2*ai[i];
861:           vi = aj + ai[i];
862:           nz = ai[i+1] - ai[i];

864:           PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
865:           /* copy all rows of x that are needed into contiguous space */
866:           workt = work;
867:           for (j=0; j<nz; j++) {
868:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
869:             workt += bs;
870:           }
871:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
872:           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);

874:           idiag -= bs2;
875:           i2    -= bs;
876:         }
877:         break;
878:       }
879:       PetscLogFlops(2.0*bs2*(a->nz));
880:     }
881:   }
882:   VecRestoreArray(xx,&x);
883:   VecRestoreArrayRead(bb,&b);
884:   return(0);
885: }


888: /*
889:     Special version for direct calls from Fortran (Used in PETSc-fun3d)
890: */
891: #if defined(PETSC_HAVE_FORTRAN_CAPS)
892: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
893: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
894: #define matsetvaluesblocked4_ matsetvaluesblocked4
895: #endif

899: PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
900: {
901:   Mat               A  = *AA;
902:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
903:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
904:   PetscInt          *ai    =a->i,*ailen=a->ilen;
905:   PetscInt          *aj    =a->j,stepval,lastcol = -1;
906:   const PetscScalar *value = v;
907:   MatScalar         *ap,*aa = a->a,*bap;

910:   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
911:   stepval = (n-1)*4;
912:   for (k=0; k<m; k++) { /* loop over added rows */
913:     row  = im[k];
914:     rp   = aj + ai[row];
915:     ap   = aa + 16*ai[row];
916:     nrow = ailen[row];
917:     low  = 0;
918:     high = nrow;
919:     for (l=0; l<n; l++) { /* loop over added columns */
920:       col = in[l];
921:       if (col <= lastcol)  low = 0;
922:       else                high = nrow;
923:       lastcol = col;
924:       value   = v + k*(stepval+4 + l)*4;
925:       while (high-low > 7) {
926:         t = (low+high)/2;
927:         if (rp[t] > col) high = t;
928:         else             low  = t;
929:       }
930:       for (i=low; i<high; i++) {
931:         if (rp[i] > col) break;
932:         if (rp[i] == col) {
933:           bap = ap +  16*i;
934:           for (ii=0; ii<4; ii++,value+=stepval) {
935:             for (jj=ii; jj<16; jj+=4) {
936:               bap[jj] += *value++;
937:             }
938:           }
939:           goto noinsert2;
940:         }
941:       }
942:       N = nrow++ - 1;
943:       high++; /* added new column index thus must search to one higher than before */
944:       /* shift up all the later entries in this row */
945:       for (ii=N; ii>=i; ii--) {
946:         rp[ii+1] = rp[ii];
947:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
948:       }
949:       if (N >= i) {
950:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
951:       }
952:       rp[i] = col;
953:       bap   = ap +  16*i;
954:       for (ii=0; ii<4; ii++,value+=stepval) {
955:         for (jj=ii; jj<16; jj+=4) {
956:           bap[jj] = *value++;
957:         }
958:       }
959:       noinsert2:;
960:       low = i;
961:     }
962:     ailen[row] = nrow;
963:   }
964:   PetscFunctionReturnVoid();
965: }

967: #if defined(PETSC_HAVE_FORTRAN_CAPS)
968: #define matsetvalues4_ MATSETVALUES4
969: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
970: #define matsetvalues4_ matsetvalues4
971: #endif

975: PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
976: {
977:   Mat         A  = *AA;
978:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
979:   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
980:   PetscInt    *ai=a->i,*ailen=a->ilen;
981:   PetscInt    *aj=a->j,brow,bcol;
982:   PetscInt    ridx,cidx,lastcol = -1;
983:   MatScalar   *ap,value,*aa=a->a,*bap;

986:   for (k=0; k<m; k++) { /* loop over added rows */
987:     row  = im[k]; brow = row/4;
988:     rp   = aj + ai[brow];
989:     ap   = aa + 16*ai[brow];
990:     nrow = ailen[brow];
991:     low  = 0;
992:     high = nrow;
993:     for (l=0; l<n; l++) { /* loop over added columns */
994:       col   = in[l]; bcol = col/4;
995:       ridx  = row % 4; cidx = col % 4;
996:       value = v[l + k*n];
997:       if (col <= lastcol)  low = 0;
998:       else                high = nrow;
999:       lastcol = col;
1000:       while (high-low > 7) {
1001:         t = (low+high)/2;
1002:         if (rp[t] > bcol) high = t;
1003:         else              low  = t;
1004:       }
1005:       for (i=low; i<high; i++) {
1006:         if (rp[i] > bcol) break;
1007:         if (rp[i] == bcol) {
1008:           bap   = ap +  16*i + 4*cidx + ridx;
1009:           *bap += value;
1010:           goto noinsert1;
1011:         }
1012:       }
1013:       N = nrow++ - 1;
1014:       high++; /* added new column thus must search to one higher than before */
1015:       /* shift up all the later entries in this row */
1016:       for (ii=N; ii>=i; ii--) {
1017:         rp[ii+1] = rp[ii];
1018:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1019:       }
1020:       if (N>=i) {
1021:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1022:       }
1023:       rp[i]                    = bcol;
1024:       ap[16*i + 4*cidx + ridx] = value;
1025: noinsert1:;
1026:       low = i;
1027:     }
1028:     ailen[brow] = nrow;
1029:   }
1030:   PetscFunctionReturnVoid();
1031: }

1033: /*
1034:      Checks for missing diagonals
1035: */
1038: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1039: {
1040:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1042:   PetscInt       *diag,*ii = a->i,i;

1045:   MatMarkDiagonal_SeqBAIJ(A);
1046:   *missing = PETSC_FALSE;
1047:   if (A->rmap->n > 0 && !ii) {
1048:     *missing = PETSC_TRUE;
1049:     if (d) *d = 0;
1050:     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1051:   } else {
1052:     diag = a->diag;
1053:     for (i=0; i<a->mbs; i++) {
1054:       if (diag[i] >= ii[i+1]) {
1055:         *missing = PETSC_TRUE;
1056:         if (d) *d = i;
1057:         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1058:         break;
1059:       }
1060:     }
1061:   }
1062:   return(0);
1063: }

1067: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1068: {
1069:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1071:   PetscInt       i,j,m = a->mbs;

1074:   if (!a->diag) {
1075:     PetscMalloc1(m,&a->diag);
1076:     PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));
1077:     a->free_diag = PETSC_TRUE;
1078:   }
1079:   for (i=0; i<m; i++) {
1080:     a->diag[i] = a->i[i+1];
1081:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1082:       if (a->j[j] == i) {
1083:         a->diag[i] = j;
1084:         break;
1085:       }
1086:     }
1087:   }
1088:   return(0);
1089: }


1094: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1095: {
1096:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1098:   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1099:   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;

1102:   *nn = n;
1103:   if (!ia) return(0);
1104:   if (symmetric) {
1105:     MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);
1106:     nz   = tia[n];
1107:   } else {
1108:     tia = a->i; tja = a->j;
1109:   }

1111:   if (!blockcompressed && bs > 1) {
1112:     (*nn) *= bs;
1113:     /* malloc & create the natural set of indices */
1114:     PetscMalloc1((n+1)*bs,ia);
1115:     if (n) {
1116:       (*ia)[0] = 0;
1117:       for (j=1; j<bs; j++) {
1118:         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1119:       }
1120:     }

1122:     for (i=1; i<n; i++) {
1123:       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1124:       for (j=1; j<bs; j++) {
1125:         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1126:       }
1127:     }
1128:     if (n) {
1129:       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1130:     }

1132:     if (inja) {
1133:       PetscMalloc1(nz*bs*bs,ja);
1134:       cnt = 0;
1135:       for (i=0; i<n; i++) {
1136:         for (j=0; j<bs; j++) {
1137:           for (k=tia[i]; k<tia[i+1]; k++) {
1138:             for (l=0; l<bs; l++) {
1139:               (*ja)[cnt++] = bs*tja[k] + l;
1140:             }
1141:           }
1142:         }
1143:       }
1144:     }

1146:     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1147:       PetscFree(tia);
1148:       PetscFree(tja);
1149:     }
1150:   } else if (oshift == 1) {
1151:     if (symmetric) {
1152:       nz = tia[A->rmap->n/bs];
1153:       /*  add 1 to i and j indices */
1154:       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1155:       *ia = tia;
1156:       if (ja) {
1157:         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1158:         *ja = tja;
1159:       }
1160:     } else {
1161:       nz = a->i[A->rmap->n/bs];
1162:       /* malloc space and  add 1 to i and j indices */
1163:       PetscMalloc1(A->rmap->n/bs+1,ia);
1164:       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1165:       if (ja) {
1166:         PetscMalloc1(nz,ja);
1167:         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1168:       }
1169:     }
1170:   } else {
1171:     *ia = tia;
1172:     if (ja) *ja = tja;
1173:   }
1174:   return(0);
1175: }

1179: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1180: {

1184:   if (!ia) return(0);
1185:   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1186:     PetscFree(*ia);
1187:     if (ja) {PetscFree(*ja);}
1188:   }
1189:   return(0);
1190: }

1194: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1195: {
1196:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1200: #if defined(PETSC_USE_LOG)
1201:   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1202: #endif
1203:   MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
1204:   ISDestroy(&a->row);
1205:   ISDestroy(&a->col);
1206:   if (a->free_diag) {PetscFree(a->diag);}
1207:   PetscFree(a->idiag);
1208:   if (a->free_imax_ilen) {PetscFree2(a->imax,a->ilen);}
1209:   PetscFree(a->solve_work);
1210:   PetscFree(a->mult_work);
1211:   PetscFree(a->sor_workt);
1212:   PetscFree(a->sor_work);
1213:   ISDestroy(&a->icol);
1214:   PetscFree(a->saved_values);
1215:   PetscFree2(a->compressedrow.i,a->compressedrow.rindex);

1217:   MatDestroy(&a->sbaijMat);
1218:   MatDestroy(&a->parent);
1219:   PetscFree(A->data);

1221:   PetscObjectChangeTypeName((PetscObject)A,0);
1222:   PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);
1223:   PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);
1224:   PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);
1225:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);
1226:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);
1227:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);
1228:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);
1229:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);
1230:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);
1231:   PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);
1232:   return(0);
1233: }

1237: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1238: {
1239:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1243:   switch (op) {
1244:   case MAT_ROW_ORIENTED:
1245:     a->roworiented = flg;
1246:     break;
1247:   case MAT_KEEP_NONZERO_PATTERN:
1248:     a->keepnonzeropattern = flg;
1249:     break;
1250:   case MAT_NEW_NONZERO_LOCATIONS:
1251:     a->nonew = (flg ? 0 : 1);
1252:     break;
1253:   case MAT_NEW_NONZERO_LOCATION_ERR:
1254:     a->nonew = (flg ? -1 : 0);
1255:     break;
1256:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1257:     a->nonew = (flg ? -2 : 0);
1258:     break;
1259:   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1260:     a->nounused = (flg ? -1 : 0);
1261:     break;
1262:   case MAT_NEW_DIAGONALS:
1263:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1264:   case MAT_USE_HASH_TABLE:
1265:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1266:     break;
1267:   case MAT_SPD:
1268:   case MAT_SYMMETRIC:
1269:   case MAT_STRUCTURALLY_SYMMETRIC:
1270:   case MAT_HERMITIAN:
1271:   case MAT_SYMMETRY_ETERNAL:
1272:     /* These options are handled directly by MatSetOption() */
1273:     break;
1274:   default:
1275:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1276:   }
1277:   return(0);
1278: }

1280: /* used for both SeqBAIJ and SeqSBAIJ matrices */
1283: PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1284: {
1286:   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1287:   MatScalar      *aa_i;
1288:   PetscScalar    *v_i;

1291:   bs  = A->rmap->bs;
1292:   bs2 = bs*bs;
1293:   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);

1295:   bn  = row/bs;   /* Block number */
1296:   bp  = row % bs; /* Block Position */
1297:   M   = ai[bn+1] - ai[bn];
1298:   *nz = bs*M;

1300:   if (v) {
1301:     *v = 0;
1302:     if (*nz) {
1303:       PetscMalloc1(*nz,v);
1304:       for (i=0; i<M; i++) { /* for each block in the block row */
1305:         v_i  = *v + i*bs;
1306:         aa_i = aa + bs2*(ai[bn] + i);
1307:         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1308:       }
1309:     }
1310:   }

1312:   if (idx) {
1313:     *idx = 0;
1314:     if (*nz) {
1315:       PetscMalloc1(*nz,idx);
1316:       for (i=0; i<M; i++) { /* for each block in the block row */
1317:         idx_i = *idx + i*bs;
1318:         itmp  = bs*aj[ai[bn] + i];
1319:         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1320:       }
1321:     }
1322:   }
1323:   return(0);
1324: }

1328: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1329: {
1330:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1334:   MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);
1335:   return(0);
1336: }

1340: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1341: {

1345:   if (idx) {PetscFree(*idx);}
1346:   if (v)   {PetscFree(*v);}
1347:   return(0);
1348: }

1350: extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);

1354: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1355: {
1356:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1357:   Mat            C;
1359:   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1360:   PetscInt       *rows,*cols,bs2=a->bs2;
1361:   MatScalar      *array;

1364:   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1365:   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1366:     PetscCalloc1(1+nbs,&col);

1368:     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1369:     MatCreate(PetscObjectComm((PetscObject)A),&C);
1370:     MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);
1371:     MatSetType(C,((PetscObject)A)->type_name);
1372:     MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);
1373:     PetscFree(col);
1374:   } else {
1375:     C = *B;
1376:   }

1378:   array = a->a;
1379:   PetscMalloc2(bs,&rows,bs,&cols);
1380:   for (i=0; i<mbs; i++) {
1381:     cols[0] = i*bs;
1382:     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1383:     len = ai[i+1] - ai[i];
1384:     for (j=0; j<len; j++) {
1385:       rows[0] = (*aj++)*bs;
1386:       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1387:       MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);
1388:       array += bs2;
1389:     }
1390:   }
1391:   PetscFree2(rows,cols);

1393:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1394:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);

1396:   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1397:     *B = C;
1398:   } else {
1399:     MatHeaderMerge(A,C);
1400:   }
1401:   return(0);
1402: }

1406: PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1407: {
1409:   Mat            Btrans;

1412:   *f   = PETSC_FALSE;
1413:   MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);
1414:   MatEqual_SeqBAIJ(B,Btrans,f);
1415:   MatDestroy(&Btrans);
1416:   return(0);
1417: }

1421: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1422: {
1423:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1425:   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1426:   int            fd;
1427:   PetscScalar    *aa;
1428:   FILE           *file;

1431:   PetscViewerBinaryGetDescriptor(viewer,&fd);
1432:   PetscMalloc1(4+A->rmap->N,&col_lens);
1433:   col_lens[0] = MAT_FILE_CLASSID;

1435:   col_lens[1] = A->rmap->N;
1436:   col_lens[2] = A->cmap->n;
1437:   col_lens[3] = a->nz*bs2;

1439:   /* store lengths of each row and write (including header) to file */
1440:   count = 0;
1441:   for (i=0; i<a->mbs; i++) {
1442:     for (j=0; j<bs; j++) {
1443:       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1444:     }
1445:   }
1446:   PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);
1447:   PetscFree(col_lens);

1449:   /* store column indices (zero start index) */
1450:   PetscMalloc1((a->nz+1)*bs2,&jj);
1451:   count = 0;
1452:   for (i=0; i<a->mbs; i++) {
1453:     for (j=0; j<bs; j++) {
1454:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1455:         for (l=0; l<bs; l++) {
1456:           jj[count++] = bs*a->j[k] + l;
1457:         }
1458:       }
1459:     }
1460:   }
1461:   PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1462:   PetscFree(jj);

1464:   /* store nonzero values */
1465:   PetscMalloc1((a->nz+1)*bs2,&aa);
1466:   count = 0;
1467:   for (i=0; i<a->mbs; i++) {
1468:     for (j=0; j<bs; j++) {
1469:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1470:         for (l=0; l<bs; l++) {
1471:           aa[count++] = a->a[bs2*k + l*bs + j];
1472:         }
1473:       }
1474:     }
1475:   }
1476:   PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1477:   PetscFree(aa);

1479:   PetscViewerBinaryGetInfoPointer(viewer,&file);
1480:   if (file) {
1481:     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1482:   }
1483:   return(0);
1484: }

1488: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1489: {
1490:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1491:   PetscErrorCode    ierr;
1492:   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1493:   PetscViewerFormat format;

1496:   PetscViewerGetFormat(viewer,&format);
1497:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1498:     PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);
1499:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1500:     const char *matname;
1501:     Mat        aij;
1502:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1503:     PetscObjectGetName((PetscObject)A,&matname);
1504:     PetscObjectSetName((PetscObject)aij,matname);
1505:     MatView(aij,viewer);
1506:     MatDestroy(&aij);
1507:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1508:       return(0);
1509:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1510:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1511:     for (i=0; i<a->mbs; i++) {
1512:       for (j=0; j<bs; j++) {
1513:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1514:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1515:           for (l=0; l<bs; l++) {
1516: #if defined(PETSC_USE_COMPLEX)
1517:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1518:               PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1519:                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1520:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1521:               PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1522:                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1523:             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1524:               PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));
1525:             }
1526: #else
1527:             if (a->a[bs2*k + l*bs + j] != 0.0) {
1528:               PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);
1529:             }
1530: #endif
1531:           }
1532:         }
1533:         PetscViewerASCIIPrintf(viewer,"\n");
1534:       }
1535:     }
1536:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1537:   } else {
1538:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1539:     for (i=0; i<a->mbs; i++) {
1540:       for (j=0; j<bs; j++) {
1541:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1542:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1543:           for (l=0; l<bs; l++) {
1544: #if defined(PETSC_USE_COMPLEX)
1545:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1546:               PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1547:                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1548:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1549:               PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1550:                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1551:             } else {
1552:               PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));
1553:             }
1554: #else
1555:             PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);
1556: #endif
1557:           }
1558:         }
1559:         PetscViewerASCIIPrintf(viewer,"\n");
1560:       }
1561:     }
1562:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1563:   }
1564:   PetscViewerFlush(viewer);
1565:   return(0);
1566: }

1568: #include <petscdraw.h>
1571: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1572: {
1573:   Mat               A = (Mat) Aa;
1574:   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1575:   PetscErrorCode    ierr;
1576:   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1577:   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1578:   MatScalar         *aa;
1579:   PetscViewer       viewer;
1580:   PetscViewerFormat format;

1583:   PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1584:   PetscViewerGetFormat(viewer,&format);

1586:   PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);

1588:   /* loop over matrix elements drawing boxes */

1590:   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1591:     color = PETSC_DRAW_BLUE;
1592:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1593:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1594:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1595:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1596:         aa  = a->a + j*bs2;
1597:         for (k=0; k<bs; k++) {
1598:           for (l=0; l<bs; l++) {
1599:             if (PetscRealPart(*aa++) >=  0.) continue;
1600:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1601:           }
1602:         }
1603:       }
1604:     }
1605:     color = PETSC_DRAW_CYAN;
1606:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1607:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1608:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1609:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1610:         aa  = a->a + j*bs2;
1611:         for (k=0; k<bs; k++) {
1612:           for (l=0; l<bs; l++) {
1613:             if (PetscRealPart(*aa++) != 0.) continue;
1614:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1615:           }
1616:         }
1617:       }
1618:     }
1619:     color = PETSC_DRAW_RED;
1620:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1621:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1622:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1623:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1624:         aa  = a->a + j*bs2;
1625:         for (k=0; k<bs; k++) {
1626:           for (l=0; l<bs; l++) {
1627:             if (PetscRealPart(*aa++) <= 0.) continue;
1628:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1629:           }
1630:         }
1631:       }
1632:     }
1633:   } else {
1634:     /* use contour shading to indicate magnitude of values */
1635:     /* first determine max of all nonzero values */
1636:     PetscDraw popup;
1637:     PetscReal scale,maxv = 0.0;

1639:     for (i=0; i<a->nz*a->bs2; i++) {
1640:       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1641:     }
1642:     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1643:     PetscDrawGetPopup(draw,&popup);
1644:     if (popup) {
1645:       PetscDrawScalePopup(popup,0.0,maxv);
1646:     }
1647:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1648:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1649:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1650:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1651:         aa  = a->a + j*bs2;
1652:         for (k=0; k<bs; k++) {
1653:           for (l=0; l<bs; l++) {
1654:             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1655:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1656:           }
1657:         }
1658:       }
1659:     }
1660:   }
1661:   return(0);
1662: }

1666: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1667: {
1669:   PetscReal      xl,yl,xr,yr,w,h;
1670:   PetscDraw      draw;
1671:   PetscBool      isnull;

1674:   PetscViewerDrawGetDraw(viewer,0,&draw);
1675:   PetscDrawIsNull(draw,&isnull); if (isnull) return(0);

1677:   PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1678:   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1679:   xr  += w;    yr += h;  xl = -w;     yl = -h;
1680:   PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1681:   PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1682:   PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);
1683:   return(0);
1684: }

1688: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1689: {
1691:   PetscBool      iascii,isbinary,isdraw;

1694:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1695:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1696:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1697:   if (iascii) {
1698:     MatView_SeqBAIJ_ASCII(A,viewer);
1699:   } else if (isbinary) {
1700:     MatView_SeqBAIJ_Binary(A,viewer);
1701:   } else if (isdraw) {
1702:     MatView_SeqBAIJ_Draw(A,viewer);
1703:   } else {
1704:     Mat B;
1705:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1706:     MatView(B,viewer);
1707:     MatDestroy(&B);
1708:   }
1709:   return(0);
1710: }


1715: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1716: {
1717:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1718:   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1719:   PetscInt    *ai = a->i,*ailen = a->ilen;
1720:   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1721:   MatScalar   *ap,*aa = a->a;

1724:   for (k=0; k<m; k++) { /* loop over rows */
1725:     row = im[k]; brow = row/bs;
1726:     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1727:     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1728:     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1729:     nrow = ailen[brow];
1730:     for (l=0; l<n; l++) { /* loop over columns */
1731:       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1732:       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1733:       col  = in[l];
1734:       bcol = col/bs;
1735:       cidx = col%bs;
1736:       ridx = row%bs;
1737:       high = nrow;
1738:       low  = 0; /* assume unsorted */
1739:       while (high-low > 5) {
1740:         t = (low+high)/2;
1741:         if (rp[t] > bcol) high = t;
1742:         else             low  = t;
1743:       }
1744:       for (i=low; i<high; i++) {
1745:         if (rp[i] > bcol) break;
1746:         if (rp[i] == bcol) {
1747:           *v++ = ap[bs2*i+bs*cidx+ridx];
1748:           goto finished;
1749:         }
1750:       }
1751:       *v++ = 0.0;
1752: finished:;
1753:     }
1754:   }
1755:   return(0);
1756: }

1760: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1761: {
1762:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1763:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1764:   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1765:   PetscErrorCode    ierr;
1766:   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1767:   PetscBool         roworiented=a->roworiented;
1768:   const PetscScalar *value     = v;
1769:   MatScalar         *ap,*aa = a->a,*bap;

1772:   if (roworiented) {
1773:     stepval = (n-1)*bs;
1774:   } else {
1775:     stepval = (m-1)*bs;
1776:   }
1777:   for (k=0; k<m; k++) { /* loop over added rows */
1778:     row = im[k];
1779:     if (row < 0) continue;
1780: #if defined(PETSC_USE_DEBUG)
1781:     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1782: #endif
1783:     rp   = aj + ai[row];
1784:     ap   = aa + bs2*ai[row];
1785:     rmax = imax[row];
1786:     nrow = ailen[row];
1787:     low  = 0;
1788:     high = nrow;
1789:     for (l=0; l<n; l++) { /* loop over added columns */
1790:       if (in[l] < 0) continue;
1791: #if defined(PETSC_USE_DEBUG)
1792:       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1793: #endif
1794:       col = in[l];
1795:       if (roworiented) {
1796:         value = v + (k*(stepval+bs) + l)*bs;
1797:       } else {
1798:         value = v + (l*(stepval+bs) + k)*bs;
1799:       }
1800:       if (col <= lastcol) low = 0;
1801:       else high = nrow;
1802:       lastcol = col;
1803:       while (high-low > 7) {
1804:         t = (low+high)/2;
1805:         if (rp[t] > col) high = t;
1806:         else             low  = t;
1807:       }
1808:       for (i=low; i<high; i++) {
1809:         if (rp[i] > col) break;
1810:         if (rp[i] == col) {
1811:           bap = ap +  bs2*i;
1812:           if (roworiented) {
1813:             if (is == ADD_VALUES) {
1814:               for (ii=0; ii<bs; ii++,value+=stepval) {
1815:                 for (jj=ii; jj<bs2; jj+=bs) {
1816:                   bap[jj] += *value++;
1817:                 }
1818:               }
1819:             } else {
1820:               for (ii=0; ii<bs; ii++,value+=stepval) {
1821:                 for (jj=ii; jj<bs2; jj+=bs) {
1822:                   bap[jj] = *value++;
1823:                 }
1824:               }
1825:             }
1826:           } else {
1827:             if (is == ADD_VALUES) {
1828:               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1829:                 for (jj=0; jj<bs; jj++) {
1830:                   bap[jj] += value[jj];
1831:                 }
1832:                 bap += bs;
1833:               }
1834:             } else {
1835:               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1836:                 for (jj=0; jj<bs; jj++) {
1837:                   bap[jj]  = value[jj];
1838:                 }
1839:                 bap += bs;
1840:               }
1841:             }
1842:           }
1843:           goto noinsert2;
1844:         }
1845:       }
1846:       if (nonew == 1) goto noinsert2;
1847:       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1848:       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1849:       N = nrow++ - 1; high++;
1850:       /* shift up all the later entries in this row */
1851:       for (ii=N; ii>=i; ii--) {
1852:         rp[ii+1] = rp[ii];
1853:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1854:       }
1855:       if (N >= i) {
1856:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1857:       }
1858:       rp[i] = col;
1859:       bap   = ap +  bs2*i;
1860:       if (roworiented) {
1861:         for (ii=0; ii<bs; ii++,value+=stepval) {
1862:           for (jj=ii; jj<bs2; jj+=bs) {
1863:             bap[jj] = *value++;
1864:           }
1865:         }
1866:       } else {
1867:         for (ii=0; ii<bs; ii++,value+=stepval) {
1868:           for (jj=0; jj<bs; jj++) {
1869:             *bap++ = *value++;
1870:           }
1871:         }
1872:       }
1873: noinsert2:;
1874:       low = i;
1875:     }
1876:     ailen[row] = nrow;
1877:   }
1878:   return(0);
1879: }

1883: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1884: {
1885:   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1886:   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1887:   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1889:   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1890:   MatScalar      *aa  = a->a,*ap;
1891:   PetscReal      ratio=0.6;

1894:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

1896:   if (m) rmax = ailen[0];
1897:   for (i=1; i<mbs; i++) {
1898:     /* move each row back by the amount of empty slots (fshift) before it*/
1899:     fshift += imax[i-1] - ailen[i-1];
1900:     rmax    = PetscMax(rmax,ailen[i]);
1901:     if (fshift) {
1902:       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1903:       N  = ailen[i];
1904:       for (j=0; j<N; j++) {
1905:         ip[j-fshift] = ip[j];

1907:         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
1908:       }
1909:     }
1910:     ai[i] = ai[i-1] + ailen[i-1];
1911:   }
1912:   if (mbs) {
1913:     fshift += imax[mbs-1] - ailen[mbs-1];
1914:     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1915:   }

1917:   /* reset ilen and imax for each row */
1918:   a->nonzerorowcnt = 0;
1919:   for (i=0; i<mbs; i++) {
1920:     ailen[i] = imax[i] = ai[i+1] - ai[i];
1921:     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1922:   }
1923:   a->nz = ai[mbs];

1925:   /* diagonals may have moved, so kill the diagonal pointers */
1926:   a->idiagvalid = PETSC_FALSE;
1927:   if (fshift && a->diag) {
1928:     PetscFree(a->diag);
1929:     PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));
1930:     a->diag = 0;
1931:   }
1932:   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1933:   PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);
1934:   PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
1935:   PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);

1937:   A->info.mallocs    += a->reallocs;
1938:   a->reallocs         = 0;
1939:   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1940:   a->rmax             = rmax;

1942:   MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);
1943:   return(0);
1944: }

1946: /*
1947:    This function returns an array of flags which indicate the locations of contiguous
1948:    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1949:    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1950:    Assume: sizes should be long enough to hold all the values.
1951: */
1954: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1955: {
1956:   PetscInt  i,j,k,row;
1957:   PetscBool flg;

1960:   for (i=0,j=0; i<n; j++) {
1961:     row = idx[i];
1962:     if (row%bs!=0) { /* Not the begining of a block */
1963:       sizes[j] = 1;
1964:       i++;
1965:     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1966:       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1967:       i++;
1968:     } else { /* Begining of the block, so check if the complete block exists */
1969:       flg = PETSC_TRUE;
1970:       for (k=1; k<bs; k++) {
1971:         if (row+k != idx[i+k]) { /* break in the block */
1972:           flg = PETSC_FALSE;
1973:           break;
1974:         }
1975:       }
1976:       if (flg) { /* No break in the bs */
1977:         sizes[j] = bs;
1978:         i       += bs;
1979:       } else {
1980:         sizes[j] = 1;
1981:         i++;
1982:       }
1983:     }
1984:   }
1985:   *bs_max = j;
1986:   return(0);
1987: }

1991: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
1992: {
1993:   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
1994:   PetscErrorCode    ierr;
1995:   PetscInt          i,j,k,count,*rows;
1996:   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1997:   PetscScalar       zero = 0.0;
1998:   MatScalar         *aa;
1999:   const PetscScalar *xx;
2000:   PetscScalar       *bb;

2003:   /* fix right hand side if needed */
2004:   if (x && b) {
2005:     VecGetArrayRead(x,&xx);
2006:     VecGetArray(b,&bb);
2007:     for (i=0; i<is_n; i++) {
2008:       bb[is_idx[i]] = diag*xx[is_idx[i]];
2009:     }
2010:     VecRestoreArrayRead(x,&xx);
2011:     VecRestoreArray(b,&bb);
2012:   }

2014:   /* Make a copy of the IS and  sort it */
2015:   /* allocate memory for rows,sizes */
2016:   PetscMalloc2(is_n,&rows,2*is_n,&sizes);

2018:   /* copy IS values to rows, and sort them */
2019:   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2020:   PetscSortInt(is_n,rows);

2022:   if (baij->keepnonzeropattern) {
2023:     for (i=0; i<is_n; i++) sizes[i] = 1;
2024:     bs_max          = is_n;
2025:   } else {
2026:     MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
2027:     A->nonzerostate++;
2028:   }

2030:   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2031:     row = rows[j];
2032:     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2033:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2034:     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2035:     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2036:       if (diag != (PetscScalar)0.0) {
2037:         if (baij->ilen[row/bs] > 0) {
2038:           baij->ilen[row/bs]       = 1;
2039:           baij->j[baij->i[row/bs]] = row/bs;

2041:           PetscMemzero(aa,count*bs*sizeof(MatScalar));
2042:         }
2043:         /* Now insert all the diagonal values for this bs */
2044:         for (k=0; k<bs; k++) {
2045:           (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
2046:         }
2047:       } else { /* (diag == 0.0) */
2048:         baij->ilen[row/bs] = 0;
2049:       } /* end (diag == 0.0) */
2050:     } else { /* (sizes[i] != bs) */
2051: #if defined(PETSC_USE_DEBUG)
2052:       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2053: #endif
2054:       for (k=0; k<count; k++) {
2055:         aa[0] =  zero;
2056:         aa   += bs;
2057:       }
2058:       if (diag != (PetscScalar)0.0) {
2059:         (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
2060:       }
2061:     }
2062:   }

2064:   PetscFree2(rows,sizes);
2065:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2066:   return(0);
2067: }

2071: PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2072: {
2073:   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2074:   PetscErrorCode    ierr;
2075:   PetscInt          i,j,k,count;
2076:   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2077:   PetscScalar       zero = 0.0;
2078:   MatScalar         *aa;
2079:   const PetscScalar *xx;
2080:   PetscScalar       *bb;
2081:   PetscBool         *zeroed,vecs = PETSC_FALSE;

2084:   /* fix right hand side if needed */
2085:   if (x && b) {
2086:     VecGetArrayRead(x,&xx);
2087:     VecGetArray(b,&bb);
2088:     vecs = PETSC_TRUE;
2089:   }

2091:   /* zero the columns */
2092:   PetscCalloc1(A->rmap->n,&zeroed);
2093:   for (i=0; i<is_n; i++) {
2094:     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2095:     zeroed[is_idx[i]] = PETSC_TRUE;
2096:   }
2097:   for (i=0; i<A->rmap->N; i++) {
2098:     if (!zeroed[i]) {
2099:       row = i/bs;
2100:       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2101:         for (k=0; k<bs; k++) {
2102:           col = bs*baij->j[j] + k;
2103:           if (zeroed[col]) {
2104:             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2105:             if (vecs) bb[i] -= aa[0]*xx[col];
2106:             aa[0] = 0.0;
2107:           }
2108:         }
2109:       }
2110:     } else if (vecs) bb[i] = diag*xx[i];
2111:   }
2112:   PetscFree(zeroed);
2113:   if (vecs) {
2114:     VecRestoreArrayRead(x,&xx);
2115:     VecRestoreArray(b,&bb);
2116:   }

2118:   /* zero the rows */
2119:   for (i=0; i<is_n; i++) {
2120:     row   = is_idx[i];
2121:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2122:     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2123:     for (k=0; k<count; k++) {
2124:       aa[0] =  zero;
2125:       aa   += bs;
2126:     }
2127:     if (diag != (PetscScalar)0.0) {
2128:       (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);
2129:     }
2130:   }
2131:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2132:   return(0);
2133: }

2137: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2138: {
2139:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2140:   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2141:   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2142:   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2144:   PetscInt       ridx,cidx,bs2=a->bs2;
2145:   PetscBool      roworiented=a->roworiented;
2146:   MatScalar      *ap,value,*aa=a->a,*bap;

2149:   for (k=0; k<m; k++) { /* loop over added rows */
2150:     row  = im[k];
2151:     brow = row/bs;
2152:     if (row < 0) continue;
2153: #if defined(PETSC_USE_DEBUG)
2154:     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2155: #endif
2156:     rp   = aj + ai[brow];
2157:     ap   = aa + bs2*ai[brow];
2158:     rmax = imax[brow];
2159:     nrow = ailen[brow];
2160:     low  = 0;
2161:     high = nrow;
2162:     for (l=0; l<n; l++) { /* loop over added columns */
2163:       if (in[l] < 0) continue;
2164: #if defined(PETSC_USE_DEBUG)
2165:       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2166: #endif
2167:       col  = in[l]; bcol = col/bs;
2168:       ridx = row % bs; cidx = col % bs;
2169:       if (roworiented) {
2170:         value = v[l + k*n];
2171:       } else {
2172:         value = v[k + l*m];
2173:       }
2174:       if (col <= lastcol) low = 0; else high = nrow;
2175:       lastcol = col;
2176:       while (high-low > 7) {
2177:         t = (low+high)/2;
2178:         if (rp[t] > bcol) high = t;
2179:         else              low  = t;
2180:       }
2181:       for (i=low; i<high; i++) {
2182:         if (rp[i] > bcol) break;
2183:         if (rp[i] == bcol) {
2184:           bap = ap +  bs2*i + bs*cidx + ridx;
2185:           if (is == ADD_VALUES) *bap += value;
2186:           else                  *bap  = value;
2187:           goto noinsert1;
2188:         }
2189:       }
2190:       if (nonew == 1) goto noinsert1;
2191:       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2192:       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2193:       N = nrow++ - 1; high++;
2194:       /* shift up all the later entries in this row */
2195:       for (ii=N; ii>=i; ii--) {
2196:         rp[ii+1] = rp[ii];
2197:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
2198:       }
2199:       if (N>=i) {
2200:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
2201:       }
2202:       rp[i]                      = bcol;
2203:       ap[bs2*i + bs*cidx + ridx] = value;
2204:       a->nz++;
2205:       A->nonzerostate++;
2206: noinsert1:;
2207:       low = i;
2208:     }
2209:     ailen[brow] = nrow;
2210:   }
2211:   return(0);
2212: }

2216: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2217: {
2218:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2219:   Mat            outA;
2221:   PetscBool      row_identity,col_identity;

2224:   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2225:   ISIdentity(row,&row_identity);
2226:   ISIdentity(col,&col_identity);
2227:   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");

2229:   outA            = inA;
2230:   inA->factortype = MAT_FACTOR_LU;

2232:   MatMarkDiagonal_SeqBAIJ(inA);

2234:   PetscObjectReference((PetscObject)row);
2235:   ISDestroy(&a->row);
2236:   a->row = row;
2237:   PetscObjectReference((PetscObject)col);
2238:   ISDestroy(&a->col);
2239:   a->col = col;

2241:   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2242:   ISDestroy(&a->icol);
2243:   ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
2244:   PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);

2246:   MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));
2247:   if (!a->solve_work) {
2248:     PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);
2249:     PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));
2250:   }
2251:   MatLUFactorNumeric(outA,inA,info);
2252:   return(0);
2253: }

2257: PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2258: {
2259:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2260:   PetscInt    i,nz,mbs;

2263:   nz  = baij->maxnz;
2264:   mbs = baij->mbs;
2265:   for (i=0; i<nz; i++) {
2266:     baij->j[i] = indices[i];
2267:   }
2268:   baij->nz = nz;
2269:   for (i=0; i<mbs; i++) {
2270:     baij->ilen[i] = baij->imax[i];
2271:   }
2272:   return(0);
2273: }

2277: /*@
2278:     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2279:        in the matrix.

2281:   Input Parameters:
2282: +  mat - the SeqBAIJ matrix
2283: -  indices - the column indices

2285:   Level: advanced

2287:   Notes:
2288:     This can be called if you have precomputed the nonzero structure of the
2289:   matrix and want to provide it to the matrix object to improve the performance
2290:   of the MatSetValues() operation.

2292:     You MUST have set the correct numbers of nonzeros per row in the call to
2293:   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.

2295:     MUST be called before any calls to MatSetValues();

2297: @*/
2298: PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2299: {

2305:   PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));
2306:   return(0);
2307: }

2311: PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2312: {
2313:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2315:   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2316:   PetscReal      atmp;
2317:   PetscScalar    *x,zero = 0.0;
2318:   MatScalar      *aa;
2319:   PetscInt       ncols,brow,krow,kcol;

2322:   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2323:   bs  = A->rmap->bs;
2324:   aa  = a->a;
2325:   ai  = a->i;
2326:   aj  = a->j;
2327:   mbs = a->mbs;

2329:   VecSet(v,zero);
2330:   VecGetArray(v,&x);
2331:   VecGetLocalSize(v,&n);
2332:   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2333:   for (i=0; i<mbs; i++) {
2334:     ncols = ai[1] - ai[0]; ai++;
2335:     brow  = bs*i;
2336:     for (j=0; j<ncols; j++) {
2337:       for (kcol=0; kcol<bs; kcol++) {
2338:         for (krow=0; krow<bs; krow++) {
2339:           atmp = PetscAbsScalar(*aa);aa++;
2340:           row  = brow + krow;   /* row index */
2341:           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2342:         }
2343:       }
2344:       aj++;
2345:     }
2346:   }
2347:   VecRestoreArray(v,&x);
2348:   return(0);
2349: }

2353: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2354: {

2358:   /* If the two matrices have the same copy implementation, use fast copy. */
2359:   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2360:     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2361:     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2362:     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;

2364:     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2365:     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2366:     PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));
2367:   } else {
2368:     MatCopy_Basic(A,B,str);
2369:   }
2370:   return(0);
2371: }

2375: PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2376: {

2380:   MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);
2381:   return(0);
2382: }

2386: PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2387: {
2388:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;

2391:   *array = a->a;
2392:   return(0);
2393: }

2397: PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2398: {
2400:   return(0);
2401: }

2405: PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2406: {
2407:   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2408:   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2409:   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;

2413:   /* Set the number of nonzeros in the new matrix */
2414:   MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);
2415:   return(0);
2416: }

2420: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2421: {
2422:   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2424:   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2425:   PetscBLASInt   one=1;

2428:   if (str == SAME_NONZERO_PATTERN) {
2429:     PetscScalar  alpha = a;
2430:     PetscBLASInt bnz;
2431:     PetscBLASIntCast(x->nz*bs2,&bnz);
2432:     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2433:     PetscObjectStateIncrease((PetscObject)Y);
2434:   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2435:     MatAXPY_Basic(Y,a,X,str);
2436:   } else {
2437:     Mat      B;
2438:     PetscInt *nnz;
2439:     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2440:     PetscMalloc1(Y->rmap->N,&nnz);
2441:     MatCreate(PetscObjectComm((PetscObject)Y),&B);
2442:     PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
2443:     MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
2444:     MatSetBlockSizesFromMats(B,Y,Y);
2445:     MatSetType(B,(MatType) ((PetscObject)Y)->type_name);
2446:     MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);
2447:     MatSeqBAIJSetPreallocation(B,bs,0,nnz);
2448:     MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
2449:     MatHeaderReplace(Y,B);
2450:     PetscFree(nnz);
2451:   }
2452:   return(0);
2453: }

2457: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2458: {
2459:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2460:   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2461:   MatScalar   *aa = a->a;

2464:   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2465:   return(0);
2466: }

2470: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2471: {
2472:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2473:   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2474:   MatScalar   *aa = a->a;

2477:   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2478:   return(0);
2479: }

2483: /*
2484:     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2485: */
2486: PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2487: {
2488:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2490:   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2491:   PetscInt       nz = a->i[m],row,*jj,mr,col;

2494:   *nn = n;
2495:   if (!ia) return(0);
2496:   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2497:   else {
2498:     PetscCalloc1(n+1,&collengths);
2499:     PetscMalloc1(n+1,&cia);
2500:     PetscMalloc1(nz+1,&cja);
2501:     jj   = a->j;
2502:     for (i=0; i<nz; i++) {
2503:       collengths[jj[i]]++;
2504:     }
2505:     cia[0] = oshift;
2506:     for (i=0; i<n; i++) {
2507:       cia[i+1] = cia[i] + collengths[i];
2508:     }
2509:     PetscMemzero(collengths,n*sizeof(PetscInt));
2510:     jj   = a->j;
2511:     for (row=0; row<m; row++) {
2512:       mr = a->i[row+1] - a->i[row];
2513:       for (i=0; i<mr; i++) {
2514:         col = *jj++;

2516:         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2517:       }
2518:     }
2519:     PetscFree(collengths);
2520:     *ia  = cia; *ja = cja;
2521:   }
2522:   return(0);
2523: }

2527: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2528: {

2532:   if (!ia) return(0);
2533:   PetscFree(*ia);
2534:   PetscFree(*ja);
2535:   return(0);
2536: }

2538: /*
2539:  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2540:  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2541:  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2542:  */
2545: PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2546: {
2547:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2549:   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2550:   PetscInt       nz = a->i[m],row,*jj,mr,col;
2551:   PetscInt       *cspidx;

2554:   *nn = n;
2555:   if (!ia) return(0);

2557:   PetscCalloc1(n+1,&collengths);
2558:   PetscMalloc1(n+1,&cia);
2559:   PetscMalloc1(nz+1,&cja);
2560:   PetscMalloc1(nz+1,&cspidx);
2561:   jj   = a->j;
2562:   for (i=0; i<nz; i++) {
2563:     collengths[jj[i]]++;
2564:   }
2565:   cia[0] = oshift;
2566:   for (i=0; i<n; i++) {
2567:     cia[i+1] = cia[i] + collengths[i];
2568:   }
2569:   PetscMemzero(collengths,n*sizeof(PetscInt));
2570:   jj   = a->j;
2571:   for (row=0; row<m; row++) {
2572:     mr = a->i[row+1] - a->i[row];
2573:     for (i=0; i<mr; i++) {
2574:       col = *jj++;
2575:       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2576:       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2577:     }
2578:   }
2579:   PetscFree(collengths);
2580:   *ia    = cia; *ja = cja;
2581:   *spidx = cspidx;
2582:   return(0);
2583: }

2587: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2588: {

2592:   MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);
2593:   PetscFree(*spidx);
2594:   return(0);
2595: }

2599: PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2600: {
2602:   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;

2605:   if (!aij->nz) {
2606:     MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);
2607:   }
2608:   MatShift_Basic(Y,a);
2609:   return(0);
2610: }

2612: /* -------------------------------------------------------------------*/
2613: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2614:                                        MatGetRow_SeqBAIJ,
2615:                                        MatRestoreRow_SeqBAIJ,
2616:                                        MatMult_SeqBAIJ_N,
2617:                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2618:                                        MatMultTranspose_SeqBAIJ,
2619:                                        MatMultTransposeAdd_SeqBAIJ,
2620:                                        0,
2621:                                        0,
2622:                                        0,
2623:                                /* 10*/ 0,
2624:                                        MatLUFactor_SeqBAIJ,
2625:                                        0,
2626:                                        0,
2627:                                        MatTranspose_SeqBAIJ,
2628:                                /* 15*/ MatGetInfo_SeqBAIJ,
2629:                                        MatEqual_SeqBAIJ,
2630:                                        MatGetDiagonal_SeqBAIJ,
2631:                                        MatDiagonalScale_SeqBAIJ,
2632:                                        MatNorm_SeqBAIJ,
2633:                                /* 20*/ 0,
2634:                                        MatAssemblyEnd_SeqBAIJ,
2635:                                        MatSetOption_SeqBAIJ,
2636:                                        MatZeroEntries_SeqBAIJ,
2637:                                /* 24*/ MatZeroRows_SeqBAIJ,
2638:                                        0,
2639:                                        0,
2640:                                        0,
2641:                                        0,
2642:                                /* 29*/ MatSetUp_SeqBAIJ,
2643:                                        0,
2644:                                        0,
2645:                                        0,
2646:                                        0,
2647:                                /* 34*/ MatDuplicate_SeqBAIJ,
2648:                                        0,
2649:                                        0,
2650:                                        MatILUFactor_SeqBAIJ,
2651:                                        0,
2652:                                /* 39*/ MatAXPY_SeqBAIJ,
2653:                                        MatGetSubMatrices_SeqBAIJ,
2654:                                        MatIncreaseOverlap_SeqBAIJ,
2655:                                        MatGetValues_SeqBAIJ,
2656:                                        MatCopy_SeqBAIJ,
2657:                                /* 44*/ 0,
2658:                                        MatScale_SeqBAIJ,
2659:                                        MatShift_SeqBAIJ,
2660:                                        0,
2661:                                        MatZeroRowsColumns_SeqBAIJ,
2662:                                /* 49*/ 0,
2663:                                        MatGetRowIJ_SeqBAIJ,
2664:                                        MatRestoreRowIJ_SeqBAIJ,
2665:                                        MatGetColumnIJ_SeqBAIJ,
2666:                                        MatRestoreColumnIJ_SeqBAIJ,
2667:                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2668:                                        0,
2669:                                        0,
2670:                                        0,
2671:                                        MatSetValuesBlocked_SeqBAIJ,
2672:                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2673:                                        MatDestroy_SeqBAIJ,
2674:                                        MatView_SeqBAIJ,
2675:                                        0,
2676:                                        0,
2677:                                /* 64*/ 0,
2678:                                        0,
2679:                                        0,
2680:                                        0,
2681:                                        0,
2682:                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2683:                                        0,
2684:                                        MatConvert_Basic,
2685:                                        0,
2686:                                        0,
2687:                                /* 74*/ 0,
2688:                                        MatFDColoringApply_BAIJ,
2689:                                        0,
2690:                                        0,
2691:                                        0,
2692:                                /* 79*/ 0,
2693:                                        0,
2694:                                        0,
2695:                                        0,
2696:                                        MatLoad_SeqBAIJ,
2697:                                /* 84*/ 0,
2698:                                        0,
2699:                                        0,
2700:                                        0,
2701:                                        0,
2702:                                /* 89*/ 0,
2703:                                        0,
2704:                                        0,
2705:                                        0,
2706:                                        0,
2707:                                /* 94*/ 0,
2708:                                        0,
2709:                                        0,
2710:                                        0,
2711:                                        0,
2712:                                /* 99*/ 0,
2713:                                        0,
2714:                                        0,
2715:                                        0,
2716:                                        0,
2717:                                /*104*/ 0,
2718:                                        MatRealPart_SeqBAIJ,
2719:                                        MatImaginaryPart_SeqBAIJ,
2720:                                        0,
2721:                                        0,
2722:                                /*109*/ 0,
2723:                                        0,
2724:                                        0,
2725:                                        0,
2726:                                        MatMissingDiagonal_SeqBAIJ,
2727:                                /*114*/ 0,
2728:                                        0,
2729:                                        0,
2730:                                        0,
2731:                                        0,
2732:                                /*119*/ 0,
2733:                                        0,
2734:                                        MatMultHermitianTranspose_SeqBAIJ,
2735:                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2736:                                        0,
2737:                                /*124*/ 0,
2738:                                        0,
2739:                                        MatInvertBlockDiagonal_SeqBAIJ,
2740:                                        0,
2741:                                        0,
2742:                                /*129*/ 0,
2743:                                        0,
2744:                                        0,
2745:                                        0,
2746:                                        0,
2747:                                /*134*/ 0,
2748:                                        0,
2749:                                        0,
2750:                                        0,
2751:                                        0,
2752:                                /*139*/ 0,
2753:                                        0,
2754:                                        0,
2755:                                        MatFDColoringSetUp_SeqXAIJ,
2756:                                        0,
2757:                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ
2758: };

2762: PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2763: {
2764:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2765:   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;

2769:   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");

2771:   /* allocate space for values if not already there */
2772:   if (!aij->saved_values) {
2773:     PetscMalloc1(nz+1,&aij->saved_values);
2774:     PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));
2775:   }

2777:   /* copy values over */
2778:   PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
2779:   return(0);
2780: }

2784: PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2785: {
2786:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2788:   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;

2791:   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2792:   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");

2794:   /* copy values over */
2795:   PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
2796:   return(0);
2797: }

2799: PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2800: PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);

2804: PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2805: {
2806:   Mat_SeqBAIJ    *b;
2808:   PetscInt       i,mbs,nbs,bs2;
2809:   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;

2812:   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2813:   if (nz == MAT_SKIP_ALLOCATION) {
2814:     skipallocation = PETSC_TRUE;
2815:     nz             = 0;
2816:   }

2818:   MatSetBlockSize(B,PetscAbs(bs));
2819:   PetscLayoutSetUp(B->rmap);
2820:   PetscLayoutSetUp(B->cmap);
2821:   PetscLayoutGetBlockSize(B->rmap,&bs);

2823:   B->preallocated = PETSC_TRUE;

2825:   mbs = B->rmap->n/bs;
2826:   nbs = B->cmap->n/bs;
2827:   bs2 = bs*bs;

2829:   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);

2831:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2832:   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2833:   if (nnz) {
2834:     for (i=0; i<mbs; i++) {
2835:       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2836:       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2837:     }
2838:   }

2840:   b    = (Mat_SeqBAIJ*)B->data;
2841:   PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2842:   PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);
2843:   PetscOptionsEnd();

2845:   if (!flg) {
2846:     switch (bs) {
2847:     case 1:
2848:       B->ops->mult    = MatMult_SeqBAIJ_1;
2849:       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2850:       break;
2851:     case 2:
2852:       B->ops->mult    = MatMult_SeqBAIJ_2;
2853:       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2854:       break;
2855:     case 3:
2856:       B->ops->mult    = MatMult_SeqBAIJ_3;
2857:       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2858:       break;
2859:     case 4:
2860:       B->ops->mult    = MatMult_SeqBAIJ_4;
2861:       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2862:       break;
2863:     case 5:
2864:       B->ops->mult    = MatMult_SeqBAIJ_5;
2865:       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2866:       break;
2867:     case 6:
2868:       B->ops->mult    = MatMult_SeqBAIJ_6;
2869:       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2870:       break;
2871:     case 7:
2872:       B->ops->mult    = MatMult_SeqBAIJ_7;
2873:       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2874:       break;
2875:     case 15:
2876:       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2877:       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2878:       break;
2879:     default:
2880:       B->ops->mult    = MatMult_SeqBAIJ_N;
2881:       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2882:       break;
2883:     }
2884:   }
2885:   B->ops->sor = MatSOR_SeqBAIJ;
2886:   b->mbs = mbs;
2887:   b->nbs = nbs;
2888:   if (!skipallocation) {
2889:     if (!b->imax) {
2890:       PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);
2891:       PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));

2893:       b->free_imax_ilen = PETSC_TRUE;
2894:     }
2895:     /* b->ilen will count nonzeros in each block row so far. */
2896:     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2897:     if (!nnz) {
2898:       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2899:       else if (nz < 0) nz = 1;
2900:       for (i=0; i<mbs; i++) b->imax[i] = nz;
2901:       nz = nz*mbs;
2902:     } else {
2903:       nz = 0;
2904:       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2905:     }

2907:     /* allocate the matrix space */
2908:     MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
2909:     PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);
2910:     PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
2911:     PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
2912:     PetscMemzero(b->j,nz*sizeof(PetscInt));

2914:     b->singlemalloc = PETSC_TRUE;
2915:     b->i[0]         = 0;
2916:     for (i=1; i<mbs+1; i++) {
2917:       b->i[i] = b->i[i-1] + b->imax[i-1];
2918:     }
2919:     b->free_a  = PETSC_TRUE;
2920:     b->free_ij = PETSC_TRUE;
2921:   } else {
2922:     b->free_a  = PETSC_FALSE;
2923:     b->free_ij = PETSC_FALSE;
2924:   }

2926:   b->bs2              = bs2;
2927:   b->mbs              = mbs;
2928:   b->nz               = 0;
2929:   b->maxnz            = nz;
2930:   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2931:   if (realalloc) {MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);}
2932:   return(0);
2933: }

2937: PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2938: {
2939:   PetscInt       i,m,nz,nz_max=0,*nnz;
2940:   PetscScalar    *values=0;
2941:   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;

2945:   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2946:   PetscLayoutSetBlockSize(B->rmap,bs);
2947:   PetscLayoutSetBlockSize(B->cmap,bs);
2948:   PetscLayoutSetUp(B->rmap);
2949:   PetscLayoutSetUp(B->cmap);
2950:   PetscLayoutGetBlockSize(B->rmap,&bs);
2951:   m    = B->rmap->n/bs;

2953:   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2954:   PetscMalloc1(m+1, &nnz);
2955:   for (i=0; i<m; i++) {
2956:     nz = ii[i+1]- ii[i];
2957:     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2958:     nz_max = PetscMax(nz_max, nz);
2959:     nnz[i] = nz;
2960:   }
2961:   MatSeqBAIJSetPreallocation(B,bs,0,nnz);
2962:   PetscFree(nnz);

2964:   values = (PetscScalar*)V;
2965:   if (!values) {
2966:     PetscCalloc1(bs*bs*(nz_max+1),&values);
2967:   }
2968:   for (i=0; i<m; i++) {
2969:     PetscInt          ncols  = ii[i+1] - ii[i];
2970:     const PetscInt    *icols = jj + ii[i];
2971:     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2972:     if (!roworiented) {
2973:       MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);
2974:     } else {
2975:       PetscInt j;
2976:       for (j=0; j<ncols; j++) {
2977:         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2978:         MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);
2979:       }
2980:     }
2981:   }
2982:   if (!V) { PetscFree(values); }
2983:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2984:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2985:   MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2986:   return(0);
2987: }

2989: /*MC
2990:    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2991:    block sparse compressed row format.

2993:    Options Database Keys:
2994: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()

2996:   Level: beginner

2998: .seealso: MatCreateSeqBAIJ()
2999: M*/

3001: PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);

3005: PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3006: {
3008:   PetscMPIInt    size;
3009:   Mat_SeqBAIJ    *b;

3012:   MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
3013:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");

3015:   PetscNewLog(B,&b);
3016:   B->data = (void*)b;
3017:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));

3019:   b->row          = 0;
3020:   b->col          = 0;
3021:   b->icol         = 0;
3022:   b->reallocs     = 0;
3023:   b->saved_values = 0;

3025:   b->roworiented        = PETSC_TRUE;
3026:   b->nonew              = 0;
3027:   b->diag               = 0;
3028:   B->spptr              = 0;
3029:   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3030:   b->keepnonzeropattern = PETSC_FALSE;

3032:   PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);
3033:   PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);
3034:   PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);
3035:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);
3036:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);
3037:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);
3038:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);
3039:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);
3040:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);
3041:   PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);
3042:   PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
3043:   return(0);
3044: }

3048: PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3049: {
3050:   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3052:   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;

3055:   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");

3057:   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3058:     c->imax           = a->imax;
3059:     c->ilen           = a->ilen;
3060:     c->free_imax_ilen = PETSC_FALSE;
3061:   } else {
3062:     PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);
3063:     PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));
3064:     for (i=0; i<mbs; i++) {
3065:       c->imax[i] = a->imax[i];
3066:       c->ilen[i] = a->ilen[i];
3067:     }
3068:     c->free_imax_ilen = PETSC_TRUE;
3069:   }

3071:   /* allocate the matrix space */
3072:   if (mallocmatspace) {
3073:     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3074:       PetscCalloc1(bs2*nz,&c->a);
3075:       PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));

3077:       c->i            = a->i;
3078:       c->j            = a->j;
3079:       c->singlemalloc = PETSC_FALSE;
3080:       c->free_a       = PETSC_TRUE;
3081:       c->free_ij      = PETSC_FALSE;
3082:       c->parent       = A;
3083:       C->preallocated = PETSC_TRUE;
3084:       C->assembled    = PETSC_TRUE;

3086:       PetscObjectReference((PetscObject)A);
3087:       MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3088:       MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3089:     } else {
3090:       PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);
3091:       PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));

3093:       c->singlemalloc = PETSC_TRUE;
3094:       c->free_a       = PETSC_TRUE;
3095:       c->free_ij      = PETSC_TRUE;

3097:       PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
3098:       if (mbs > 0) {
3099:         PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
3100:         if (cpvalues == MAT_COPY_VALUES) {
3101:           PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
3102:         } else {
3103:           PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
3104:         }
3105:       }
3106:       C->preallocated = PETSC_TRUE;
3107:       C->assembled    = PETSC_TRUE;
3108:     }
3109:   }

3111:   c->roworiented = a->roworiented;
3112:   c->nonew       = a->nonew;

3114:   PetscLayoutReference(A->rmap,&C->rmap);
3115:   PetscLayoutReference(A->cmap,&C->cmap);

3117:   c->bs2         = a->bs2;
3118:   c->mbs         = a->mbs;
3119:   c->nbs         = a->nbs;

3121:   if (a->diag) {
3122:     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3123:       c->diag      = a->diag;
3124:       c->free_diag = PETSC_FALSE;
3125:     } else {
3126:       PetscMalloc1(mbs+1,&c->diag);
3127:       PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));
3128:       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3129:       c->free_diag = PETSC_TRUE;
3130:     }
3131:   } else c->diag = 0;

3133:   c->nz         = a->nz;
3134:   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3135:   c->solve_work = NULL;
3136:   c->mult_work  = NULL;
3137:   c->sor_workt  = NULL;
3138:   c->sor_work   = NULL;

3140:   c->compressedrow.use   = a->compressedrow.use;
3141:   c->compressedrow.nrows = a->compressedrow.nrows;
3142:   if (a->compressedrow.use) {
3143:     i    = a->compressedrow.nrows;
3144:     PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);
3145:     PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));
3146:     PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));
3147:     PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));
3148:   } else {
3149:     c->compressedrow.use    = PETSC_FALSE;
3150:     c->compressedrow.i      = NULL;
3151:     c->compressedrow.rindex = NULL;
3152:   }
3153:   C->nonzerostate = A->nonzerostate;

3155:   PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
3156:   PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
3157:   return(0);
3158: }

3162: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3163: {

3167:   MatCreate(PetscObjectComm((PetscObject)A),B);
3168:   MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);
3169:   MatSetType(*B,MATSEQBAIJ);
3170:   MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3171:   return(0);
3172: }

3176: PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3177: {
3178:   Mat_SeqBAIJ    *a;
3180:   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3181:   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3182:   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3183:   PetscInt       *masked,nmask,tmp,bs2,ishift;
3184:   PetscMPIInt    size;
3185:   int            fd;
3186:   PetscScalar    *aa;
3187:   MPI_Comm       comm;

3190:   /* force binary viewer to load .info file if it has not yet done so */
3191:   PetscViewerSetUp(viewer);
3192:   PetscObjectGetComm((PetscObject)viewer,&comm);
3193:   PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");
3194:   PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);
3195:   PetscOptionsEnd();
3196:   if (bs < 0) bs = 1;
3197:   bs2  = bs*bs;

3199:   MPI_Comm_size(comm,&size);
3200:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3201:   PetscViewerBinaryGetDescriptor(viewer,&fd);
3202:   PetscBinaryRead(fd,header,4,PETSC_INT);
3203:   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3204:   M = header[1]; N = header[2]; nz = header[3];

3206:   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3207:   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");

3209:   /*
3210:      This code adds extra rows to make sure the number of rows is
3211:     divisible by the blocksize
3212:   */
3213:   mbs        = M/bs;
3214:   extra_rows = bs - M + bs*(mbs);
3215:   if (extra_rows == bs) extra_rows = 0;
3216:   else mbs++;
3217:   if (extra_rows) {
3218:     PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
3219:   }

3221:   /* Set global sizes if not already set */
3222:   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3223:     MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3224:   } else { /* Check if the matrix global sizes are correct */
3225:     MatGetSize(newmat,&rows,&cols);
3226:     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3227:       MatGetLocalSize(newmat,&rows,&cols);
3228:     }
3229:     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3230:   }

3232:   /* read in row lengths */
3233:   PetscMalloc1(M+extra_rows,&rowlengths);
3234:   PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
3235:   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;

3237:   /* read in column indices */
3238:   PetscMalloc1(nz+extra_rows,&jj);
3239:   PetscBinaryRead(fd,jj,nz,PETSC_INT);
3240:   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;

3242:   /* loop over row lengths determining block row lengths */
3243:   PetscCalloc1(mbs,&browlengths);
3244:   PetscMalloc2(mbs,&mask,mbs,&masked);
3245:   PetscMemzero(mask,mbs*sizeof(PetscInt));
3246:   rowcount = 0;
3247:   nzcount  = 0;
3248:   for (i=0; i<mbs; i++) {
3249:     nmask = 0;
3250:     for (j=0; j<bs; j++) {
3251:       kmax = rowlengths[rowcount];
3252:       for (k=0; k<kmax; k++) {
3253:         tmp = jj[nzcount++]/bs;
3254:         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3255:       }
3256:       rowcount++;
3257:     }
3258:     browlengths[i] += nmask;
3259:     /* zero out the mask elements we set */
3260:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3261:   }

3263:   /* Do preallocation  */
3264:   MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);
3265:   a    = (Mat_SeqBAIJ*)newmat->data;

3267:   /* set matrix "i" values */
3268:   a->i[0] = 0;
3269:   for (i=1; i<= mbs; i++) {
3270:     a->i[i]      = a->i[i-1] + browlengths[i-1];
3271:     a->ilen[i-1] = browlengths[i-1];
3272:   }
3273:   a->nz = 0;
3274:   for (i=0; i<mbs; i++) a->nz += browlengths[i];

3276:   /* read in nonzero values */
3277:   PetscMalloc1(nz+extra_rows,&aa);
3278:   PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
3279:   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;

3281:   /* set "a" and "j" values into matrix */
3282:   nzcount = 0; jcount = 0;
3283:   for (i=0; i<mbs; i++) {
3284:     nzcountb = nzcount;
3285:     nmask    = 0;
3286:     for (j=0; j<bs; j++) {
3287:       kmax = rowlengths[i*bs+j];
3288:       for (k=0; k<kmax; k++) {
3289:         tmp = jj[nzcount++]/bs;
3290:         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3291:       }
3292:     }
3293:     /* sort the masked values */
3294:     PetscSortInt(nmask,masked);

3296:     /* set "j" values into matrix */
3297:     maskcount = 1;
3298:     for (j=0; j<nmask; j++) {
3299:       a->j[jcount++]  = masked[j];
3300:       mask[masked[j]] = maskcount++;
3301:     }
3302:     /* set "a" values into matrix */
3303:     ishift = bs2*a->i[i];
3304:     for (j=0; j<bs; j++) {
3305:       kmax = rowlengths[i*bs+j];
3306:       for (k=0; k<kmax; k++) {
3307:         tmp       = jj[nzcountb]/bs;
3308:         block     = mask[tmp] - 1;
3309:         point     = jj[nzcountb] - bs*tmp;
3310:         idx       = ishift + bs2*block + j + bs*point;
3311:         a->a[idx] = (MatScalar)aa[nzcountb++];
3312:       }
3313:     }
3314:     /* zero out the mask elements we set */
3315:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3316:   }
3317:   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");

3319:   PetscFree(rowlengths);
3320:   PetscFree(browlengths);
3321:   PetscFree(aa);
3322:   PetscFree(jj);
3323:   PetscFree2(mask,masked);

3325:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
3326:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
3327:   return(0);
3328: }

3332: /*@C
3333:    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3334:    compressed row) format.  For good matrix assembly performance the
3335:    user should preallocate the matrix storage by setting the parameter nz
3336:    (or the array nnz).  By setting these parameters accurately, performance
3337:    during matrix assembly can be increased by more than a factor of 50.

3339:    Collective on MPI_Comm

3341:    Input Parameters:
3342: +  comm - MPI communicator, set to PETSC_COMM_SELF
3343: .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3344:           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3345: .  m - number of rows
3346: .  n - number of columns
3347: .  nz - number of nonzero blocks  per block row (same for all rows)
3348: -  nnz - array containing the number of nonzero blocks in the various block rows
3349:          (possibly different for each block row) or NULL

3351:    Output Parameter:
3352: .  A - the matrix

3354:    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3355:    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3356:    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

3358:    Options Database Keys:
3359: .   -mat_no_unroll - uses code that does not unroll the loops in the
3360:                      block calculations (much slower)
3361: .    -mat_block_size - size of the blocks to use

3363:    Level: intermediate

3365:    Notes:
3366:    The number of rows and columns must be divisible by blocksize.

3368:    If the nnz parameter is given then the nz parameter is ignored

3370:    A nonzero block is any block that as 1 or more nonzeros in it

3372:    The block AIJ format is fully compatible with standard Fortran 77
3373:    storage.  That is, the stored row and column indices can begin at
3374:    either one (as in Fortran) or zero.  See the users' manual for details.

3376:    Specify the preallocated storage with either nz or nnz (not both).
3377:    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3378:    allocation.  See Users-Manual: ch_mat for details.
3379:    matrices.

3381: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3382: @*/
3383: PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3384: {

3388:   MatCreate(comm,A);
3389:   MatSetSizes(*A,m,n,m,n);
3390:   MatSetType(*A,MATSEQBAIJ);
3391:   MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
3392:   return(0);
3393: }

3397: /*@C
3398:    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3399:    per row in the matrix. For good matrix assembly performance the
3400:    user should preallocate the matrix storage by setting the parameter nz
3401:    (or the array nnz).  By setting these parameters accurately, performance
3402:    during matrix assembly can be increased by more than a factor of 50.

3404:    Collective on MPI_Comm

3406:    Input Parameters:
3407: +  B - the matrix
3408: .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3409:           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3410: .  nz - number of block nonzeros per block row (same for all rows)
3411: -  nnz - array containing the number of block nonzeros in the various block rows
3412:          (possibly different for each block row) or NULL

3414:    Options Database Keys:
3415: .   -mat_no_unroll - uses code that does not unroll the loops in the
3416:                      block calculations (much slower)
3417: .   -mat_block_size - size of the blocks to use

3419:    Level: intermediate

3421:    Notes:
3422:    If the nnz parameter is given then the nz parameter is ignored

3424:    You can call MatGetInfo() to get information on how effective the preallocation was;
3425:    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3426:    You can also run with the option -info and look for messages with the string
3427:    malloc in them to see if additional memory allocation was needed.

3429:    The block AIJ format is fully compatible with standard Fortran 77
3430:    storage.  That is, the stored row and column indices can begin at
3431:    either one (as in Fortran) or zero.  See the users' manual for details.

3433:    Specify the preallocated storage with either nz or nnz (not both).
3434:    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3435:    allocation.  See Users-Manual: ch_mat for details.

3437: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3438: @*/
3439: PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3440: {

3447:   PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
3448:   return(0);
3449: }

3453: /*@C
3454:    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3455:    (the default sequential PETSc format).

3457:    Collective on MPI_Comm

3459:    Input Parameters:
3460: +  B - the matrix
3461: .  i - the indices into j for the start of each local row (starts with zero)
3462: .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3463: -  v - optional values in the matrix

3465:    Level: developer

3467:    Notes:
3468:    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3469:    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3470:    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3471:    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3472:    block column and the second index is over columns within a block.

3474: .keywords: matrix, aij, compressed row, sparse

3476: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3477: @*/
3478: PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3479: {

3486:   PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
3487:   return(0);
3488: }


3493: /*@
3494:      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.

3496:      Collective on MPI_Comm

3498:    Input Parameters:
3499: +  comm - must be an MPI communicator of size 1
3500: .  bs - size of block
3501: .  m - number of rows
3502: .  n - number of columns
3503: .  i - row indices
3504: .  j - column indices
3505: -  a - matrix values

3507:    Output Parameter:
3508: .  mat - the matrix

3510:    Level: advanced

3512:    Notes:
3513:        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3514:     once the matrix is destroyed

3516:        You cannot set new nonzero locations into this matrix, that will generate an error.

3518:        The i and j indices are 0 based

3520:        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).

3522:       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3523:       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3524:       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3525:       with column-major ordering within blocks.

3527: .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()

3529: @*/
3530: PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3531: {
3533:   PetscInt       ii;
3534:   Mat_SeqBAIJ    *baij;

3537:   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3538:   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");

3540:   MatCreate(comm,mat);
3541:   MatSetSizes(*mat,m,n,m,n);
3542:   MatSetType(*mat,MATSEQBAIJ);
3543:   MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);
3544:   baij = (Mat_SeqBAIJ*)(*mat)->data;
3545:   PetscMalloc2(m,&baij->imax,m,&baij->ilen);
3546:   PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));

3548:   baij->i = i;
3549:   baij->j = j;
3550:   baij->a = a;

3552:   baij->singlemalloc = PETSC_FALSE;
3553:   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3554:   baij->free_a       = PETSC_FALSE;
3555:   baij->free_ij      = PETSC_FALSE;

3557:   for (ii=0; ii<m; ii++) {
3558:     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3559: #if defined(PETSC_USE_DEBUG)
3560:     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3561: #endif
3562:   }
3563: #if defined(PETSC_USE_DEBUG)
3564:   for (ii=0; ii<baij->i[m]; ii++) {
3565:     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3566:     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3567:   }
3568: #endif

3570:   MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3571:   MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3572:   return(0);
3573: }

3577: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3578: {

3582:   MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);
3583:   return(0);
3584: }