Actual source code: baij.c

petsc-3.4.5 2014-06-29
  2: /*
  3:     Defines the basic matrix operations for the BAIJ (compressed row)
  4:   matrix storage format.
  5: */
  6: #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
  7: #include <petscblaslapack.h>
  8: #include <petsc-private/kernels/blockinvert.h>
  9: #include <petsc-private/kernels/blockmatmult.h>

 13: PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
 14: {
 15:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
 17:   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
 18:   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
 19:   PetscReal      shift = 0.0;

 22:   if (a->idiagvalid) {
 23:     if (values) *values = a->idiag;
 24:     return(0);
 25:   }
 26:   MatMarkDiagonal_SeqBAIJ(A);
 27:   diag_offset = a->diag;
 28:   if (!a->idiag) {
 29:     PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);
 30:     PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));
 31:   }
 32:   diag  = a->idiag;
 33:   mdiag = a->idiag+bs2*mbs;
 34:   if (values) *values = a->idiag;
 35:   /* factor and invert each block */
 36:   switch (bs) {
 37:   case 1:
 38:     for (i=0; i<mbs; i++) {
 39:       odiag    = v + 1*diag_offset[i];
 40:       diag[0]  = odiag[0];
 41:       mdiag[0] = odiag[0];
 42:       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
 43:       diag    += 1;
 44:       mdiag   += 1;
 45:     }
 46:     break;
 47:   case 2:
 48:     for (i=0; i<mbs; i++) {
 49:       odiag    = v + 4*diag_offset[i];
 50:       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 51:       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 52:       PetscKernel_A_gets_inverse_A_2(diag,shift);
 53:       diag    += 4;
 54:       mdiag   += 4;
 55:     }
 56:     break;
 57:   case 3:
 58:     for (i=0; i<mbs; i++) {
 59:       odiag    = v + 9*diag_offset[i];
 60:       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 61:       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
 62:       diag[8]  = odiag[8];
 63:       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 64:       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
 65:       mdiag[8] = odiag[8];
 66:       PetscKernel_A_gets_inverse_A_3(diag,shift);
 67:       diag    += 9;
 68:       mdiag   += 9;
 69:     }
 70:     break;
 71:   case 4:
 72:     for (i=0; i<mbs; i++) {
 73:       odiag  = v + 16*diag_offset[i];
 74:       PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
 75:       PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
 76:       PetscKernel_A_gets_inverse_A_4(diag,shift);
 77:       diag  += 16;
 78:       mdiag += 16;
 79:     }
 80:     break;
 81:   case 5:
 82:     for (i=0; i<mbs; i++) {
 83:       odiag  = v + 25*diag_offset[i];
 84:       PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
 85:       PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
 86:       PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);
 87:       diag  += 25;
 88:       mdiag += 25;
 89:     }
 90:     break;
 91:   case 6:
 92:     for (i=0; i<mbs; i++) {
 93:       odiag  = v + 36*diag_offset[i];
 94:       PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));
 95:       PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));
 96:       PetscKernel_A_gets_inverse_A_6(diag,shift);
 97:       diag  += 36;
 98:       mdiag += 36;
 99:     }
100:     break;
101:   case 7:
102:     for (i=0; i<mbs; i++) {
103:       odiag  = v + 49*diag_offset[i];
104:       PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));
105:       PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));
106:       PetscKernel_A_gets_inverse_A_7(diag,shift);
107:       diag  += 49;
108:       mdiag += 49;
109:     }
110:     break;
111:   default:
112:     PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);
113:     for (i=0; i<mbs; i++) {
114:       odiag  = v + bs2*diag_offset[i];
115:       PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));
116:       PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));
117:       PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);
118:       diag  += bs2;
119:       mdiag += bs2;
120:     }
121:     PetscFree2(v_work,v_pivots);
122:   }
123:   a->idiagvalid = PETSC_TRUE;
124:   return(0);
125: }

129: PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130: {
131:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
132:   PetscScalar       *x,*work,*w,*workt,*t;
133:   const MatScalar   *v,*aa = a->a, *idiag;
134:   const PetscScalar *b,*xb;
135:   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
136:   PetscErrorCode    ierr;
137:   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
138:   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;

141:   its = its*lits;
142:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
143:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
144:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
145:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
146:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");

148:   if (!a->idiagvalid) {MatInvertBlockDiagonal(A,NULL);}

150:   if (!m) return(0);
151:   diag  = a->diag;
152:   idiag = a->idiag;
153:   k    = PetscMax(A->rmap->n,A->cmap->n);
154:   if (!a->mult_work) {
155:     PetscMalloc((2*k+1)*sizeof(PetscScalar),&a->mult_work);
156:   }
157:   work = a->mult_work;
158:   t = work + k+1;
159:   if (!a->sor_work) {
160:     PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);
161:   }
162:   w = a->sor_work;

164:   VecGetArray(xx,&x);
165:   VecGetArrayRead(bb,&b);

167:   if (flag & SOR_ZERO_INITIAL_GUESS) {
168:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
169:       switch (bs) {
170:       case 1:
171:         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
172:         t[0] = b[0];
173:         i2     = 1;
174:         idiag += 1;
175:         for (i=1; i<m; i++) {
176:           v  = aa + ai[i];
177:           vi = aj + ai[i];
178:           nz = diag[i] - ai[i];
179:           s[0] = b[i2];
180:           for (j=0; j<nz; j++) {
181:             xw[0] = x[vi[j]];
182:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
183:           }
184:           t[i2] = s[0];
185:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
186:           x[i2]  = xw[0];
187:           idiag += 1;
188:           i2    += 1;
189:         }
190:         break;
191:       case 2:
192:         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
193:         t[0] = b[0]; t[1] = b[1];
194:         i2     = 2;
195:         idiag += 4;
196:         for (i=1; i<m; i++) {
197:           v  = aa + 4*ai[i];
198:           vi = aj + ai[i];
199:           nz = diag[i] - ai[i];
200:           s[0] = b[i2]; s[1] = b[i2+1];
201:           for (j=0; j<nz; j++) {
202:             idx = 2*vi[j];
203:             it  = 4*j;
204:             xw[0] = x[idx]; xw[1] = x[1+idx];
205:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
206:           }
207:           t[i2] = s[0]; t[i2+1] = s[1];
208:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
209:           x[i2]   = xw[0]; x[i2+1] = xw[1];
210:           idiag  += 4;
211:           i2     += 2;
212:         }
213:         break;
214:       case 3:
215:         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
216:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
217:         i2     = 3;
218:         idiag += 9;
219:         for (i=1; i<m; i++) {
220:           v  = aa + 9*ai[i];
221:           vi = aj + ai[i];
222:           nz = diag[i] - ai[i];
223:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
224:           while (nz--) {
225:             idx = 3*(*vi++);
226:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
227:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
228:             v  += 9;
229:           }
230:           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
231:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
232:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
233:           idiag  += 9;
234:           i2     += 3;
235:         }
236:         break;
237:       case 4:
238:         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
239:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
240:         i2     = 4;
241:         idiag += 16;
242:         for (i=1; i<m; i++) {
243:           v  = aa + 16*ai[i];
244:           vi = aj + ai[i];
245:           nz = diag[i] - ai[i];
246:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
247:           while (nz--) {
248:             idx = 4*(*vi++);
249:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
250:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
251:             v  += 16;
252:           }
253:           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
254:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
255:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
256:           idiag  += 16;
257:           i2     += 4;
258:         }
259:         break;
260:       case 5:
261:         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
262:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
263:         i2     = 5;
264:         idiag += 25;
265:         for (i=1; i<m; i++) {
266:           v  = aa + 25*ai[i];
267:           vi = aj + ai[i];
268:           nz = diag[i] - ai[i];
269:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
270:           while (nz--) {
271:             idx = 5*(*vi++);
272:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
273:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
274:             v  += 25;
275:           }
276:           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
277:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
278:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
279:           idiag  += 25;
280:           i2     += 5;
281:         }
282:         break;
283:       case 6:
284:         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
285:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
286:         i2     = 6;
287:         idiag += 36;
288:         for (i=1; i<m; i++) {
289:           v  = aa + 36*ai[i];
290:           vi = aj + ai[i];
291:           nz = diag[i] - ai[i];
292:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
293:           while (nz--) {
294:             idx = 6*(*vi++);
295:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
296:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
297:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
298:             v  += 36;
299:           }
300:           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
301:           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
302:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
303:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
304:           idiag  += 36;
305:           i2     += 6;
306:         }
307:         break;
308:       case 7:
309:         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
310:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
311:         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
312:         i2     = 7;
313:         idiag += 49;
314:         for (i=1; i<m; i++) {
315:           v  = aa + 49*ai[i];
316:           vi = aj + ai[i];
317:           nz = diag[i] - ai[i];
318:           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
319:           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
320:           while (nz--) {
321:             idx = 7*(*vi++);
322:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
323:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
324:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
325:             v  += 49;
326:           }
327:           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
328:           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
329:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
330:           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
331:           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
332:           idiag  += 49;
333:           i2     += 7;
334:         }
335:         break;
336:       default:
337:         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
338:         PetscMemcpy(t,b,bs*sizeof(PetscScalar));
339:         i2     = bs;
340:         idiag += bs2;
341:         for (i=1; i<m; i++) {
342:           v  = aa + bs2*ai[i];
343:           vi = aj + ai[i];
344:           nz = diag[i] - ai[i];

346:           PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
347:           /* copy all rows of x that are needed into contiguous space */
348:           workt = work;
349:           for (j=0; j<nz; j++) {
350:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
351:             workt += bs;
352:           }
353:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
354:           PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));
355:           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);

357:           idiag += bs2;
358:           i2    += bs;
359:         }
360:         break;
361:       }
362:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
363:       PetscLogFlops(1.0*bs2*a->nz);
364:       xb = t;
365:     }
366:     else xb = b;
367:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
368:       idiag = a->idiag+bs2*(a->mbs-1);
369:       i2 = bs * (m-1);
370:       switch (bs) {
371:       case 1:
372:         s[0]  = xb[i2];
373:         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
374:         x[i2] = xw[0];
375:         i2   -= 1;
376:         for (i=m-2; i>=0; i--) {
377:           v  = aa + (diag[i]+1);
378:           vi = aj + diag[i] + 1;
379:           nz = ai[i+1] - diag[i] - 1;
380:           s[0] = xb[i2];
381:           for (j=0; j<nz; j++) {
382:             xw[0] = x[vi[j]];
383:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
384:           }
385:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
386:           x[i2]  = xw[0];
387:           idiag -= 1;
388:           i2    -= 1;
389:         }
390:         break;
391:       case 2:
392:         s[0]  = xb[i2]; s[1] = xb[i2+1];
393:         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
394:         x[i2] = xw[0]; x[i2+1] = xw[1];
395:         i2    -= 2;
396:         idiag -= 4;
397:         for (i=m-2; i>=0; i--) {
398:           v  = aa + 4*(diag[i] + 1);
399:           vi = aj + diag[i] + 1;
400:           nz = ai[i+1] - diag[i] - 1;
401:           s[0] = xb[i2]; s[1] = xb[i2+1];
402:           for (j=0; j<nz; j++) {
403:             idx = 2*vi[j];
404:             it  = 4*j;
405:             xw[0] = x[idx]; xw[1] = x[1+idx];
406:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
407:           }
408:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
409:           x[i2]   = xw[0]; x[i2+1] = xw[1];
410:           idiag  -= 4;
411:           i2     -= 2;
412:         }
413:         break;
414:       case 3:
415:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
416:         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
417:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
418:         i2    -= 3;
419:         idiag -= 9;
420:         for (i=m-2; i>=0; i--) {
421:           v  = aa + 9*(diag[i]+1);
422:           vi = aj + diag[i] + 1;
423:           nz = ai[i+1] - diag[i] - 1;
424:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
425:           while (nz--) {
426:             idx = 3*(*vi++);
427:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
428:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
429:             v  += 9;
430:           }
431:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
432:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
433:           idiag  -= 9;
434:           i2     -= 3;
435:         }
436:         break;
437:       case 4:
438:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
439:         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
440:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
441:         i2    -= 4;
442:         idiag -= 16;
443:         for (i=m-2; i>=0; i--) {
444:           v  = aa + 16*(diag[i]+1);
445:           vi = aj + diag[i] + 1;
446:           nz = ai[i+1] - diag[i] - 1;
447:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
448:           while (nz--) {
449:             idx = 4*(*vi++);
450:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
451:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
452:             v  += 16;
453:           }
454:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
455:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
456:           idiag  -= 16;
457:           i2     -= 4;
458:         }
459:         break;
460:       case 5:
461:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
462:         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
463:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
464:         i2    -= 5;
465:         idiag -= 25;
466:         for (i=m-2; i>=0; i--) {
467:           v  = aa + 25*(diag[i]+1);
468:           vi = aj + diag[i] + 1;
469:           nz = ai[i+1] - diag[i] - 1;
470:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
471:           while (nz--) {
472:             idx = 5*(*vi++);
473:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
474:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
475:             v  += 25;
476:           }
477:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
478:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
479:           idiag  -= 25;
480:           i2     -= 5;
481:         }
482:         break;
483:       case 6:
484:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
485:         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
486:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
487:         i2    -= 6;
488:         idiag -= 36;
489:         for (i=m-2; i>=0; i--) {
490:           v  = aa + 36*(diag[i]+1);
491:           vi = aj + diag[i] + 1;
492:           nz = ai[i+1] - diag[i] - 1;
493:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
494:           while (nz--) {
495:             idx = 6*(*vi++);
496:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
497:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
498:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
499:             v  += 36;
500:           }
501:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
502:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
503:           idiag  -= 36;
504:           i2     -= 6;
505:         }
506:         break;
507:       case 7:
508:         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
509:         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
510:         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
511:         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
512:         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
513:         i2    -= 7;
514:         idiag -= 49;
515:         for (i=m-2; i>=0; i--) {
516:           v  = aa + 49*(diag[i]+1);
517:           vi = aj + diag[i] + 1;
518:           nz = ai[i+1] - diag[i] - 1;
519:           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
520:           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
521:           while (nz--) {
522:             idx = 7*(*vi++);
523:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
524:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
525:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
526:             v  += 49;
527:           }
528:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
529:           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
530:           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
531:           idiag  -= 49;
532:           i2     -= 7;
533:         }
534:         break;
535:       default:
536:         PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
537:         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
538:         i2    -= bs;
539:         idiag -= bs2;
540:         for (i=m-2; i>=0; i--) {
541:           v  = aa + bs2*(diag[i]+1);
542:           vi = aj + diag[i] + 1;
543:           nz = ai[i+1] - diag[i] - 1;

545:           PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
546:           /* copy all rows of x that are needed into contiguous space */
547:           workt = work;
548:           for (j=0; j<nz; j++) {
549:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
550:             workt += bs;
551:           }
552:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
553:           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);

555:           idiag -= bs2;
556:           i2    -= bs;
557:         }
558:         break;
559:       }
560:       PetscLogFlops(1.0*bs2*(a->nz));
561:     }
562:     its--;
563:   }
564:   while (its--) {
565:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
566:       idiag = a->idiag;
567:       i2 = 0;
568:       switch (bs) {
569:       case 1:
570:         for (i=0; i<m; i++) {
571:           v  = aa + ai[i];
572:           vi = aj + ai[i];
573:           nz = ai[i+1] - ai[i];
574:           s[0] = b[i2];
575:           for (j=0; j<nz; j++) {
576:             xw[0] = x[vi[j]];
577:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
578:           }
579:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
580:           x[i2] += xw[0];
581:           idiag += 1;
582:           i2    += 1;
583:         }
584:         break;
585:       case 2:
586:         for (i=0; i<m; i++) {
587:           v  = aa + 4*ai[i];
588:           vi = aj + ai[i];
589:           nz = ai[i+1] - ai[i];
590:           s[0] = b[i2]; s[1] = b[i2+1];
591:           for (j=0; j<nz; j++) {
592:             idx = 2*vi[j];
593:             it  = 4*j;
594:             xw[0] = x[idx]; xw[1] = x[1+idx];
595:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
596:           }
597:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
598:           x[i2]  += xw[0]; x[i2+1] += xw[1];
599:           idiag  += 4;
600:           i2     += 2;
601:         }
602:         break;
603:       case 3:
604:         for (i=0; i<m; i++) {
605:           v  = aa + 9*ai[i];
606:           vi = aj + ai[i];
607:           nz = ai[i+1] - ai[i];
608:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
609:           while (nz--) {
610:             idx = 3*(*vi++);
611:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
612:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
613:             v  += 9;
614:           }
615:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
616:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
617:           idiag  += 9;
618:           i2     += 3;
619:         }
620:         break;
621:       case 4:
622:         for (i=0; i<m; i++) {
623:           v  = aa + 16*ai[i];
624:           vi = aj + ai[i];
625:           nz = ai[i+1] - ai[i];
626:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
627:           while (nz--) {
628:             idx = 4*(*vi++);
629:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
630:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
631:             v  += 16;
632:           }
633:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
634:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
635:           idiag  += 16;
636:           i2     += 4;
637:         }
638:         break;
639:       case 5:
640:         for (i=0; i<m; i++) {
641:           v  = aa + 25*ai[i];
642:           vi = aj + ai[i];
643:           nz = ai[i+1] - ai[i];
644:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
645:           while (nz--) {
646:             idx = 5*(*vi++);
647:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
648:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
649:             v  += 25;
650:           }
651:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
652:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
653:           idiag  += 25;
654:           i2     += 5;
655:         }
656:         break;
657:       case 6:
658:         for (i=0; i<m; i++) {
659:           v  = aa + 36*ai[i];
660:           vi = aj + ai[i];
661:           nz = ai[i+1] - ai[i];
662:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
663:           while (nz--) {
664:             idx = 6*(*vi++);
665:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
666:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
667:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
668:             v  += 36;
669:           }
670:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
671:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
672:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
673:           idiag  += 36;
674:           i2     += 6;
675:         }
676:         break;
677:       case 7:
678:         for (i=0; i<m; i++) {
679:           v  = aa + 49*ai[i];
680:           vi = aj + ai[i];
681:           nz = ai[i+1] - ai[i];
682:           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
683:           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
684:           while (nz--) {
685:             idx = 7*(*vi++);
686:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
687:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
688:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
689:             v  += 49;
690:           }
691:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
692:           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
693:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
694:           idiag  += 49;
695:           i2     += 7;
696:         }
697:         break;
698:       default:
699:         for (i=0; i<m; i++) {
700:           v  = aa + bs2*ai[i];
701:           vi = aj + ai[i];
702:           nz = ai[i+1] - ai[i];

704:           PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
705:           /* copy all rows of x that are needed into contiguous space */
706:           workt = work;
707:           for (j=0; j<nz; j++) {
708:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
709:             workt += bs;
710:           }
711:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
712:           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);

714:           idiag += bs2;
715:           i2    += bs;
716:         }
717:         break;
718:       }
719:       PetscLogFlops(2.0*bs2*a->nz);
720:     }
721:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
722:       idiag = a->idiag+bs2*(a->mbs-1);
723:       i2 = bs * (m-1);
724:       switch (bs) {
725:       case 1:
726:         for (i=m-1; i>=0; i--) {
727:           v  = aa + ai[i];
728:           vi = aj + ai[i];
729:           nz = ai[i+1] - ai[i];
730:           s[0] = b[i2];
731:           for (j=0; j<nz; j++) {
732:             xw[0] = x[vi[j]];
733:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
734:           }
735:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
736:           x[i2] += xw[0];
737:           idiag -= 1;
738:           i2    -= 1;
739:         }
740:         break;
741:       case 2:
742:         for (i=m-1; i>=0; i--) {
743:           v  = aa + 4*ai[i];
744:           vi = aj + ai[i];
745:           nz = ai[i+1] - ai[i];
746:           s[0] = b[i2]; s[1] = b[i2+1];
747:           for (j=0; j<nz; j++) {
748:             idx = 2*vi[j];
749:             it  = 4*j;
750:             xw[0] = x[idx]; xw[1] = x[1+idx];
751:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
752:           }
753:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
754:           x[i2]  += xw[0]; x[i2+1] += xw[1];
755:           idiag  -= 4;
756:           i2     -= 2;
757:         }
758:         break;
759:       case 3:
760:         for (i=m-1; i>=0; i--) {
761:           v  = aa + 9*ai[i];
762:           vi = aj + ai[i];
763:           nz = ai[i+1] - ai[i];
764:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
765:           while (nz--) {
766:             idx = 3*(*vi++);
767:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
768:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
769:             v  += 9;
770:           }
771:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
772:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
773:           idiag  -= 9;
774:           i2     -= 3;
775:         }
776:         break;
777:       case 4:
778:         for (i=m-1; i>=0; i--) {
779:           v  = aa + 16*ai[i];
780:           vi = aj + ai[i];
781:           nz = ai[i+1] - ai[i];
782:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
783:           while (nz--) {
784:             idx = 4*(*vi++);
785:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
786:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
787:             v  += 16;
788:           }
789:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
790:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
791:           idiag  -= 16;
792:           i2     -= 4;
793:         }
794:         break;
795:       case 5:
796:         for (i=m-1; i>=0; i--) {
797:           v  = aa + 25*ai[i];
798:           vi = aj + ai[i];
799:           nz = ai[i+1] - ai[i];
800:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
801:           while (nz--) {
802:             idx = 5*(*vi++);
803:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
804:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
805:             v  += 25;
806:           }
807:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
808:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
809:           idiag  -= 25;
810:           i2     -= 5;
811:         }
812:         break;
813:       case 6:
814:         for (i=m-1; i>=0; i--) {
815:           v  = aa + 36*ai[i];
816:           vi = aj + ai[i];
817:           nz = ai[i+1] - ai[i];
818:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
819:           while (nz--) {
820:             idx = 6*(*vi++);
821:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
822:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
823:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
824:             v  += 36;
825:           }
826:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
827:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
828:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
829:           idiag  -= 36;
830:           i2     -= 6;
831:         }
832:         break;
833:       case 7:
834:         for (i=m-1; i>=0; i--) {
835:           v  = aa + 49*ai[i];
836:           vi = aj + ai[i];
837:           nz = ai[i+1] - ai[i];
838:           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
839:           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
840:           while (nz--) {
841:             idx = 7*(*vi++);
842:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
843:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
844:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
845:             v  += 49;
846:           }
847:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
848:           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
849:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
850:           idiag  -= 49;
851:           i2     -= 7;
852:         }
853:         break;
854:       default:
855:         for (i=m-1; i>=0; i--) {
856:           v  = aa + bs2*ai[i];
857:           vi = aj + ai[i];
858:           nz = ai[i+1] - ai[i];

860:           PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
861:           /* copy all rows of x that are needed into contiguous space */
862:           workt = work;
863:           for (j=0; j<nz; j++) {
864:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
865:             workt += bs;
866:           }
867:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
868:           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);

870:           idiag -= bs2;
871:           i2    -= bs;
872:         }
873:         break;
874:       }
875:       PetscLogFlops(2.0*bs2*(a->nz));
876:     }
877:   }
878:   VecRestoreArray(xx,&x);
879:   VecRestoreArrayRead(bb,&b);
880:   return(0);
881: }


884: /*
885:     Special version for direct calls from Fortran (Used in PETSc-fun3d)
886: */
887: #if defined(PETSC_HAVE_FORTRAN_CAPS)
888: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
889: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
890: #define matsetvaluesblocked4_ matsetvaluesblocked4
891: #endif

895: PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
896: {
897:   Mat               A  = *AA;
898:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
899:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
900:   PetscInt          *ai    =a->i,*ailen=a->ilen;
901:   PetscInt          *aj    =a->j,stepval,lastcol = -1;
902:   const PetscScalar *value = v;
903:   MatScalar         *ap,*aa = a->a,*bap;

906:   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
907:   stepval = (n-1)*4;
908:   for (k=0; k<m; k++) { /* loop over added rows */
909:     row  = im[k];
910:     rp   = aj + ai[row];
911:     ap   = aa + 16*ai[row];
912:     nrow = ailen[row];
913:     low  = 0;
914:     high = nrow;
915:     for (l=0; l<n; l++) { /* loop over added columns */
916:       col = in[l];
917:       if (col <= lastcol)  low = 0;
918:       else                high = nrow;
919:       lastcol = col;
920:       value   = v + k*(stepval+4 + l)*4;
921:       while (high-low > 7) {
922:         t = (low+high)/2;
923:         if (rp[t] > col) high = t;
924:         else             low  = t;
925:       }
926:       for (i=low; i<high; i++) {
927:         if (rp[i] > col) break;
928:         if (rp[i] == col) {
929:           bap = ap +  16*i;
930:           for (ii=0; ii<4; ii++,value+=stepval) {
931:             for (jj=ii; jj<16; jj+=4) {
932:               bap[jj] += *value++;
933:             }
934:           }
935:           goto noinsert2;
936:         }
937:       }
938:       N = nrow++ - 1;
939:       high++; /* added new column index thus must search to one higher than before */
940:       /* shift up all the later entries in this row */
941:       for (ii=N; ii>=i; ii--) {
942:         rp[ii+1] = rp[ii];
943:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
944:       }
945:       if (N >= i) {
946:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
947:       }
948:       rp[i] = col;
949:       bap   = ap +  16*i;
950:       for (ii=0; ii<4; ii++,value+=stepval) {
951:         for (jj=ii; jj<16; jj+=4) {
952:           bap[jj] = *value++;
953:         }
954:       }
955:       noinsert2:;
956:       low = i;
957:     }
958:     ailen[row] = nrow;
959:   }
960:   PetscFunctionReturnVoid();
961: }

963: #if defined(PETSC_HAVE_FORTRAN_CAPS)
964: #define matsetvalues4_ MATSETVALUES4
965: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
966: #define matsetvalues4_ matsetvalues4
967: #endif

971: PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
972: {
973:   Mat         A  = *AA;
974:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
975:   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
976:   PetscInt    *ai=a->i,*ailen=a->ilen;
977:   PetscInt    *aj=a->j,brow,bcol;
978:   PetscInt    ridx,cidx,lastcol = -1;
979:   MatScalar   *ap,value,*aa=a->a,*bap;

982:   for (k=0; k<m; k++) { /* loop over added rows */
983:     row  = im[k]; brow = row/4;
984:     rp   = aj + ai[brow];
985:     ap   = aa + 16*ai[brow];
986:     nrow = ailen[brow];
987:     low  = 0;
988:     high = nrow;
989:     for (l=0; l<n; l++) { /* loop over added columns */
990:       col   = in[l]; bcol = col/4;
991:       ridx  = row % 4; cidx = col % 4;
992:       value = v[l + k*n];
993:       if (col <= lastcol)  low = 0;
994:       else                high = nrow;
995:       lastcol = col;
996:       while (high-low > 7) {
997:         t = (low+high)/2;
998:         if (rp[t] > bcol) high = t;
999:         else              low  = t;
1000:       }
1001:       for (i=low; i<high; i++) {
1002:         if (rp[i] > bcol) break;
1003:         if (rp[i] == bcol) {
1004:           bap   = ap +  16*i + 4*cidx + ridx;
1005:           *bap += value;
1006:           goto noinsert1;
1007:         }
1008:       }
1009:       N = nrow++ - 1;
1010:       high++; /* added new column thus must search to one higher than before */
1011:       /* shift up all the later entries in this row */
1012:       for (ii=N; ii>=i; ii--) {
1013:         rp[ii+1] = rp[ii];
1014:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1015:       }
1016:       if (N>=i) {
1017:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1018:       }
1019:       rp[i]                    = bcol;
1020:       ap[16*i + 4*cidx + ridx] = value;
1021: noinsert1:;
1022:       low = i;
1023:     }
1024:     ailen[brow] = nrow;
1025:   }
1026:   PetscFunctionReturnVoid();
1027: }

1029: /*
1030:      Checks for missing diagonals
1031: */
1034: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1035: {
1036:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1038:   PetscInt       *diag,*jj = a->j,i;

1041:   MatMarkDiagonal_SeqBAIJ(A);
1042:   *missing = PETSC_FALSE;
1043:   if (A->rmap->n > 0 && !jj) {
1044:     *missing = PETSC_TRUE;
1045:     if (d) *d = 0;
1046:     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1047:   } else {
1048:     diag = a->diag;
1049:     for (i=0; i<a->mbs; i++) {
1050:       if (jj[diag[i]] != i) {
1051:         *missing = PETSC_TRUE;
1052:         if (d) *d = i;
1053:         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1054:         break;
1055:       }
1056:     }
1057:   }
1058:   return(0);
1059: }

1063: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1064: {
1065:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1067:   PetscInt       i,j,m = a->mbs;

1070:   if (!a->diag) {
1071:     PetscMalloc(m*sizeof(PetscInt),&a->diag);
1072:     PetscLogObjectMemory(A,m*sizeof(PetscInt));
1073:     a->free_diag = PETSC_TRUE;
1074:   }
1075:   for (i=0; i<m; i++) {
1076:     a->diag[i] = a->i[i+1];
1077:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1078:       if (a->j[j] == i) {
1079:         a->diag[i] = j;
1080:         break;
1081:       }
1082:     }
1083:   }
1084:   return(0);
1085: }


1090: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1091: {
1092:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1094:   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1095:   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;

1098:   *nn = n;
1099:   if (!ia) return(0);
1100:   if (symmetric) {
1101:     MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);
1102:     nz   = tia[n];
1103:   } else {
1104:     tia = a->i; tja = a->j;
1105:   }

1107:   if (!blockcompressed && bs > 1) {
1108:     (*nn) *= bs;
1109:     /* malloc & create the natural set of indices */
1110:     PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);
1111:     if (n) {
1112:       (*ia)[0] = 0;
1113:       for (j=1; j<bs; j++) {
1114:         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1115:       }
1116:     }

1118:     for (i=1; i<n; i++) {
1119:       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1120:       for (j=1; j<bs; j++) {
1121:         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1122:       }
1123:     }
1124:     if (n) {
1125:       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1126:     }

1128:     if (inja) {
1129:       PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);
1130:       cnt = 0;
1131:       for (i=0; i<n; i++) {
1132:         for (j=0; j<bs; j++) {
1133:           for (k=tia[i]; k<tia[i+1]; k++) {
1134:             for (l=0; l<bs; l++) {
1135:               (*ja)[cnt++] = bs*tja[k] + l;
1136:             }
1137:           }
1138:         }
1139:       }
1140:     }

1142:     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1143:       PetscFree(tia);
1144:       PetscFree(tja);
1145:     }
1146:   } else if (oshift == 1) {
1147:     if (symmetric) {
1148:       nz = tia[A->rmap->n/bs];
1149:       /*  add 1 to i and j indices */
1150:       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1151:       *ia = tia;
1152:       if (ja) {
1153:         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1154:         *ja = tja;
1155:       }
1156:     } else {
1157:       nz = a->i[A->rmap->n/bs];
1158:       /* malloc space and  add 1 to i and j indices */
1159:       PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);
1160:       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1161:       if (ja) {
1162:         PetscMalloc(nz*sizeof(PetscInt),ja);
1163:         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1164:       }
1165:     }
1166:   } else {
1167:     *ia = tia;
1168:     if (ja) *ja = tja;
1169:   }
1170:   return(0);
1171: }

1175: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1176: {

1180:   if (!ia) return(0);
1181:   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1182:     PetscFree(*ia);
1183:     if (ja) {PetscFree(*ja);}
1184:   }
1185:   return(0);
1186: }

1190: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1191: {
1192:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1196: #if defined(PETSC_USE_LOG)
1197:   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1198: #endif
1199:   MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
1200:   ISDestroy(&a->row);
1201:   ISDestroy(&a->col);
1202:   if (a->free_diag) {PetscFree(a->diag);}
1203:   PetscFree(a->idiag);
1204:   if (a->free_imax_ilen) {PetscFree2(a->imax,a->ilen);}
1205:   PetscFree(a->solve_work);
1206:   PetscFree(a->mult_work);
1207:   PetscFree(a->sor_work);
1208:   ISDestroy(&a->icol);
1209:   PetscFree(a->saved_values);
1210:   PetscFree(a->xtoy);
1211:   PetscFree2(a->compressedrow.i,a->compressedrow.rindex);

1213:   MatDestroy(&a->sbaijMat);
1214:   MatDestroy(&a->parent);
1215:   PetscFree(A->data);

1217:   PetscObjectChangeTypeName((PetscObject)A,0);
1218:   PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);
1219:   PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);
1220:   PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);
1221:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);
1222:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);
1223:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);
1224:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);
1225:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);
1226:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);
1227:   PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);
1228:   return(0);
1229: }

1233: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1234: {
1235:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1239:   switch (op) {
1240:   case MAT_ROW_ORIENTED:
1241:     a->roworiented = flg;
1242:     break;
1243:   case MAT_KEEP_NONZERO_PATTERN:
1244:     a->keepnonzeropattern = flg;
1245:     break;
1246:   case MAT_NEW_NONZERO_LOCATIONS:
1247:     a->nonew = (flg ? 0 : 1);
1248:     break;
1249:   case MAT_NEW_NONZERO_LOCATION_ERR:
1250:     a->nonew = (flg ? -1 : 0);
1251:     break;
1252:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1253:     a->nonew = (flg ? -2 : 0);
1254:     break;
1255:   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1256:     a->nounused = (flg ? -1 : 0);
1257:     break;
1258:   case MAT_CHECK_COMPRESSED_ROW:
1259:     a->compressedrow.check = flg;
1260:     break;
1261:   case MAT_NEW_DIAGONALS:
1262:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1263:   case MAT_USE_HASH_TABLE:
1264:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1265:     break;
1266:   case MAT_SPD:
1267:   case MAT_SYMMETRIC:
1268:   case MAT_STRUCTURALLY_SYMMETRIC:
1269:   case MAT_HERMITIAN:
1270:   case MAT_SYMMETRY_ETERNAL:
1271:     /* These options are handled directly by MatSetOption() */
1272:     break;
1273:   default:
1274:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1275:   }
1276:   return(0);
1277: }

1281: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1282: {
1283:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1285:   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1286:   MatScalar      *aa,*aa_i;
1287:   PetscScalar    *v_i;

1290:   bs  = A->rmap->bs;
1291:   ai  = a->i;
1292:   aj  = a->j;
1293:   aa  = a->a;
1294:   bs2 = a->bs2;

1296:   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);

1298:   bn  = row/bs;   /* Block number */
1299:   bp  = row % bs; /* Block Position */
1300:   M   = ai[bn+1] - ai[bn];
1301:   *nz = bs*M;

1303:   if (v) {
1304:     *v = 0;
1305:     if (*nz) {
1306:       PetscMalloc((*nz)*sizeof(PetscScalar),v);
1307:       for (i=0; i<M; i++) { /* for each block in the block row */
1308:         v_i  = *v + i*bs;
1309:         aa_i = aa + bs2*(ai[bn] + i);
1310:         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1311:       }
1312:     }
1313:   }

1315:   if (idx) {
1316:     *idx = 0;
1317:     if (*nz) {
1318:       PetscMalloc((*nz)*sizeof(PetscInt),idx);
1319:       for (i=0; i<M; i++) { /* for each block in the block row */
1320:         idx_i = *idx + i*bs;
1321:         itmp  = bs*aj[ai[bn] + i];
1322:         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1323:       }
1324:     }
1325:   }
1326:   return(0);
1327: }

1331: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1332: {

1336:   if (idx) {PetscFree(*idx);}
1337:   if (v)   {PetscFree(*v);}
1338:   return(0);
1339: }

1341: extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);

1345: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1346: {
1347:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1348:   Mat            C;
1350:   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1351:   PetscInt       *rows,*cols,bs2=a->bs2;
1352:   MatScalar      *array;

1355:   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1356:   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1357:     PetscMalloc((1+nbs)*sizeof(PetscInt),&col);
1358:     PetscMemzero(col,(1+nbs)*sizeof(PetscInt));

1360:     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1361:     MatCreate(PetscObjectComm((PetscObject)A),&C);
1362:     MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);
1363:     MatSetType(C,((PetscObject)A)->type_name);
1364:     MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);
1365:     PetscFree(col);
1366:   } else {
1367:     C = *B;
1368:   }

1370:   array = a->a;
1371:   PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);
1372:   for (i=0; i<mbs; i++) {
1373:     cols[0] = i*bs;
1374:     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1375:     len = ai[i+1] - ai[i];
1376:     for (j=0; j<len; j++) {
1377:       rows[0] = (*aj++)*bs;
1378:       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1379:       MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);
1380:       array += bs2;
1381:     }
1382:   }
1383:   PetscFree2(rows,cols);

1385:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1386:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);

1388:   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1389:     *B = C;
1390:   } else {
1391:     MatHeaderMerge(A,C);
1392:   }
1393:   return(0);
1394: }

1398: PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1399: {
1401:   Mat            Btrans;

1404:   *f   = PETSC_FALSE;
1405:   MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);
1406:   MatEqual_SeqBAIJ(B,Btrans,f);
1407:   MatDestroy(&Btrans);
1408:   return(0);
1409: }

1413: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1414: {
1415:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1417:   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1418:   int            fd;
1419:   PetscScalar    *aa;
1420:   FILE           *file;

1423:   PetscViewerBinaryGetDescriptor(viewer,&fd);
1424:   PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);
1425:   col_lens[0] = MAT_FILE_CLASSID;

1427:   col_lens[1] = A->rmap->N;
1428:   col_lens[2] = A->cmap->n;
1429:   col_lens[3] = a->nz*bs2;

1431:   /* store lengths of each row and write (including header) to file */
1432:   count = 0;
1433:   for (i=0; i<a->mbs; i++) {
1434:     for (j=0; j<bs; j++) {
1435:       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1436:     }
1437:   }
1438:   PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);
1439:   PetscFree(col_lens);

1441:   /* store column indices (zero start index) */
1442:   PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);
1443:   count = 0;
1444:   for (i=0; i<a->mbs; i++) {
1445:     for (j=0; j<bs; j++) {
1446:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1447:         for (l=0; l<bs; l++) {
1448:           jj[count++] = bs*a->j[k] + l;
1449:         }
1450:       }
1451:     }
1452:   }
1453:   PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1454:   PetscFree(jj);

1456:   /* store nonzero values */
1457:   PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
1458:   count = 0;
1459:   for (i=0; i<a->mbs; i++) {
1460:     for (j=0; j<bs; j++) {
1461:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1462:         for (l=0; l<bs; l++) {
1463:           aa[count++] = a->a[bs2*k + l*bs + j];
1464:         }
1465:       }
1466:     }
1467:   }
1468:   PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1469:   PetscFree(aa);

1471:   PetscViewerBinaryGetInfoPointer(viewer,&file);
1472:   if (file) {
1473:     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1474:   }
1475:   return(0);
1476: }

1480: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1481: {
1482:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1483:   PetscErrorCode    ierr;
1484:   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1485:   PetscViewerFormat format;

1488:   PetscViewerGetFormat(viewer,&format);
1489:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1490:     PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);
1491:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1492:     Mat aij;
1493:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1494:     MatView(aij,viewer);
1495:     MatDestroy(&aij);
1496:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1497:       return(0);
1498:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1499:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1500:     PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");
1501:     for (i=0; i<a->mbs; i++) {
1502:       for (j=0; j<bs; j++) {
1503:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1504:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1505:           for (l=0; l<bs; l++) {
1506: #if defined(PETSC_USE_COMPLEX)
1507:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1508:               PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1509:                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1510:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1511:               PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1512:                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1513:             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1514:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1515:             }
1516: #else
1517:             if (a->a[bs2*k + l*bs + j] != 0.0) {
1518:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1519:             }
1520: #endif
1521:           }
1522:         }
1523:         PetscViewerASCIIPrintf(viewer,"\n");
1524:       }
1525:     }
1526:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1527:   } else {
1528:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1529:     PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");
1530:     for (i=0; i<a->mbs; i++) {
1531:       for (j=0; j<bs; j++) {
1532:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1533:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1534:           for (l=0; l<bs; l++) {
1535: #if defined(PETSC_USE_COMPLEX)
1536:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1537:               PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1538:                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1539:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1540:               PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1541:                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1542:             } else {
1543:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1544:             }
1545: #else
1546:             PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1547: #endif
1548:           }
1549:         }
1550:         PetscViewerASCIIPrintf(viewer,"\n");
1551:       }
1552:     }
1553:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1554:   }
1555:   PetscViewerFlush(viewer);
1556:   return(0);
1557: }

1559: #include <petscdraw.h>
1562: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1563: {
1564:   Mat               A = (Mat) Aa;
1565:   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1566:   PetscErrorCode    ierr;
1567:   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1568:   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1569:   MatScalar         *aa;
1570:   PetscViewer       viewer;
1571:   PetscViewerFormat format;

1574:   PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1575:   PetscViewerGetFormat(viewer,&format);

1577:   PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);

1579:   /* loop over matrix elements drawing boxes */

1581:   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1582:     color = PETSC_DRAW_BLUE;
1583:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1584:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1585:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1586:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1587:         aa  = a->a + j*bs2;
1588:         for (k=0; k<bs; k++) {
1589:           for (l=0; l<bs; l++) {
1590:             if (PetscRealPart(*aa++) >=  0.) continue;
1591:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1592:           }
1593:         }
1594:       }
1595:     }
1596:     color = PETSC_DRAW_CYAN;
1597:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1598:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1599:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1600:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1601:         aa  = a->a + j*bs2;
1602:         for (k=0; k<bs; k++) {
1603:           for (l=0; l<bs; l++) {
1604:             if (PetscRealPart(*aa++) != 0.) continue;
1605:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1606:           }
1607:         }
1608:       }
1609:     }
1610:     color = PETSC_DRAW_RED;
1611:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1612:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1613:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1614:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1615:         aa  = a->a + j*bs2;
1616:         for (k=0; k<bs; k++) {
1617:           for (l=0; l<bs; l++) {
1618:             if (PetscRealPart(*aa++) <= 0.) continue;
1619:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1620:           }
1621:         }
1622:       }
1623:     }
1624:   } else {
1625:     /* use contour shading to indicate magnitude of values */
1626:     /* first determine max of all nonzero values */
1627:     PetscDraw popup;
1628:     PetscReal scale,maxv = 0.0;

1630:     for (i=0; i<a->nz*a->bs2; i++) {
1631:       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1632:     }
1633:     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1634:     PetscDrawGetPopup(draw,&popup);
1635:     if (popup) {
1636:       PetscDrawScalePopup(popup,0.0,maxv);
1637:     }
1638:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1639:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1640:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1641:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1642:         aa  = a->a + j*bs2;
1643:         for (k=0; k<bs; k++) {
1644:           for (l=0; l<bs; l++) {
1645:             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1646:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1647:           }
1648:         }
1649:       }
1650:     }
1651:   }
1652:   return(0);
1653: }

1657: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1658: {
1660:   PetscReal      xl,yl,xr,yr,w,h;
1661:   PetscDraw      draw;
1662:   PetscBool      isnull;

1665:   PetscViewerDrawGetDraw(viewer,0,&draw);
1666:   PetscDrawIsNull(draw,&isnull); if (isnull) return(0);

1668:   PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1669:   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1670:   xr  += w;    yr += h;  xl = -w;     yl = -h;
1671:   PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1672:   PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1673:   PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);
1674:   return(0);
1675: }

1679: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1680: {
1682:   PetscBool      iascii,isbinary,isdraw;

1685:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1686:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1687:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1688:   if (iascii) {
1689:     MatView_SeqBAIJ_ASCII(A,viewer);
1690:   } else if (isbinary) {
1691:     MatView_SeqBAIJ_Binary(A,viewer);
1692:   } else if (isdraw) {
1693:     MatView_SeqBAIJ_Draw(A,viewer);
1694:   } else {
1695:     Mat B;
1696:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1697:     MatView(B,viewer);
1698:     MatDestroy(&B);
1699:   }
1700:   return(0);
1701: }


1706: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1707: {
1708:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1709:   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1710:   PetscInt    *ai = a->i,*ailen = a->ilen;
1711:   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1712:   MatScalar   *ap,*aa = a->a;

1715:   for (k=0; k<m; k++) { /* loop over rows */
1716:     row = im[k]; brow = row/bs;
1717:     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1718:     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1719:     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1720:     nrow = ailen[brow];
1721:     for (l=0; l<n; l++) { /* loop over columns */
1722:       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1723:       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1724:       col  = in[l];
1725:       bcol = col/bs;
1726:       cidx = col%bs;
1727:       ridx = row%bs;
1728:       high = nrow;
1729:       low  = 0; /* assume unsorted */
1730:       while (high-low > 5) {
1731:         t = (low+high)/2;
1732:         if (rp[t] > bcol) high = t;
1733:         else             low  = t;
1734:       }
1735:       for (i=low; i<high; i++) {
1736:         if (rp[i] > bcol) break;
1737:         if (rp[i] == bcol) {
1738:           *v++ = ap[bs2*i+bs*cidx+ridx];
1739:           goto finished;
1740:         }
1741:       }
1742:       *v++ = 0.0;
1743: finished:;
1744:     }
1745:   }
1746:   return(0);
1747: }

1751: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1752: {
1753:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1754:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1755:   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1756:   PetscErrorCode    ierr;
1757:   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1758:   PetscBool         roworiented=a->roworiented;
1759:   const PetscScalar *value     = v;
1760:   MatScalar         *ap,*aa = a->a,*bap;

1763:   if (roworiented) {
1764:     stepval = (n-1)*bs;
1765:   } else {
1766:     stepval = (m-1)*bs;
1767:   }
1768:   for (k=0; k<m; k++) { /* loop over added rows */
1769:     row = im[k];
1770:     if (row < 0) continue;
1771: #if defined(PETSC_USE_DEBUG)
1772:     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1773: #endif
1774:     rp   = aj + ai[row];
1775:     ap   = aa + bs2*ai[row];
1776:     rmax = imax[row];
1777:     nrow = ailen[row];
1778:     low  = 0;
1779:     high = nrow;
1780:     for (l=0; l<n; l++) { /* loop over added columns */
1781:       if (in[l] < 0) continue;
1782: #if defined(PETSC_USE_DEBUG)
1783:       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1784: #endif
1785:       col = in[l];
1786:       if (roworiented) {
1787:         value = v + (k*(stepval+bs) + l)*bs;
1788:       } else {
1789:         value = v + (l*(stepval+bs) + k)*bs;
1790:       }
1791:       if (col <= lastcol) low = 0;
1792:       else high = nrow;
1793:       lastcol = col;
1794:       while (high-low > 7) {
1795:         t = (low+high)/2;
1796:         if (rp[t] > col) high = t;
1797:         else             low  = t;
1798:       }
1799:       for (i=low; i<high; i++) {
1800:         if (rp[i] > col) break;
1801:         if (rp[i] == col) {
1802:           bap = ap +  bs2*i;
1803:           if (roworiented) {
1804:             if (is == ADD_VALUES) {
1805:               for (ii=0; ii<bs; ii++,value+=stepval) {
1806:                 for (jj=ii; jj<bs2; jj+=bs) {
1807:                   bap[jj] += *value++;
1808:                 }
1809:               }
1810:             } else {
1811:               for (ii=0; ii<bs; ii++,value+=stepval) {
1812:                 for (jj=ii; jj<bs2; jj+=bs) {
1813:                   bap[jj] = *value++;
1814:                 }
1815:               }
1816:             }
1817:           } else {
1818:             if (is == ADD_VALUES) {
1819:               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1820:                 for (jj=0; jj<bs; jj++) {
1821:                   bap[jj] += value[jj];
1822:                 }
1823:                 bap += bs;
1824:               }
1825:             } else {
1826:               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1827:                 for (jj=0; jj<bs; jj++) {
1828:                   bap[jj]  = value[jj];
1829:                 }
1830:                 bap += bs;
1831:               }
1832:             }
1833:           }
1834:           goto noinsert2;
1835:         }
1836:       }
1837:       if (nonew == 1) goto noinsert2;
1838:       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1839:       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1840:       N = nrow++ - 1; high++;
1841:       /* shift up all the later entries in this row */
1842:       for (ii=N; ii>=i; ii--) {
1843:         rp[ii+1] = rp[ii];
1844:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1845:       }
1846:       if (N >= i) {
1847:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1848:       }
1849:       rp[i] = col;
1850:       bap   = ap +  bs2*i;
1851:       if (roworiented) {
1852:         for (ii=0; ii<bs; ii++,value+=stepval) {
1853:           for (jj=ii; jj<bs2; jj+=bs) {
1854:             bap[jj] = *value++;
1855:           }
1856:         }
1857:       } else {
1858:         for (ii=0; ii<bs; ii++,value+=stepval) {
1859:           for (jj=0; jj<bs; jj++) {
1860:             *bap++ = *value++;
1861:           }
1862:         }
1863:       }
1864: noinsert2:;
1865:       low = i;
1866:     }
1867:     ailen[row] = nrow;
1868:   }
1869:   return(0);
1870: }

1874: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1875: {
1876:   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1877:   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1878:   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1880:   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1881:   MatScalar      *aa  = a->a,*ap;
1882:   PetscReal      ratio=0.6;

1885:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

1887:   if (m) rmax = ailen[0];
1888:   for (i=1; i<mbs; i++) {
1889:     /* move each row back by the amount of empty slots (fshift) before it*/
1890:     fshift += imax[i-1] - ailen[i-1];
1891:     rmax    = PetscMax(rmax,ailen[i]);
1892:     if (fshift) {
1893:       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1894:       N  = ailen[i];
1895:       for (j=0; j<N; j++) {
1896:         ip[j-fshift] = ip[j];

1898:         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
1899:       }
1900:     }
1901:     ai[i] = ai[i-1] + ailen[i-1];
1902:   }
1903:   if (mbs) {
1904:     fshift += imax[mbs-1] - ailen[mbs-1];
1905:     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1906:   }
1907:   /* reset ilen and imax for each row */
1908:   for (i=0; i<mbs; i++) {
1909:     ailen[i] = imax[i] = ai[i+1] - ai[i];
1910:   }
1911:   a->nz = ai[mbs];

1913:   /* diagonals may have moved, so kill the diagonal pointers */
1914:   a->idiagvalid = PETSC_FALSE;
1915:   if (fshift && a->diag) {
1916:     PetscFree(a->diag);
1917:     PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));
1918:     a->diag = 0;
1919:   }
1920:   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1921:   PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);
1922:   PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
1923:   PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);

1925:   A->info.mallocs    += a->reallocs;
1926:   a->reallocs         = 0;
1927:   A->info.nz_unneeded = (PetscReal)fshift*bs2;

1929:   MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);

1931:   A->same_nonzero = PETSC_TRUE;
1932:   return(0);
1933: }

1935: /*
1936:    This function returns an array of flags which indicate the locations of contiguous
1937:    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1938:    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1939:    Assume: sizes should be long enough to hold all the values.
1940: */
1943: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1944: {
1945:   PetscInt  i,j,k,row;
1946:   PetscBool flg;

1949:   for (i=0,j=0; i<n; j++) {
1950:     row = idx[i];
1951:     if (row%bs!=0) { /* Not the begining of a block */
1952:       sizes[j] = 1;
1953:       i++;
1954:     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1955:       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1956:       i++;
1957:     } else { /* Begining of the block, so check if the complete block exists */
1958:       flg = PETSC_TRUE;
1959:       for (k=1; k<bs; k++) {
1960:         if (row+k != idx[i+k]) { /* break in the block */
1961:           flg = PETSC_FALSE;
1962:           break;
1963:         }
1964:       }
1965:       if (flg) { /* No break in the bs */
1966:         sizes[j] = bs;
1967:         i       += bs;
1968:       } else {
1969:         sizes[j] = 1;
1970:         i++;
1971:       }
1972:     }
1973:   }
1974:   *bs_max = j;
1975:   return(0);
1976: }

1980: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
1981: {
1982:   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
1983:   PetscErrorCode    ierr;
1984:   PetscInt          i,j,k,count,*rows;
1985:   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1986:   PetscScalar       zero = 0.0;
1987:   MatScalar         *aa;
1988:   const PetscScalar *xx;
1989:   PetscScalar       *bb;

1992:   /* fix right hand side if needed */
1993:   if (x && b) {
1994:     VecGetArrayRead(x,&xx);
1995:     VecGetArray(b,&bb);
1996:     for (i=0; i<is_n; i++) {
1997:       bb[is_idx[i]] = diag*xx[is_idx[i]];
1998:     }
1999:     VecRestoreArrayRead(x,&xx);
2000:     VecRestoreArray(b,&bb);
2001:   }

2003:   /* Make a copy of the IS and  sort it */
2004:   /* allocate memory for rows,sizes */
2005:   PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);

2007:   /* copy IS values to rows, and sort them */
2008:   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2009:   PetscSortInt(is_n,rows);

2011:   if (baij->keepnonzeropattern) {
2012:     for (i=0; i<is_n; i++) sizes[i] = 1;
2013:     bs_max          = is_n;
2014:     A->same_nonzero = PETSC_TRUE;
2015:   } else {
2016:     MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);

2018:     A->same_nonzero = PETSC_FALSE;
2019:   }

2021:   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2022:     row = rows[j];
2023:     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2024:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2025:     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2026:     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2027:       if (diag != (PetscScalar)0.0) {
2028:         if (baij->ilen[row/bs] > 0) {
2029:           baij->ilen[row/bs]       = 1;
2030:           baij->j[baij->i[row/bs]] = row/bs;

2032:           PetscMemzero(aa,count*bs*sizeof(MatScalar));
2033:         }
2034:         /* Now insert all the diagonal values for this bs */
2035:         for (k=0; k<bs; k++) {
2036:           (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
2037:         }
2038:       } else { /* (diag == 0.0) */
2039:         baij->ilen[row/bs] = 0;
2040:       } /* end (diag == 0.0) */
2041:     } else { /* (sizes[i] != bs) */
2042: #if defined(PETSC_USE_DEBUG)
2043:       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2044: #endif
2045:       for (k=0; k<count; k++) {
2046:         aa[0] =  zero;
2047:         aa   += bs;
2048:       }
2049:       if (diag != (PetscScalar)0.0) {
2050:         (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
2051:       }
2052:     }
2053:   }

2055:   PetscFree2(rows,sizes);
2056:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2057:   return(0);
2058: }

2062: PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2063: {
2064:   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2065:   PetscErrorCode    ierr;
2066:   PetscInt          i,j,k,count;
2067:   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2068:   PetscScalar       zero = 0.0;
2069:   MatScalar         *aa;
2070:   const PetscScalar *xx;
2071:   PetscScalar       *bb;
2072:   PetscBool         *zeroed,vecs = PETSC_FALSE;

2075:   /* fix right hand side if needed */
2076:   if (x && b) {
2077:     VecGetArrayRead(x,&xx);
2078:     VecGetArray(b,&bb);
2079:     vecs = PETSC_TRUE;
2080:   }
2081:   A->same_nonzero = PETSC_TRUE;

2083:   /* zero the columns */
2084:   PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);
2085:   PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));
2086:   for (i=0; i<is_n; i++) {
2087:     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2088:     zeroed[is_idx[i]] = PETSC_TRUE;
2089:   }
2090:   for (i=0; i<A->rmap->N; i++) {
2091:     if (!zeroed[i]) {
2092:       row = i/bs;
2093:       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2094:         for (k=0; k<bs; k++) {
2095:           col = bs*baij->j[j] + k;
2096:           if (zeroed[col]) {
2097:             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2098:             if (vecs) bb[i] -= aa[0]*xx[col];
2099:             aa[0] = 0.0;
2100:           }
2101:         }
2102:       }
2103:     } else if (vecs) bb[i] = diag*xx[i];
2104:   }
2105:   PetscFree(zeroed);
2106:   if (vecs) {
2107:     VecRestoreArrayRead(x,&xx);
2108:     VecRestoreArray(b,&bb);
2109:   }

2111:   /* zero the rows */
2112:   for (i=0; i<is_n; i++) {
2113:     row   = is_idx[i];
2114:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2115:     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2116:     for (k=0; k<count; k++) {
2117:       aa[0] =  zero;
2118:       aa   += bs;
2119:     }
2120:     if (diag != (PetscScalar)0.0) {
2121:       (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);
2122:     }
2123:   }
2124:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2125:   return(0);
2126: }

2130: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2131: {
2132:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2133:   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2134:   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2135:   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2137:   PetscInt       ridx,cidx,bs2=a->bs2;
2138:   PetscBool      roworiented=a->roworiented;
2139:   MatScalar      *ap,value,*aa=a->a,*bap;

2143:   for (k=0; k<m; k++) { /* loop over added rows */
2144:     row  = im[k];
2145:     brow = row/bs;
2146:     if (row < 0) continue;
2147: #if defined(PETSC_USE_DEBUG)
2148:     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2149: #endif
2150:     rp   = aj + ai[brow];
2151:     ap   = aa + bs2*ai[brow];
2152:     rmax = imax[brow];
2153:     nrow = ailen[brow];
2154:     low  = 0;
2155:     high = nrow;
2156:     for (l=0; l<n; l++) { /* loop over added columns */
2157:       if (in[l] < 0) continue;
2158: #if defined(PETSC_USE_DEBUG)
2159:       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2160: #endif
2161:       col  = in[l]; bcol = col/bs;
2162:       ridx = row % bs; cidx = col % bs;
2163:       if (roworiented) {
2164:         value = v[l + k*n];
2165:       } else {
2166:         value = v[k + l*m];
2167:       }
2168:       if (col <= lastcol) low = 0; else high = nrow;
2169:       lastcol = col;
2170:       while (high-low > 7) {
2171:         t = (low+high)/2;
2172:         if (rp[t] > bcol) high = t;
2173:         else              low  = t;
2174:       }
2175:       for (i=low; i<high; i++) {
2176:         if (rp[i] > bcol) break;
2177:         if (rp[i] == bcol) {
2178:           bap = ap +  bs2*i + bs*cidx + ridx;
2179:           if (is == ADD_VALUES) *bap += value;
2180:           else                  *bap  = value;
2181:           goto noinsert1;
2182:         }
2183:       }
2184:       if (nonew == 1) goto noinsert1;
2185:       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2186:       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2187:       N = nrow++ - 1; high++;
2188:       /* shift up all the later entries in this row */
2189:       for (ii=N; ii>=i; ii--) {
2190:         rp[ii+1] = rp[ii];
2191:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
2192:       }
2193:       if (N>=i) {
2194:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
2195:       }
2196:       rp[i]                      = bcol;
2197:       ap[bs2*i + bs*cidx + ridx] = value;
2198:       a->nz++;
2199: noinsert1:;
2200:       low = i;
2201:     }
2202:     ailen[brow] = nrow;
2203:   }
2204:   A->same_nonzero = PETSC_FALSE;
2205:   return(0);
2206: }

2210: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2211: {
2212:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2213:   Mat            outA;
2215:   PetscBool      row_identity,col_identity;

2218:   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2219:   ISIdentity(row,&row_identity);
2220:   ISIdentity(col,&col_identity);
2221:   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");

2223:   outA            = inA;
2224:   inA->factortype = MAT_FACTOR_LU;

2226:   MatMarkDiagonal_SeqBAIJ(inA);

2228:   PetscObjectReference((PetscObject)row);
2229:   ISDestroy(&a->row);
2230:   a->row = row;
2231:   PetscObjectReference((PetscObject)col);
2232:   ISDestroy(&a->col);
2233:   a->col = col;

2235:   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2236:   ISDestroy(&a->icol);
2237:   ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
2238:   PetscLogObjectParent(inA,a->icol);

2240:   MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));
2241:   if (!a->solve_work) {
2242:     PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);
2243:     PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));
2244:   }
2245:   MatLUFactorNumeric(outA,inA,info);
2246:   return(0);
2247: }

2251: PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2252: {
2253:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2254:   PetscInt    i,nz,mbs;

2257:   nz  = baij->maxnz;
2258:   mbs = baij->mbs;
2259:   for (i=0; i<nz; i++) {
2260:     baij->j[i] = indices[i];
2261:   }
2262:   baij->nz = nz;
2263:   for (i=0; i<mbs; i++) {
2264:     baij->ilen[i] = baij->imax[i];
2265:   }
2266:   return(0);
2267: }

2271: /*@
2272:     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2273:        in the matrix.

2275:   Input Parameters:
2276: +  mat - the SeqBAIJ matrix
2277: -  indices - the column indices

2279:   Level: advanced

2281:   Notes:
2282:     This can be called if you have precomputed the nonzero structure of the
2283:   matrix and want to provide it to the matrix object to improve the performance
2284:   of the MatSetValues() operation.

2286:     You MUST have set the correct numbers of nonzeros per row in the call to
2287:   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.

2289:     MUST be called before any calls to MatSetValues();

2291: @*/
2292: PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2293: {

2299:   PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));
2300:   return(0);
2301: }

2305: PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2306: {
2307:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2309:   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2310:   PetscReal      atmp;
2311:   PetscScalar    *x,zero = 0.0;
2312:   MatScalar      *aa;
2313:   PetscInt       ncols,brow,krow,kcol;

2316:   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2317:   bs  = A->rmap->bs;
2318:   aa  = a->a;
2319:   ai  = a->i;
2320:   aj  = a->j;
2321:   mbs = a->mbs;

2323:   VecSet(v,zero);
2324:   VecGetArray(v,&x);
2325:   VecGetLocalSize(v,&n);
2326:   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2327:   for (i=0; i<mbs; i++) {
2328:     ncols = ai[1] - ai[0]; ai++;
2329:     brow  = bs*i;
2330:     for (j=0; j<ncols; j++) {
2331:       for (kcol=0; kcol<bs; kcol++) {
2332:         for (krow=0; krow<bs; krow++) {
2333:           atmp = PetscAbsScalar(*aa);aa++;
2334:           row  = brow + krow;   /* row index */
2335:           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2336:           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2337:         }
2338:       }
2339:       aj++;
2340:     }
2341:   }
2342:   VecRestoreArray(v,&x);
2343:   return(0);
2344: }

2348: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2349: {

2353:   /* If the two matrices have the same copy implementation, use fast copy. */
2354:   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2355:     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2356:     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2357:     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;

2359:     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2360:     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2361:     PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));
2362:   } else {
2363:     MatCopy_Basic(A,B,str);
2364:   }
2365:   return(0);
2366: }

2370: PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2371: {

2375:    MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);
2376:   return(0);
2377: }

2381: PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2382: {
2383:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;

2386:   *array = a->a;
2387:   return(0);
2388: }

2392: PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2393: {
2395:   return(0);
2396: }

2400: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2401: {
2402:   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2404:   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2405:   PetscBLASInt   one=1;

2408:   if (str == SAME_NONZERO_PATTERN) {
2409:     PetscScalar  alpha = a;
2410:     PetscBLASInt bnz;
2411:     PetscBLASIntCast(x->nz*bs2,&bnz);
2412:     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2413:   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2414:     if (y->xtoy && y->XtoY != X) {
2415:       PetscFree(y->xtoy);
2416:       MatDestroy(&y->XtoY);
2417:     }
2418:     if (!y->xtoy) { /* get xtoy */
2419:       MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,NULL, y->i,y->j,NULL, &y->xtoy);
2420:       y->XtoY = X;
2421:       PetscObjectReference((PetscObject)X);
2422:     }
2423:     for (i=0; i<x->nz; i++) {
2424:       j = 0;
2425:       while (j < bs2) {
2426:         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2427:         j++;
2428:       }
2429:     }
2430:     PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
2431:   } else {
2432:     MatAXPY_Basic(Y,a,X,str);
2433:   }
2434:   return(0);
2435: }

2439: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2440: {
2441:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2442:   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2443:   MatScalar   *aa = a->a;

2446:   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2447:   return(0);
2448: }

2452: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2453: {
2454:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2455:   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2456:   MatScalar   *aa = a->a;

2459:   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2460:   return(0);
2461: }

2463: extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);

2467: /*
2468:     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2469: */
2470: PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2471: {
2472:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2474:   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2475:   PetscInt       nz = a->i[m],row,*jj,mr,col;

2478:   *nn = n;
2479:   if (!ia) return(0);
2480:   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2481:   else {
2482:     PetscMalloc((n+1)*sizeof(PetscInt),&collengths);
2483:     PetscMemzero(collengths,n*sizeof(PetscInt));
2484:     PetscMalloc((n+1)*sizeof(PetscInt),&cia);
2485:     PetscMalloc((nz+1)*sizeof(PetscInt),&cja);
2486:     jj   = a->j;
2487:     for (i=0; i<nz; i++) {
2488:       collengths[jj[i]]++;
2489:     }
2490:     cia[0] = oshift;
2491:     for (i=0; i<n; i++) {
2492:       cia[i+1] = cia[i] + collengths[i];
2493:     }
2494:     PetscMemzero(collengths,n*sizeof(PetscInt));
2495:     jj   = a->j;
2496:     for (row=0; row<m; row++) {
2497:       mr = a->i[row+1] - a->i[row];
2498:       for (i=0; i<mr; i++) {
2499:         col = *jj++;

2501:         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2502:       }
2503:     }
2504:     PetscFree(collengths);
2505:     *ia  = cia; *ja = cja;
2506:   }
2507:   return(0);
2508: }

2512: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2513: {

2517:   if (!ia) return(0);
2518:   PetscFree(*ia);
2519:   PetscFree(*ja);
2520:   return(0);
2521: }

2525: PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2526: {
2527:   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
2529:   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow;
2530:   PetscScalar    dx,*y,*xx,*w3_array;
2531:   PetscScalar    *vscale_array;
2532:   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2533:   Vec            w1      = coloring->w1,w2=coloring->w2,w3;
2534:   void           *fctx   = coloring->fctx;
2535:   PetscBool      flg     = PETSC_FALSE;
2536:   PetscInt       ctype   = coloring->ctype,N,col_start=0,col_end=0;
2537:   Vec            x1_tmp;

2543:   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");

2545:   PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);
2546:   MatSetUnfactored(J);
2547:   PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);
2548:   if (flg) {
2549:     PetscInfo(coloring,"Not calling MatZeroEntries()\n");
2550:   } else {
2551:     PetscBool assembled;
2552:     MatAssembled(J,&assembled);
2553:     if (assembled) {
2554:       MatZeroEntries(J);
2555:     }
2556:   }

2558:   x1_tmp = x1;
2559:   if (!coloring->vscale) {
2560:     VecDuplicate(x1_tmp,&coloring->vscale);
2561:   }

2563:   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2564:     VecNorm(x1_tmp,NORM_2,&unorm);
2565:   }
2566:   VecGetOwnershipRange(w1,&start,&end); /* OwnershipRange is used by ghosted x! */

2568:   /* Set w1 = F(x1) */
2569:   if (!coloring->fset) {
2570:     PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);
2571:     (*f)(sctx,x1_tmp,w1,fctx);
2572:     PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);
2573:   } else {
2574:     coloring->fset = PETSC_FALSE;
2575:   }

2577:   if (!coloring->w3) {
2578:     VecDuplicate(x1_tmp,&coloring->w3);
2579:     PetscLogObjectParent(coloring,coloring->w3);
2580:   }
2581:   w3 = coloring->w3;

2583:   /* Compute all the local scale factors, including ghost points */
2584:   VecGetLocalSize(x1_tmp,&N);
2585:   VecGetArray(x1_tmp,&xx);
2586:   VecGetArray(coloring->vscale,&vscale_array);
2587:   if (ctype == IS_COLORING_GHOSTED) {
2588:     col_start = 0; col_end = N;
2589:   } else if (ctype == IS_COLORING_GLOBAL) {
2590:     xx           = xx - start;
2591:     vscale_array = vscale_array - start;
2592:     col_start    = start; col_end = N + start;
2593:   }
2594:   for (col=col_start; col<col_end; col++) {
2595:     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2596:     if (coloring->htype[0] == 'w') {
2597:       dx = 1.0 + unorm;
2598:     } else {
2599:       dx = xx[col];
2600:     }
2601:     if (dx == (PetscScalar)0.0) dx = 1.0;
2602:     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2603:     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2604:     dx               *= epsilon;
2605:     vscale_array[col] = (PetscScalar)1.0/dx;
2606:   }
2607:   if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start;
2608:   VecRestoreArray(coloring->vscale,&vscale_array);
2609:   if (ctype == IS_COLORING_GLOBAL) {
2610:     VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);
2611:     VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);
2612:   }
2613:   if (coloring->vscaleforrow) {
2614:     vscaleforrow = coloring->vscaleforrow;
2615:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");

2617:   PetscMalloc(bs*sizeof(PetscInt),&srows);
2618:   /*
2619:     Loop over each color
2620:   */
2621:   VecGetArray(coloring->vscale,&vscale_array);
2622:   for (k=0; k<coloring->ncolors; k++) {
2623:     coloring->currentcolor = k;
2624:     for (i=0; i<bs; i++) {
2625:       VecCopy(x1_tmp,w3);
2626:       VecGetArray(w3,&w3_array);
2627:       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2628:       /*
2629:         Loop over each column associated with color
2630:         adding the perturbation to the vector w3.
2631:       */
2632:       for (l=0; l<coloring->ncolumns[k]; l++) {
2633:         col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2634:         if (coloring->htype[0] == 'w') {
2635:           dx = 1.0 + unorm;
2636:         } else {
2637:           dx = xx[col];
2638:         }
2639:         if (dx == (PetscScalar)0.0) dx = 1.0;
2640:         if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2641:         else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2642:         dx *= epsilon;
2643:         if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2644:         w3_array[col] += dx;
2645:       }
2646:       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2647:       VecRestoreArray(w3,&w3_array);

2649:       /*
2650:         Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2651:         w2 = F(x1 + dx) - F(x1)
2652:       */
2653:       PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);
2654:       (*f)(sctx,w3,w2,fctx);
2655:       PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);
2656:       VecAXPY(w2,-1.0,w1);

2658:       /*
2659:         Loop over rows of vector, putting results into Jacobian matrix
2660:       */
2661:       VecGetArray(w2,&y);
2662:       for (l=0; l<coloring->nrows[k]; l++) {
2663:         row = bs*coloring->rows[k][l];                /* local row index */
2664:         col = i + bs*coloring->columnsforrow[k][l];       /* global column index */
2665:         for (j=0; j<bs; j++) {
2666:           y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2667:           srows[j]  = row + start + j;
2668:         }
2669:         MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);
2670:       }
2671:       VecRestoreArray(w2,&y);
2672:     }
2673:   } /* endof for each color */
2674:   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2675:   VecRestoreArray(coloring->vscale,&vscale_array);
2676:   VecRestoreArray(x1_tmp,&xx);
2677:   PetscFree(srows);

2679:   coloring->currentcolor = -1;

2681:   MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
2682:   MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
2683:   PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);
2684:   return(0);
2685: }

2687: /* -------------------------------------------------------------------*/
2688: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2689:                                        MatGetRow_SeqBAIJ,
2690:                                        MatRestoreRow_SeqBAIJ,
2691:                                        MatMult_SeqBAIJ_N,
2692:                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2693:                                        MatMultTranspose_SeqBAIJ,
2694:                                        MatMultTransposeAdd_SeqBAIJ,
2695:                                        0,
2696:                                        0,
2697:                                        0,
2698:                                /* 10*/ 0,
2699:                                        MatLUFactor_SeqBAIJ,
2700:                                        0,
2701:                                        0,
2702:                                        MatTranspose_SeqBAIJ,
2703:                                /* 15*/ MatGetInfo_SeqBAIJ,
2704:                                        MatEqual_SeqBAIJ,
2705:                                        MatGetDiagonal_SeqBAIJ,
2706:                                        MatDiagonalScale_SeqBAIJ,
2707:                                        MatNorm_SeqBAIJ,
2708:                                /* 20*/ 0,
2709:                                        MatAssemblyEnd_SeqBAIJ,
2710:                                        MatSetOption_SeqBAIJ,
2711:                                        MatZeroEntries_SeqBAIJ,
2712:                                /* 24*/ MatZeroRows_SeqBAIJ,
2713:                                        0,
2714:                                        0,
2715:                                        0,
2716:                                        0,
2717:                                /* 29*/ MatSetUp_SeqBAIJ,
2718:                                        0,
2719:                                        0,
2720:                                        0,
2721:                                        0,
2722:                                /* 34*/ MatDuplicate_SeqBAIJ,
2723:                                        0,
2724:                                        0,
2725:                                        MatILUFactor_SeqBAIJ,
2726:                                        0,
2727:                                /* 39*/ MatAXPY_SeqBAIJ,
2728:                                        MatGetSubMatrices_SeqBAIJ,
2729:                                        MatIncreaseOverlap_SeqBAIJ,
2730:                                        MatGetValues_SeqBAIJ,
2731:                                        MatCopy_SeqBAIJ,
2732:                                /* 44*/ 0,
2733:                                        MatScale_SeqBAIJ,
2734:                                        0,
2735:                                        0,
2736:                                        MatZeroRowsColumns_SeqBAIJ,
2737:                                /* 49*/ 0,
2738:                                        MatGetRowIJ_SeqBAIJ,
2739:                                        MatRestoreRowIJ_SeqBAIJ,
2740:                                        MatGetColumnIJ_SeqBAIJ,
2741:                                        MatRestoreColumnIJ_SeqBAIJ,
2742:                                /* 54*/ MatFDColoringCreate_SeqAIJ,
2743:                                        0,
2744:                                        0,
2745:                                        0,
2746:                                        MatSetValuesBlocked_SeqBAIJ,
2747:                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2748:                                        MatDestroy_SeqBAIJ,
2749:                                        MatView_SeqBAIJ,
2750:                                        0,
2751:                                        0,
2752:                                /* 64*/ 0,
2753:                                        0,
2754:                                        0,
2755:                                        0,
2756:                                        0,
2757:                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2758:                                        0,
2759:                                        MatConvert_Basic,
2760:                                        0,
2761:                                        0,
2762:                                /* 74*/ 0,
2763:                                        MatFDColoringApply_BAIJ,
2764:                                        0,
2765:                                        0,
2766:                                        0,
2767:                                /* 79*/ 0,
2768:                                        0,
2769:                                        0,
2770:                                        0,
2771:                                        MatLoad_SeqBAIJ,
2772:                                /* 84*/ 0,
2773:                                        0,
2774:                                        0,
2775:                                        0,
2776:                                        0,
2777:                                /* 89*/ 0,
2778:                                        0,
2779:                                        0,
2780:                                        0,
2781:                                        0,
2782:                                /* 94*/ 0,
2783:                                        0,
2784:                                        0,
2785:                                        0,
2786:                                        0,
2787:                                /* 99*/ 0,
2788:                                        0,
2789:                                        0,
2790:                                        0,
2791:                                        0,
2792:                                /*104*/ 0,
2793:                                        MatRealPart_SeqBAIJ,
2794:                                        MatImaginaryPart_SeqBAIJ,
2795:                                        0,
2796:                                        0,
2797:                                /*109*/ 0,
2798:                                        0,
2799:                                        0,
2800:                                        0,
2801:                                        MatMissingDiagonal_SeqBAIJ,
2802:                                /*114*/ 0,
2803:                                        0,
2804:                                        0,
2805:                                        0,
2806:                                        0,
2807:                                /*119*/ 0,
2808:                                        0,
2809:                                        MatMultHermitianTranspose_SeqBAIJ,
2810:                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2811:                                        0,
2812:                                /*124*/ 0,
2813:                                        0,
2814:                                        MatInvertBlockDiagonal_SeqBAIJ,
2815:                                        0,
2816:                                        0,
2817:                                /*129*/ 0,
2818:                                        0,
2819:                                        0,
2820:                                        0,
2821:                                        0,
2822:                                /*134*/ 0,
2823:                                        0,
2824:                                        0,
2825:                                        0,
2826:                                        0,
2827:                                /*139*/ 0,
2828:                                        0
2829: };

2833: PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2834: {
2835:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2836:   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;

2840:   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");

2842:   /* allocate space for values if not already there */
2843:   if (!aij->saved_values) {
2844:     PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
2845:     PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));
2846:   }

2848:   /* copy values over */
2849:   PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
2850:   return(0);
2851: }

2855: PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2856: {
2857:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2859:   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;

2862:   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2863:   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");

2865:   /* copy values over */
2866:   PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
2867:   return(0);
2868: }

2870: PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2871: PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);

2875: PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2876: {
2877:   Mat_SeqBAIJ    *b;
2879:   PetscInt       i,mbs,nbs,bs2;
2880:   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;

2883:   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2884:   if (nz == MAT_SKIP_ALLOCATION) {
2885:     skipallocation = PETSC_TRUE;
2886:     nz             = 0;
2887:   }

2889:   PetscLayoutSetBlockSize(B->rmap,bs);
2890:   PetscLayoutSetBlockSize(B->cmap,bs);
2891:   PetscLayoutSetUp(B->rmap);
2892:   PetscLayoutSetUp(B->cmap);
2893:   PetscLayoutGetBlockSize(B->rmap,&bs);

2895:   B->preallocated = PETSC_TRUE;

2897:   mbs = B->rmap->n/bs;
2898:   nbs = B->cmap->n/bs;
2899:   bs2 = bs*bs;

2901:   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);

2903:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2904:   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2905:   if (nnz) {
2906:     for (i=0; i<mbs; i++) {
2907:       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2908:       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2909:     }
2910:   }

2912:   b    = (Mat_SeqBAIJ*)B->data;
2913:   PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2914:   PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);
2915:   PetscOptionsEnd();

2917:   if (!flg) {
2918:     switch (bs) {
2919:     case 1:
2920:       B->ops->mult    = MatMult_SeqBAIJ_1;
2921:       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2922:       break;
2923:     case 2:
2924:       B->ops->mult    = MatMult_SeqBAIJ_2;
2925:       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2926:       break;
2927:     case 3:
2928:       B->ops->mult    = MatMult_SeqBAIJ_3;
2929:       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2930:       break;
2931:     case 4:
2932:       B->ops->mult    = MatMult_SeqBAIJ_4;
2933:       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2934:       break;
2935:     case 5:
2936:       B->ops->mult    = MatMult_SeqBAIJ_5;
2937:       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2938:       break;
2939:     case 6:
2940:       B->ops->mult    = MatMult_SeqBAIJ_6;
2941:       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2942:       break;
2943:     case 7:
2944:       B->ops->mult    = MatMult_SeqBAIJ_7;
2945:       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2946:       break;
2947:     case 15:
2948:       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2949:       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2950:       break;
2951:     default:
2952:       B->ops->mult    = MatMult_SeqBAIJ_N;
2953:       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2954:       break;
2955:     }
2956:   }
2957:   B->ops->sor = MatSOR_SeqBAIJ;
2958:   b->mbs = mbs;
2959:   b->nbs = nbs;
2960:   if (!skipallocation) {
2961:     if (!b->imax) {
2962:       PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);
2963:       PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));

2965:       b->free_imax_ilen = PETSC_TRUE;
2966:     }
2967:     /* b->ilen will count nonzeros in each block row so far. */
2968:     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2969:     if (!nnz) {
2970:       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2971:       else if (nz < 0) nz = 1;
2972:       for (i=0; i<mbs; i++) b->imax[i] = nz;
2973:       nz = nz*mbs;
2974:     } else {
2975:       nz = 0;
2976:       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2977:     }

2979:     /* allocate the matrix space */
2980:     MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
2981:     PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);
2982:     PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
2983:     PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
2984:     PetscMemzero(b->j,nz*sizeof(PetscInt));

2986:     b->singlemalloc = PETSC_TRUE;
2987:     b->i[0]         = 0;
2988:     for (i=1; i<mbs+1; i++) {
2989:       b->i[i] = b->i[i-1] + b->imax[i-1];
2990:     }
2991:     b->free_a  = PETSC_TRUE;
2992:     b->free_ij = PETSC_TRUE;
2993:   } else {
2994:     b->free_a  = PETSC_FALSE;
2995:     b->free_ij = PETSC_FALSE;
2996:   }

2998:   b->bs2              = bs2;
2999:   b->mbs              = mbs;
3000:   b->nz               = 0;
3001:   b->maxnz            = nz;
3002:   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3003:   if (realalloc) {MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);}
3004:   return(0);
3005: }

3009: PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3010: {
3011:   PetscInt       i,m,nz,nz_max=0,*nnz;
3012:   PetscScalar    *values=0;

3016:   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3017:   PetscLayoutSetBlockSize(B->rmap,bs);
3018:   PetscLayoutSetBlockSize(B->cmap,bs);
3019:   PetscLayoutSetUp(B->rmap);
3020:   PetscLayoutSetUp(B->cmap);
3021:   PetscLayoutGetBlockSize(B->rmap,&bs);
3022:   m    = B->rmap->n/bs;

3024:   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
3025:   PetscMalloc((m+1) * sizeof(PetscInt), &nnz);
3026:   for (i=0; i<m; i++) {
3027:     nz = ii[i+1]- ii[i];
3028:     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
3029:     nz_max = PetscMax(nz_max, nz);
3030:     nnz[i] = nz;
3031:   }
3032:   MatSeqBAIJSetPreallocation(B,bs,0,nnz);
3033:   PetscFree(nnz);

3035:   values = (PetscScalar*)V;
3036:   if (!values) {
3037:     PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);
3038:     PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));
3039:   }
3040:   for (i=0; i<m; i++) {
3041:     PetscInt          ncols  = ii[i+1] - ii[i];
3042:     const PetscInt    *icols = jj + ii[i];
3043:     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3044:     MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);
3045:   }
3046:   if (!V) { PetscFree(values); }
3047:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3048:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3049:   MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3050:   return(0);
3051: }

3053: PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3054: PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3055: #if defined(PETSC_HAVE_MUMPS)
3056: PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3057: #endif
3058: extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*);

3060: /*MC
3061:    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3062:    block sparse compressed row format.

3064:    Options Database Keys:
3065: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()

3067:   Level: beginner

3069: .seealso: MatCreateSeqBAIJ()
3070: M*/

3072: PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);

3076: PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3077: {
3079:   PetscMPIInt    size;
3080:   Mat_SeqBAIJ    *b;

3083:   MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
3084:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");

3086:   PetscNewLog(B,Mat_SeqBAIJ,&b);
3087:   B->data = (void*)b;
3088:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));

3090:   b->row          = 0;
3091:   b->col          = 0;
3092:   b->icol         = 0;
3093:   b->reallocs     = 0;
3094:   b->saved_values = 0;

3096:   b->roworiented        = PETSC_TRUE;
3097:   b->nonew              = 0;
3098:   b->diag               = 0;
3099:   b->solve_work         = 0;
3100:   b->mult_work          = 0;
3101:   B->spptr              = 0;
3102:   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3103:   b->keepnonzeropattern = PETSC_FALSE;
3104:   b->xtoy               = 0;
3105:   b->XtoY               = 0;
3106:   B->same_nonzero       = PETSC_FALSE;

3108:   PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C",MatGetFactorAvailable_seqbaij_petsc);
3109:   PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C",MatGetFactor_seqbaij_petsc);
3110:   PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C",MatGetFactor_seqbaij_bstrm);
3111: #if defined(PETSC_HAVE_MUMPS)
3112:   PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", MatGetFactor_baij_mumps);
3113: #endif
3114:   PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);
3115:   PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);
3116:   PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);
3117:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);
3118:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);
3119:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);
3120:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);
3121:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);
3122:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);
3123:   PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);
3124:   PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
3125:   return(0);
3126: }

3130: PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3131: {
3132:   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3134:   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;

3137:   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");

3139:   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3140:     c->imax           = a->imax;
3141:     c->ilen           = a->ilen;
3142:     c->free_imax_ilen = PETSC_FALSE;
3143:   } else {
3144:     PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);
3145:     PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));
3146:     for (i=0; i<mbs; i++) {
3147:       c->imax[i] = a->imax[i];
3148:       c->ilen[i] = a->ilen[i];
3149:     }
3150:     c->free_imax_ilen = PETSC_TRUE;
3151:   }

3153:   /* allocate the matrix space */
3154:   if (mallocmatspace) {
3155:     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3156:       PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);
3157:       PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));
3158:       PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));

3160:       c->i            = a->i;
3161:       c->j            = a->j;
3162:       c->singlemalloc = PETSC_FALSE;
3163:       c->free_a       = PETSC_TRUE;
3164:       c->free_ij      = PETSC_FALSE;
3165:       c->parent       = A;
3166:       C->preallocated = PETSC_TRUE;
3167:       C->assembled    = PETSC_TRUE;

3169:       PetscObjectReference((PetscObject)A);
3170:       MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3171:       MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3172:     } else {
3173:       PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);
3174:       PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));

3176:       c->singlemalloc = PETSC_TRUE;
3177:       c->free_a       = PETSC_TRUE;
3178:       c->free_ij      = PETSC_TRUE;

3180:       PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
3181:       if (mbs > 0) {
3182:         PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
3183:         if (cpvalues == MAT_COPY_VALUES) {
3184:           PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
3185:         } else {
3186:           PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
3187:         }
3188:       }
3189:       C->preallocated = PETSC_TRUE;
3190:       C->assembled    = PETSC_TRUE;
3191:     }
3192:   }

3194:   c->roworiented = a->roworiented;
3195:   c->nonew       = a->nonew;

3197:   PetscLayoutReference(A->rmap,&C->rmap);
3198:   PetscLayoutReference(A->cmap,&C->cmap);

3200:   c->bs2         = a->bs2;
3201:   c->mbs         = a->mbs;
3202:   c->nbs         = a->nbs;

3204:   if (a->diag) {
3205:     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3206:       c->diag      = a->diag;
3207:       c->free_diag = PETSC_FALSE;
3208:     } else {
3209:       PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);
3210:       PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));
3211:       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3212:       c->free_diag = PETSC_TRUE;
3213:     }
3214:   } else c->diag = 0;

3216:   c->nz         = a->nz;
3217:   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3218:   c->solve_work = 0;
3219:   c->mult_work  = 0;

3221:   c->compressedrow.use   = a->compressedrow.use;
3222:   c->compressedrow.nrows = a->compressedrow.nrows;
3223:   c->compressedrow.check = a->compressedrow.check;
3224:   if (a->compressedrow.use) {
3225:     i    = a->compressedrow.nrows;
3226:     PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);
3227:     PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));
3228:     PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));
3229:     PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));
3230:   } else {
3231:     c->compressedrow.use    = PETSC_FALSE;
3232:     c->compressedrow.i      = NULL;
3233:     c->compressedrow.rindex = NULL;
3234:   }
3235:   C->same_nonzero = A->same_nonzero;

3237:   PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
3238:   PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
3239:   return(0);
3240: }

3244: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3245: {

3249:   MatCreate(PetscObjectComm((PetscObject)A),B);
3250:   MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);
3251:   MatSetType(*B,MATSEQBAIJ);
3252:   MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3253:   return(0);
3254: }

3258: PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3259: {
3260:   Mat_SeqBAIJ    *a;
3262:   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3263:   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3264:   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3265:   PetscInt       *masked,nmask,tmp,bs2,ishift;
3266:   PetscMPIInt    size;
3267:   int            fd;
3268:   PetscScalar    *aa;
3269:   MPI_Comm       comm;

3272:   PetscObjectGetComm((PetscObject)viewer,&comm);
3273:   PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");
3274:   PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);
3275:   PetscOptionsEnd();
3276:   bs2  = bs*bs;

3278:   MPI_Comm_size(comm,&size);
3279:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3280:   PetscViewerBinaryGetDescriptor(viewer,&fd);
3281:   PetscBinaryRead(fd,header,4,PETSC_INT);
3282:   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3283:   M = header[1]; N = header[2]; nz = header[3];

3285:   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3286:   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");

3288:   /*
3289:      This code adds extra rows to make sure the number of rows is
3290:     divisible by the blocksize
3291:   */
3292:   mbs        = M/bs;
3293:   extra_rows = bs - M + bs*(mbs);
3294:   if (extra_rows == bs) extra_rows = 0;
3295:   else mbs++;
3296:   if (extra_rows) {
3297:     PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
3298:   }

3300:   /* Set global sizes if not already set */
3301:   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3302:     MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3303:   } else { /* Check if the matrix global sizes are correct */
3304:     MatGetSize(newmat,&rows,&cols);
3305:     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3306:       MatGetLocalSize(newmat,&rows,&cols);
3307:     }
3308:     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3309:   }

3311:   /* read in row lengths */
3312:   PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);
3313:   PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
3314:   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;

3316:   /* read in column indices */
3317:   PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);
3318:   PetscBinaryRead(fd,jj,nz,PETSC_INT);
3319:   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;

3321:   /* loop over row lengths determining block row lengths */
3322:   PetscMalloc(mbs*sizeof(PetscInt),&browlengths);
3323:   PetscMemzero(browlengths,mbs*sizeof(PetscInt));
3324:   PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);
3325:   PetscMemzero(mask,mbs*sizeof(PetscInt));
3326:   rowcount = 0;
3327:   nzcount  = 0;
3328:   for (i=0; i<mbs; i++) {
3329:     nmask = 0;
3330:     for (j=0; j<bs; j++) {
3331:       kmax = rowlengths[rowcount];
3332:       for (k=0; k<kmax; k++) {
3333:         tmp = jj[nzcount++]/bs;
3334:         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3335:       }
3336:       rowcount++;
3337:     }
3338:     browlengths[i] += nmask;
3339:     /* zero out the mask elements we set */
3340:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3341:   }

3343:   /* Do preallocation  */
3344:   MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);
3345:   a    = (Mat_SeqBAIJ*)newmat->data;

3347:   /* set matrix "i" values */
3348:   a->i[0] = 0;
3349:   for (i=1; i<= mbs; i++) {
3350:     a->i[i]      = a->i[i-1] + browlengths[i-1];
3351:     a->ilen[i-1] = browlengths[i-1];
3352:   }
3353:   a->nz = 0;
3354:   for (i=0; i<mbs; i++) a->nz += browlengths[i];

3356:   /* read in nonzero values */
3357:   PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
3358:   PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
3359:   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;

3361:   /* set "a" and "j" values into matrix */
3362:   nzcount = 0; jcount = 0;
3363:   for (i=0; i<mbs; i++) {
3364:     nzcountb = nzcount;
3365:     nmask    = 0;
3366:     for (j=0; j<bs; j++) {
3367:       kmax = rowlengths[i*bs+j];
3368:       for (k=0; k<kmax; k++) {
3369:         tmp = jj[nzcount++]/bs;
3370:         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3371:       }
3372:     }
3373:     /* sort the masked values */
3374:     PetscSortInt(nmask,masked);

3376:     /* set "j" values into matrix */
3377:     maskcount = 1;
3378:     for (j=0; j<nmask; j++) {
3379:       a->j[jcount++]  = masked[j];
3380:       mask[masked[j]] = maskcount++;
3381:     }
3382:     /* set "a" values into matrix */
3383:     ishift = bs2*a->i[i];
3384:     for (j=0; j<bs; j++) {
3385:       kmax = rowlengths[i*bs+j];
3386:       for (k=0; k<kmax; k++) {
3387:         tmp       = jj[nzcountb]/bs;
3388:         block     = mask[tmp] - 1;
3389:         point     = jj[nzcountb] - bs*tmp;
3390:         idx       = ishift + bs2*block + j + bs*point;
3391:         a->a[idx] = (MatScalar)aa[nzcountb++];
3392:       }
3393:     }
3394:     /* zero out the mask elements we set */
3395:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3396:   }
3397:   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");

3399:   PetscFree(rowlengths);
3400:   PetscFree(browlengths);
3401:   PetscFree(aa);
3402:   PetscFree(jj);
3403:   PetscFree2(mask,masked);

3405:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
3406:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
3407:   return(0);
3408: }

3412: /*@C
3413:    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3414:    compressed row) format.  For good matrix assembly performance the
3415:    user should preallocate the matrix storage by setting the parameter nz
3416:    (or the array nnz).  By setting these parameters accurately, performance
3417:    during matrix assembly can be increased by more than a factor of 50.

3419:    Collective on MPI_Comm

3421:    Input Parameters:
3422: +  comm - MPI communicator, set to PETSC_COMM_SELF
3423: .  bs - size of block
3424: .  m - number of rows
3425: .  n - number of columns
3426: .  nz - number of nonzero blocks  per block row (same for all rows)
3427: -  nnz - array containing the number of nonzero blocks in the various block rows
3428:          (possibly different for each block row) or NULL

3430:    Output Parameter:
3431: .  A - the matrix

3433:    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3434:    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3435:    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

3437:    Options Database Keys:
3438: .   -mat_no_unroll - uses code that does not unroll the loops in the
3439:                      block calculations (much slower)
3440: .    -mat_block_size - size of the blocks to use

3442:    Level: intermediate

3444:    Notes:
3445:    The number of rows and columns must be divisible by blocksize.

3447:    If the nnz parameter is given then the nz parameter is ignored

3449:    A nonzero block is any block that as 1 or more nonzeros in it

3451:    The block AIJ format is fully compatible with standard Fortran 77
3452:    storage.  That is, the stored row and column indices can begin at
3453:    either one (as in Fortran) or zero.  See the users' manual for details.

3455:    Specify the preallocated storage with either nz or nnz (not both).
3456:    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3457:    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3458:    matrices.

3460: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3461: @*/
3462: PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3463: {

3467:   MatCreate(comm,A);
3468:   MatSetSizes(*A,m,n,m,n);
3469:   MatSetType(*A,MATSEQBAIJ);
3470:   MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
3471:   return(0);
3472: }

3476: /*@C
3477:    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3478:    per row in the matrix. For good matrix assembly performance the
3479:    user should preallocate the matrix storage by setting the parameter nz
3480:    (or the array nnz).  By setting these parameters accurately, performance
3481:    during matrix assembly can be increased by more than a factor of 50.

3483:    Collective on MPI_Comm

3485:    Input Parameters:
3486: +  A - the matrix
3487: .  bs - size of block
3488: .  nz - number of block nonzeros per block row (same for all rows)
3489: -  nnz - array containing the number of block nonzeros in the various block rows
3490:          (possibly different for each block row) or NULL

3492:    Options Database Keys:
3493: .   -mat_no_unroll - uses code that does not unroll the loops in the
3494:                      block calculations (much slower)
3495: .    -mat_block_size - size of the blocks to use

3497:    Level: intermediate

3499:    Notes:
3500:    If the nnz parameter is given then the nz parameter is ignored

3502:    You can call MatGetInfo() to get information on how effective the preallocation was;
3503:    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3504:    You can also run with the option -info and look for messages with the string
3505:    malloc in them to see if additional memory allocation was needed.

3507:    The block AIJ format is fully compatible with standard Fortran 77
3508:    storage.  That is, the stored row and column indices can begin at
3509:    either one (as in Fortran) or zero.  See the users' manual for details.

3511:    Specify the preallocated storage with either nz or nnz (not both).
3512:    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3513:    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.

3515: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3516: @*/
3517: PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3518: {

3525:   PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
3526:   return(0);
3527: }

3531: /*@C
3532:    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3533:    (the default sequential PETSc format).

3535:    Collective on MPI_Comm

3537:    Input Parameters:
3538: +  A - the matrix
3539: .  i - the indices into j for the start of each local row (starts with zero)
3540: .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3541: -  v - optional values in the matrix

3543:    Level: developer

3545: .keywords: matrix, aij, compressed row, sparse

3547: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3548: @*/
3549: PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3550: {

3557:   PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
3558:   return(0);
3559: }


3564: /*@
3565:      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.

3567:      Collective on MPI_Comm

3569:    Input Parameters:
3570: +  comm - must be an MPI communicator of size 1
3571: .  bs - size of block
3572: .  m - number of rows
3573: .  n - number of columns
3574: .  i - row indices
3575: .  j - column indices
3576: -  a - matrix values

3578:    Output Parameter:
3579: .  mat - the matrix

3581:    Level: advanced

3583:    Notes:
3584:        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3585:     once the matrix is destroyed

3587:        You cannot set new nonzero locations into this matrix, that will generate an error.

3589:        The i and j indices are 0 based

3591:        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).


3594: .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()

3596: @*/
3597: PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3598: {
3600:   PetscInt       ii;
3601:   Mat_SeqBAIJ    *baij;

3604:   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3605:   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");

3607:   MatCreate(comm,mat);
3608:   MatSetSizes(*mat,m,n,m,n);
3609:   MatSetType(*mat,MATSEQBAIJ);
3610:   MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);
3611:   baij = (Mat_SeqBAIJ*)(*mat)->data;
3612:   PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);
3613:   PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));

3615:   baij->i = i;
3616:   baij->j = j;
3617:   baij->a = a;

3619:   baij->singlemalloc = PETSC_FALSE;
3620:   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3621:   baij->free_a       = PETSC_FALSE;
3622:   baij->free_ij      = PETSC_FALSE;

3624:   for (ii=0; ii<m; ii++) {
3625:     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3626: #if defined(PETSC_USE_DEBUG)
3627:     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3628: #endif
3629:   }
3630: #if defined(PETSC_USE_DEBUG)
3631:   for (ii=0; ii<baij->i[m]; ii++) {
3632:     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3633:     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3634:   }
3635: #endif

3637:   MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3638:   MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3639:   return(0);
3640: }