Actual source code: baij.c

petsc-master 2017-07-27
Report Typos and Errors

  2: /*
  3:     Defines the basic matrix operations for the BAIJ (compressed row)
  4:   matrix storage format.
  5: */
  6:  #include <../src/mat/impls/baij/seq/baij.h>
  7:  #include <petscblaslapack.h>
  8:  #include <petsc/private/kernels/blockinvert.h>
  9:  #include <petsc/private/kernels/blockmatmult.h>

 11: #if defined(PETSC_HAVE_HYPRE)
 12: PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
 13: #endif

 15: PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
 16: {
 17:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
 19:   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
 20:   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
 21:   PetscReal      shift = 0.0;
 22:   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;

 25:   allowzeropivot = PetscNot(A->erroriffailure);

 27:   if (a->idiagvalid) {
 28:     if (values) *values = a->idiag;
 29:     return(0);
 30:   }
 31:   MatMarkDiagonal_SeqBAIJ(A);
 32:   diag_offset = a->diag;
 33:   if (!a->idiag) {
 34:     PetscMalloc1(2*bs2*mbs,&a->idiag);
 35:     PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));
 36:   }
 37:   diag  = a->idiag;
 38:   mdiag = a->idiag+bs2*mbs;
 39:   if (values) *values = a->idiag;
 40:   /* factor and invert each block */
 41:   switch (bs) {
 42:   case 1:
 43:     for (i=0; i<mbs; i++) {
 44:       odiag    = v + 1*diag_offset[i];
 45:       diag[0]  = odiag[0];
 46:       mdiag[0] = odiag[0];

 48:       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
 49:         if (allowzeropivot) {
 50:           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
 51:           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
 52:           A->factorerror_zeropivot_row   = i;
 53:           PetscInfo1(A,"Zero pivot, row %D\n",i);
 54:         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
 55:       }

 57:       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
 58:       diag    += 1;
 59:       mdiag   += 1;
 60:     }
 61:     break;
 62:   case 2:
 63:     for (i=0; i<mbs; i++) {
 64:       odiag    = v + 4*diag_offset[i];
 65:       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 66:       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 67:       PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);
 68:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
 69:       diag    += 4;
 70:       mdiag   += 4;
 71:     }
 72:     break;
 73:   case 3:
 74:     for (i=0; i<mbs; i++) {
 75:       odiag    = v + 9*diag_offset[i];
 76:       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 77:       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
 78:       diag[8]  = odiag[8];
 79:       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 80:       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
 81:       mdiag[8] = odiag[8];
 82:       PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);
 83:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
 84:       diag    += 9;
 85:       mdiag   += 9;
 86:     }
 87:     break;
 88:   case 4:
 89:     for (i=0; i<mbs; i++) {
 90:       odiag  = v + 16*diag_offset[i];
 91:       PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
 92:       PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
 93:       PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);
 94:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
 95:       diag  += 16;
 96:       mdiag += 16;
 97:     }
 98:     break;
 99:   case 5:
100:     for (i=0; i<mbs; i++) {
101:       odiag  = v + 25*diag_offset[i];
102:       PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
103:       PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
104:       PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);
105:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
106:       diag  += 25;
107:       mdiag += 25;
108:     }
109:     break;
110:   case 6:
111:     for (i=0; i<mbs; i++) {
112:       odiag  = v + 36*diag_offset[i];
113:       PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));
114:       PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));
115:       PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);
116:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
117:       diag  += 36;
118:       mdiag += 36;
119:     }
120:     break;
121:   case 7:
122:     for (i=0; i<mbs; i++) {
123:       odiag  = v + 49*diag_offset[i];
124:       PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));
125:       PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));
126:       PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);
127:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
128:       diag  += 49;
129:       mdiag += 49;
130:     }
131:     break;
132:   default:
133:     PetscMalloc2(bs,&v_work,bs,&v_pivots);
134:     for (i=0; i<mbs; i++) {
135:       odiag  = v + bs2*diag_offset[i];
136:       PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));
137:       PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));
138:       PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);
139:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
140:       diag  += bs2;
141:       mdiag += bs2;
142:     }
143:     PetscFree2(v_work,v_pivots);
144:   }
145:   a->idiagvalid = PETSC_TRUE;
146:   return(0);
147: }

149: PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
150: {
151:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
152:   PetscScalar       *x,*work,*w,*workt,*t;
153:   const MatScalar   *v,*aa = a->a, *idiag;
154:   const PetscScalar *b,*xb;
155:   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
156:   PetscErrorCode    ierr;
157:   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
158:   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;

161:   if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
162:   its = its*lits;
163:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
164:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
165:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
166:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
167:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");

169:   if (!a->idiagvalid) {MatInvertBlockDiagonal(A,NULL);}

171:   if (!m) return(0);
172:   diag  = a->diag;
173:   idiag = a->idiag;
174:   k    = PetscMax(A->rmap->n,A->cmap->n);
175:   if (!a->mult_work) {
176:     PetscMalloc1(k+1,&a->mult_work);
177:   }
178:   if (!a->sor_workt) {
179:     PetscMalloc1(k,&a->sor_workt);
180:   }
181:   if (!a->sor_work) {
182:     PetscMalloc1(bs,&a->sor_work);
183:   }
184:   work = a->mult_work;
185:   t    = a->sor_workt;
186:   w    = a->sor_work;

188:   VecGetArray(xx,&x);
189:   VecGetArrayRead(bb,&b);

191:   if (flag & SOR_ZERO_INITIAL_GUESS) {
192:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
193:       switch (bs) {
194:       case 1:
195:         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
196:         t[0] = b[0];
197:         i2     = 1;
198:         idiag += 1;
199:         for (i=1; i<m; i++) {
200:           v  = aa + ai[i];
201:           vi = aj + ai[i];
202:           nz = diag[i] - ai[i];
203:           s[0] = b[i2];
204:           for (j=0; j<nz; j++) {
205:             xw[0] = x[vi[j]];
206:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
207:           }
208:           t[i2] = s[0];
209:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
210:           x[i2]  = xw[0];
211:           idiag += 1;
212:           i2    += 1;
213:         }
214:         break;
215:       case 2:
216:         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
217:         t[0] = b[0]; t[1] = b[1];
218:         i2     = 2;
219:         idiag += 4;
220:         for (i=1; i<m; i++) {
221:           v  = aa + 4*ai[i];
222:           vi = aj + ai[i];
223:           nz = diag[i] - ai[i];
224:           s[0] = b[i2]; s[1] = b[i2+1];
225:           for (j=0; j<nz; j++) {
226:             idx = 2*vi[j];
227:             it  = 4*j;
228:             xw[0] = x[idx]; xw[1] = x[1+idx];
229:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
230:           }
231:           t[i2] = s[0]; t[i2+1] = s[1];
232:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
233:           x[i2]   = xw[0]; x[i2+1] = xw[1];
234:           idiag  += 4;
235:           i2     += 2;
236:         }
237:         break;
238:       case 3:
239:         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
240:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
241:         i2     = 3;
242:         idiag += 9;
243:         for (i=1; i<m; i++) {
244:           v  = aa + 9*ai[i];
245:           vi = aj + ai[i];
246:           nz = diag[i] - ai[i];
247:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
248:           while (nz--) {
249:             idx = 3*(*vi++);
250:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
251:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
252:             v  += 9;
253:           }
254:           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
255:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
256:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
257:           idiag  += 9;
258:           i2     += 3;
259:         }
260:         break;
261:       case 4:
262:         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
263:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
264:         i2     = 4;
265:         idiag += 16;
266:         for (i=1; i<m; i++) {
267:           v  = aa + 16*ai[i];
268:           vi = aj + ai[i];
269:           nz = diag[i] - ai[i];
270:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
271:           while (nz--) {
272:             idx = 4*(*vi++);
273:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
274:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
275:             v  += 16;
276:           }
277:           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
278:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
279:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
280:           idiag  += 16;
281:           i2     += 4;
282:         }
283:         break;
284:       case 5:
285:         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
286:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
287:         i2     = 5;
288:         idiag += 25;
289:         for (i=1; i<m; i++) {
290:           v  = aa + 25*ai[i];
291:           vi = aj + ai[i];
292:           nz = diag[i] - ai[i];
293:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
294:           while (nz--) {
295:             idx = 5*(*vi++);
296:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
297:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
298:             v  += 25;
299:           }
300:           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
301:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
302:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
303:           idiag  += 25;
304:           i2     += 5;
305:         }
306:         break;
307:       case 6:
308:         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
309:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
310:         i2     = 6;
311:         idiag += 36;
312:         for (i=1; i<m; i++) {
313:           v  = aa + 36*ai[i];
314:           vi = aj + ai[i];
315:           nz = diag[i] - ai[i];
316:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
317:           while (nz--) {
318:             idx = 6*(*vi++);
319:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
320:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
321:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
322:             v  += 36;
323:           }
324:           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
325:           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
326:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
327:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
328:           idiag  += 36;
329:           i2     += 6;
330:         }
331:         break;
332:       case 7:
333:         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
334:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
335:         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
336:         i2     = 7;
337:         idiag += 49;
338:         for (i=1; i<m; i++) {
339:           v  = aa + 49*ai[i];
340:           vi = aj + ai[i];
341:           nz = diag[i] - ai[i];
342:           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
343:           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
344:           while (nz--) {
345:             idx = 7*(*vi++);
346:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
347:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
348:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
349:             v  += 49;
350:           }
351:           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
352:           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
353:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
354:           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
355:           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
356:           idiag  += 49;
357:           i2     += 7;
358:         }
359:         break;
360:       default:
361:         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
362:         PetscMemcpy(t,b,bs*sizeof(PetscScalar));
363:         i2     = bs;
364:         idiag += bs2;
365:         for (i=1; i<m; i++) {
366:           v  = aa + bs2*ai[i];
367:           vi = aj + ai[i];
368:           nz = diag[i] - ai[i];

370:           PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
371:           /* copy all rows of x that are needed into contiguous space */
372:           workt = work;
373:           for (j=0; j<nz; j++) {
374:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
375:             workt += bs;
376:           }
377:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
378:           PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));
379:           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);

381:           idiag += bs2;
382:           i2    += bs;
383:         }
384:         break;
385:       }
386:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
387:       PetscLogFlops(1.0*bs2*a->nz);
388:       xb = t;
389:     }
390:     else xb = b;
391:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
392:       idiag = a->idiag+bs2*(a->mbs-1);
393:       i2 = bs * (m-1);
394:       switch (bs) {
395:       case 1:
396:         s[0]  = xb[i2];
397:         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
398:         x[i2] = xw[0];
399:         i2   -= 1;
400:         for (i=m-2; i>=0; i--) {
401:           v  = aa + (diag[i]+1);
402:           vi = aj + diag[i] + 1;
403:           nz = ai[i+1] - diag[i] - 1;
404:           s[0] = xb[i2];
405:           for (j=0; j<nz; j++) {
406:             xw[0] = x[vi[j]];
407:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
408:           }
409:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
410:           x[i2]  = xw[0];
411:           idiag -= 1;
412:           i2    -= 1;
413:         }
414:         break;
415:       case 2:
416:         s[0]  = xb[i2]; s[1] = xb[i2+1];
417:         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
418:         x[i2] = xw[0]; x[i2+1] = xw[1];
419:         i2    -= 2;
420:         idiag -= 4;
421:         for (i=m-2; i>=0; i--) {
422:           v  = aa + 4*(diag[i] + 1);
423:           vi = aj + diag[i] + 1;
424:           nz = ai[i+1] - diag[i] - 1;
425:           s[0] = xb[i2]; s[1] = xb[i2+1];
426:           for (j=0; j<nz; j++) {
427:             idx = 2*vi[j];
428:             it  = 4*j;
429:             xw[0] = x[idx]; xw[1] = x[1+idx];
430:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
431:           }
432:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
433:           x[i2]   = xw[0]; x[i2+1] = xw[1];
434:           idiag  -= 4;
435:           i2     -= 2;
436:         }
437:         break;
438:       case 3:
439:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
440:         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
441:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
442:         i2    -= 3;
443:         idiag -= 9;
444:         for (i=m-2; i>=0; i--) {
445:           v  = aa + 9*(diag[i]+1);
446:           vi = aj + diag[i] + 1;
447:           nz = ai[i+1] - diag[i] - 1;
448:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
449:           while (nz--) {
450:             idx = 3*(*vi++);
451:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
452:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
453:             v  += 9;
454:           }
455:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
456:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
457:           idiag  -= 9;
458:           i2     -= 3;
459:         }
460:         break;
461:       case 4:
462:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
463:         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
464:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
465:         i2    -= 4;
466:         idiag -= 16;
467:         for (i=m-2; i>=0; i--) {
468:           v  = aa + 16*(diag[i]+1);
469:           vi = aj + diag[i] + 1;
470:           nz = ai[i+1] - diag[i] - 1;
471:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
472:           while (nz--) {
473:             idx = 4*(*vi++);
474:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
475:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
476:             v  += 16;
477:           }
478:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
479:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
480:           idiag  -= 16;
481:           i2     -= 4;
482:         }
483:         break;
484:       case 5:
485:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
486:         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
487:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
488:         i2    -= 5;
489:         idiag -= 25;
490:         for (i=m-2; i>=0; i--) {
491:           v  = aa + 25*(diag[i]+1);
492:           vi = aj + diag[i] + 1;
493:           nz = ai[i+1] - diag[i] - 1;
494:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
495:           while (nz--) {
496:             idx = 5*(*vi++);
497:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
498:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
499:             v  += 25;
500:           }
501:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
502:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
503:           idiag  -= 25;
504:           i2     -= 5;
505:         }
506:         break;
507:       case 6:
508:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
509:         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
510:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
511:         i2    -= 6;
512:         idiag -= 36;
513:         for (i=m-2; i>=0; i--) {
514:           v  = aa + 36*(diag[i]+1);
515:           vi = aj + diag[i] + 1;
516:           nz = ai[i+1] - diag[i] - 1;
517:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
518:           while (nz--) {
519:             idx = 6*(*vi++);
520:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
521:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
522:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
523:             v  += 36;
524:           }
525:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
526:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
527:           idiag  -= 36;
528:           i2     -= 6;
529:         }
530:         break;
531:       case 7:
532:         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
533:         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
534:         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
535:         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
536:         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
537:         i2    -= 7;
538:         idiag -= 49;
539:         for (i=m-2; i>=0; i--) {
540:           v  = aa + 49*(diag[i]+1);
541:           vi = aj + diag[i] + 1;
542:           nz = ai[i+1] - diag[i] - 1;
543:           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
544:           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
545:           while (nz--) {
546:             idx = 7*(*vi++);
547:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
548:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
549:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
550:             v  += 49;
551:           }
552:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
553:           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
554:           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
555:           idiag  -= 49;
556:           i2     -= 7;
557:         }
558:         break;
559:       default:
560:         PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
561:         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
562:         i2    -= bs;
563:         idiag -= bs2;
564:         for (i=m-2; i>=0; i--) {
565:           v  = aa + bs2*(diag[i]+1);
566:           vi = aj + diag[i] + 1;
567:           nz = ai[i+1] - diag[i] - 1;

569:           PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
570:           /* copy all rows of x that are needed into contiguous space */
571:           workt = work;
572:           for (j=0; j<nz; j++) {
573:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
574:             workt += bs;
575:           }
576:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
577:           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);

579:           idiag -= bs2;
580:           i2    -= bs;
581:         }
582:         break;
583:       }
584:       PetscLogFlops(1.0*bs2*(a->nz));
585:     }
586:     its--;
587:   }
588:   while (its--) {
589:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
590:       idiag = a->idiag;
591:       i2 = 0;
592:       switch (bs) {
593:       case 1:
594:         for (i=0; i<m; i++) {
595:           v  = aa + ai[i];
596:           vi = aj + ai[i];
597:           nz = ai[i+1] - ai[i];
598:           s[0] = b[i2];
599:           for (j=0; j<nz; j++) {
600:             xw[0] = x[vi[j]];
601:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
602:           }
603:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
604:           x[i2] += xw[0];
605:           idiag += 1;
606:           i2    += 1;
607:         }
608:         break;
609:       case 2:
610:         for (i=0; i<m; i++) {
611:           v  = aa + 4*ai[i];
612:           vi = aj + ai[i];
613:           nz = ai[i+1] - ai[i];
614:           s[0] = b[i2]; s[1] = b[i2+1];
615:           for (j=0; j<nz; j++) {
616:             idx = 2*vi[j];
617:             it  = 4*j;
618:             xw[0] = x[idx]; xw[1] = x[1+idx];
619:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
620:           }
621:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
622:           x[i2]  += xw[0]; x[i2+1] += xw[1];
623:           idiag  += 4;
624:           i2     += 2;
625:         }
626:         break;
627:       case 3:
628:         for (i=0; i<m; i++) {
629:           v  = aa + 9*ai[i];
630:           vi = aj + ai[i];
631:           nz = ai[i+1] - ai[i];
632:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
633:           while (nz--) {
634:             idx = 3*(*vi++);
635:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
636:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
637:             v  += 9;
638:           }
639:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
640:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
641:           idiag  += 9;
642:           i2     += 3;
643:         }
644:         break;
645:       case 4:
646:         for (i=0; i<m; i++) {
647:           v  = aa + 16*ai[i];
648:           vi = aj + ai[i];
649:           nz = ai[i+1] - ai[i];
650:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
651:           while (nz--) {
652:             idx = 4*(*vi++);
653:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
654:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
655:             v  += 16;
656:           }
657:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
658:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
659:           idiag  += 16;
660:           i2     += 4;
661:         }
662:         break;
663:       case 5:
664:         for (i=0; i<m; i++) {
665:           v  = aa + 25*ai[i];
666:           vi = aj + ai[i];
667:           nz = ai[i+1] - ai[i];
668:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
669:           while (nz--) {
670:             idx = 5*(*vi++);
671:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
672:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
673:             v  += 25;
674:           }
675:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
676:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
677:           idiag  += 25;
678:           i2     += 5;
679:         }
680:         break;
681:       case 6:
682:         for (i=0; i<m; i++) {
683:           v  = aa + 36*ai[i];
684:           vi = aj + ai[i];
685:           nz = ai[i+1] - ai[i];
686:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
687:           while (nz--) {
688:             idx = 6*(*vi++);
689:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
690:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
691:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
692:             v  += 36;
693:           }
694:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
695:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
696:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
697:           idiag  += 36;
698:           i2     += 6;
699:         }
700:         break;
701:       case 7:
702:         for (i=0; i<m; i++) {
703:           v  = aa + 49*ai[i];
704:           vi = aj + ai[i];
705:           nz = ai[i+1] - ai[i];
706:           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
707:           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
708:           while (nz--) {
709:             idx = 7*(*vi++);
710:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
711:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
712:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
713:             v  += 49;
714:           }
715:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
716:           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
717:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
718:           idiag  += 49;
719:           i2     += 7;
720:         }
721:         break;
722:       default:
723:         for (i=0; i<m; i++) {
724:           v  = aa + bs2*ai[i];
725:           vi = aj + ai[i];
726:           nz = ai[i+1] - ai[i];

728:           PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
729:           /* copy all rows of x that are needed into contiguous space */
730:           workt = work;
731:           for (j=0; j<nz; j++) {
732:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
733:             workt += bs;
734:           }
735:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
736:           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);

738:           idiag += bs2;
739:           i2    += bs;
740:         }
741:         break;
742:       }
743:       PetscLogFlops(2.0*bs2*a->nz);
744:     }
745:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
746:       idiag = a->idiag+bs2*(a->mbs-1);
747:       i2 = bs * (m-1);
748:       switch (bs) {
749:       case 1:
750:         for (i=m-1; i>=0; i--) {
751:           v  = aa + ai[i];
752:           vi = aj + ai[i];
753:           nz = ai[i+1] - ai[i];
754:           s[0] = b[i2];
755:           for (j=0; j<nz; j++) {
756:             xw[0] = x[vi[j]];
757:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
758:           }
759:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
760:           x[i2] += xw[0];
761:           idiag -= 1;
762:           i2    -= 1;
763:         }
764:         break;
765:       case 2:
766:         for (i=m-1; i>=0; i--) {
767:           v  = aa + 4*ai[i];
768:           vi = aj + ai[i];
769:           nz = ai[i+1] - ai[i];
770:           s[0] = b[i2]; s[1] = b[i2+1];
771:           for (j=0; j<nz; j++) {
772:             idx = 2*vi[j];
773:             it  = 4*j;
774:             xw[0] = x[idx]; xw[1] = x[1+idx];
775:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
776:           }
777:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
778:           x[i2]  += xw[0]; x[i2+1] += xw[1];
779:           idiag  -= 4;
780:           i2     -= 2;
781:         }
782:         break;
783:       case 3:
784:         for (i=m-1; i>=0; i--) {
785:           v  = aa + 9*ai[i];
786:           vi = aj + ai[i];
787:           nz = ai[i+1] - ai[i];
788:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
789:           while (nz--) {
790:             idx = 3*(*vi++);
791:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
792:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
793:             v  += 9;
794:           }
795:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
796:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
797:           idiag  -= 9;
798:           i2     -= 3;
799:         }
800:         break;
801:       case 4:
802:         for (i=m-1; i>=0; i--) {
803:           v  = aa + 16*ai[i];
804:           vi = aj + ai[i];
805:           nz = ai[i+1] - ai[i];
806:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
807:           while (nz--) {
808:             idx = 4*(*vi++);
809:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
810:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
811:             v  += 16;
812:           }
813:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
814:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
815:           idiag  -= 16;
816:           i2     -= 4;
817:         }
818:         break;
819:       case 5:
820:         for (i=m-1; i>=0; i--) {
821:           v  = aa + 25*ai[i];
822:           vi = aj + ai[i];
823:           nz = ai[i+1] - ai[i];
824:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
825:           while (nz--) {
826:             idx = 5*(*vi++);
827:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
828:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
829:             v  += 25;
830:           }
831:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
832:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
833:           idiag  -= 25;
834:           i2     -= 5;
835:         }
836:         break;
837:       case 6:
838:         for (i=m-1; i>=0; i--) {
839:           v  = aa + 36*ai[i];
840:           vi = aj + ai[i];
841:           nz = ai[i+1] - ai[i];
842:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
843:           while (nz--) {
844:             idx = 6*(*vi++);
845:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
846:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
847:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
848:             v  += 36;
849:           }
850:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
851:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
852:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
853:           idiag  -= 36;
854:           i2     -= 6;
855:         }
856:         break;
857:       case 7:
858:         for (i=m-1; i>=0; i--) {
859:           v  = aa + 49*ai[i];
860:           vi = aj + ai[i];
861:           nz = ai[i+1] - ai[i];
862:           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
863:           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
864:           while (nz--) {
865:             idx = 7*(*vi++);
866:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
867:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
868:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
869:             v  += 49;
870:           }
871:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
872:           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
873:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
874:           idiag  -= 49;
875:           i2     -= 7;
876:         }
877:         break;
878:       default:
879:         for (i=m-1; i>=0; i--) {
880:           v  = aa + bs2*ai[i];
881:           vi = aj + ai[i];
882:           nz = ai[i+1] - ai[i];

884:           PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
885:           /* copy all rows of x that are needed into contiguous space */
886:           workt = work;
887:           for (j=0; j<nz; j++) {
888:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
889:             workt += bs;
890:           }
891:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
892:           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);

894:           idiag -= bs2;
895:           i2    -= bs;
896:         }
897:         break;
898:       }
899:       PetscLogFlops(2.0*bs2*(a->nz));
900:     }
901:   }
902:   VecRestoreArray(xx,&x);
903:   VecRestoreArrayRead(bb,&b);
904:   return(0);
905: }


908: /*
909:     Special version for direct calls from Fortran (Used in PETSc-fun3d)
910: */
911: #if defined(PETSC_HAVE_FORTRAN_CAPS)
912: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
913: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
914: #define matsetvaluesblocked4_ matsetvaluesblocked4
915: #endif

917: PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
918: {
919:   Mat               A  = *AA;
920:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
921:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
922:   PetscInt          *ai    =a->i,*ailen=a->ilen;
923:   PetscInt          *aj    =a->j,stepval,lastcol = -1;
924:   const PetscScalar *value = v;
925:   MatScalar         *ap,*aa = a->a,*bap;

928:   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
929:   stepval = (n-1)*4;
930:   for (k=0; k<m; k++) { /* loop over added rows */
931:     row  = im[k];
932:     rp   = aj + ai[row];
933:     ap   = aa + 16*ai[row];
934:     nrow = ailen[row];
935:     low  = 0;
936:     high = nrow;
937:     for (l=0; l<n; l++) { /* loop over added columns */
938:       col = in[l];
939:       if (col <= lastcol)  low = 0;
940:       else                high = nrow;
941:       lastcol = col;
942:       value   = v + k*(stepval+4 + l)*4;
943:       while (high-low > 7) {
944:         t = (low+high)/2;
945:         if (rp[t] > col) high = t;
946:         else             low  = t;
947:       }
948:       for (i=low; i<high; i++) {
949:         if (rp[i] > col) break;
950:         if (rp[i] == col) {
951:           bap = ap +  16*i;
952:           for (ii=0; ii<4; ii++,value+=stepval) {
953:             for (jj=ii; jj<16; jj+=4) {
954:               bap[jj] += *value++;
955:             }
956:           }
957:           goto noinsert2;
958:         }
959:       }
960:       N = nrow++ - 1;
961:       high++; /* added new column index thus must search to one higher than before */
962:       /* shift up all the later entries in this row */
963:       for (ii=N; ii>=i; ii--) {
964:         rp[ii+1] = rp[ii];
965:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
966:       }
967:       if (N >= i) {
968:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
969:       }
970:       rp[i] = col;
971:       bap   = ap +  16*i;
972:       for (ii=0; ii<4; ii++,value+=stepval) {
973:         for (jj=ii; jj<16; jj+=4) {
974:           bap[jj] = *value++;
975:         }
976:       }
977:       noinsert2:;
978:       low = i;
979:     }
980:     ailen[row] = nrow;
981:   }
982:   PetscFunctionReturnVoid();
983: }

985: #if defined(PETSC_HAVE_FORTRAN_CAPS)
986: #define matsetvalues4_ MATSETVALUES4
987: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
988: #define matsetvalues4_ matsetvalues4
989: #endif

991: PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
992: {
993:   Mat         A  = *AA;
994:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
995:   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
996:   PetscInt    *ai=a->i,*ailen=a->ilen;
997:   PetscInt    *aj=a->j,brow,bcol;
998:   PetscInt    ridx,cidx,lastcol = -1;
999:   MatScalar   *ap,value,*aa=a->a,*bap;

1002:   for (k=0; k<m; k++) { /* loop over added rows */
1003:     row  = im[k]; brow = row/4;
1004:     rp   = aj + ai[brow];
1005:     ap   = aa + 16*ai[brow];
1006:     nrow = ailen[brow];
1007:     low  = 0;
1008:     high = nrow;
1009:     for (l=0; l<n; l++) { /* loop over added columns */
1010:       col   = in[l]; bcol = col/4;
1011:       ridx  = row % 4; cidx = col % 4;
1012:       value = v[l + k*n];
1013:       if (col <= lastcol)  low = 0;
1014:       else                high = nrow;
1015:       lastcol = col;
1016:       while (high-low > 7) {
1017:         t = (low+high)/2;
1018:         if (rp[t] > bcol) high = t;
1019:         else              low  = t;
1020:       }
1021:       for (i=low; i<high; i++) {
1022:         if (rp[i] > bcol) break;
1023:         if (rp[i] == bcol) {
1024:           bap   = ap +  16*i + 4*cidx + ridx;
1025:           *bap += value;
1026:           goto noinsert1;
1027:         }
1028:       }
1029:       N = nrow++ - 1;
1030:       high++; /* added new column thus must search to one higher than before */
1031:       /* shift up all the later entries in this row */
1032:       for (ii=N; ii>=i; ii--) {
1033:         rp[ii+1] = rp[ii];
1034:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1035:       }
1036:       if (N>=i) {
1037:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1038:       }
1039:       rp[i]                    = bcol;
1040:       ap[16*i + 4*cidx + ridx] = value;
1041: noinsert1:;
1042:       low = i;
1043:     }
1044:     ailen[brow] = nrow;
1045:   }
1046:   PetscFunctionReturnVoid();
1047: }

1049: /*
1050:      Checks for missing diagonals
1051: */
1052: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1053: {
1054:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1056:   PetscInt       *diag,*ii = a->i,i;

1059:   MatMarkDiagonal_SeqBAIJ(A);
1060:   *missing = PETSC_FALSE;
1061:   if (A->rmap->n > 0 && !ii) {
1062:     *missing = PETSC_TRUE;
1063:     if (d) *d = 0;
1064:     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1065:   } else {
1066:     diag = a->diag;
1067:     for (i=0; i<a->mbs; i++) {
1068:       if (diag[i] >= ii[i+1]) {
1069:         *missing = PETSC_TRUE;
1070:         if (d) *d = i;
1071:         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1072:         break;
1073:       }
1074:     }
1075:   }
1076:   return(0);
1077: }

1079: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1080: {
1081:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1083:   PetscInt       i,j,m = a->mbs;

1086:   if (!a->diag) {
1087:     PetscMalloc1(m,&a->diag);
1088:     PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));
1089:     a->free_diag = PETSC_TRUE;
1090:   }
1091:   for (i=0; i<m; i++) {
1092:     a->diag[i] = a->i[i+1];
1093:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1094:       if (a->j[j] == i) {
1095:         a->diag[i] = j;
1096:         break;
1097:       }
1098:     }
1099:   }
1100:   return(0);
1101: }


1104: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1105: {
1106:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1108:   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1109:   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;

1112:   *nn = n;
1113:   if (!ia) return(0);
1114:   if (symmetric) {
1115:     MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);
1116:     nz   = tia[n];
1117:   } else {
1118:     tia = a->i; tja = a->j;
1119:   }

1121:   if (!blockcompressed && bs > 1) {
1122:     (*nn) *= bs;
1123:     /* malloc & create the natural set of indices */
1124:     PetscMalloc1((n+1)*bs,ia);
1125:     if (n) {
1126:       (*ia)[0] = oshift;
1127:       for (j=1; j<bs; j++) {
1128:         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1129:       }
1130:     }

1132:     for (i=1; i<n; i++) {
1133:       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1134:       for (j=1; j<bs; j++) {
1135:         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1136:       }
1137:     }
1138:     if (n) {
1139:       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1140:     }

1142:     if (inja) {
1143:       PetscMalloc1(nz*bs*bs,ja);
1144:       cnt = 0;
1145:       for (i=0; i<n; i++) {
1146:         for (j=0; j<bs; j++) {
1147:           for (k=tia[i]; k<tia[i+1]; k++) {
1148:             for (l=0; l<bs; l++) {
1149:               (*ja)[cnt++] = bs*tja[k] + l;
1150:             }
1151:           }
1152:         }
1153:       }
1154:     }

1156:     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1157:       PetscFree(tia);
1158:       PetscFree(tja);
1159:     }
1160:   } else if (oshift == 1) {
1161:     if (symmetric) {
1162:       nz = tia[A->rmap->n/bs];
1163:       /*  add 1 to i and j indices */
1164:       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1165:       *ia = tia;
1166:       if (ja) {
1167:         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1168:         *ja = tja;
1169:       }
1170:     } else {
1171:       nz = a->i[A->rmap->n/bs];
1172:       /* malloc space and  add 1 to i and j indices */
1173:       PetscMalloc1(A->rmap->n/bs+1,ia);
1174:       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1175:       if (ja) {
1176:         PetscMalloc1(nz,ja);
1177:         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1178:       }
1179:     }
1180:   } else {
1181:     *ia = tia;
1182:     if (ja) *ja = tja;
1183:   }
1184:   return(0);
1185: }

1187: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1188: {

1192:   if (!ia) return(0);
1193:   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1194:     PetscFree(*ia);
1195:     if (ja) {PetscFree(*ja);}
1196:   }
1197:   return(0);
1198: }

1200: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1201: {
1202:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1206: #if defined(PETSC_USE_LOG)
1207:   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1208: #endif
1209:   MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
1210:   ISDestroy(&a->row);
1211:   ISDestroy(&a->col);
1212:   if (a->free_diag) {PetscFree(a->diag);}
1213:   PetscFree(a->idiag);
1214:   if (a->free_imax_ilen) {PetscFree2(a->imax,a->ilen);}
1215:   PetscFree(a->solve_work);
1216:   PetscFree(a->mult_work);
1217:   PetscFree(a->sor_workt);
1218:   PetscFree(a->sor_work);
1219:   ISDestroy(&a->icol);
1220:   PetscFree(a->saved_values);
1221:   PetscFree2(a->compressedrow.i,a->compressedrow.rindex);

1223:   MatDestroy(&a->sbaijMat);
1224:   MatDestroy(&a->parent);
1225:   PetscFree(A->data);

1227:   PetscObjectChangeTypeName((PetscObject)A,0);
1228:   PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);
1229:   PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);
1230:   PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);
1231:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);
1232:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);
1233:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);
1234:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);
1235:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);
1236:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);
1237:   PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);
1238: #if defined(PETSC_HAVE_HYPRE)
1239:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpiaij_hypre_C",NULL);
1240: #endif
1241:   return(0);
1242: }

1244: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1245: {
1246:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1250:   switch (op) {
1251:   case MAT_ROW_ORIENTED:
1252:     a->roworiented = flg;
1253:     break;
1254:   case MAT_KEEP_NONZERO_PATTERN:
1255:     a->keepnonzeropattern = flg;
1256:     break;
1257:   case MAT_NEW_NONZERO_LOCATIONS:
1258:     a->nonew = (flg ? 0 : 1);
1259:     break;
1260:   case MAT_NEW_NONZERO_LOCATION_ERR:
1261:     a->nonew = (flg ? -1 : 0);
1262:     break;
1263:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1264:     a->nonew = (flg ? -2 : 0);
1265:     break;
1266:   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1267:     a->nounused = (flg ? -1 : 0);
1268:     break;
1269:   case MAT_NEW_DIAGONALS:
1270:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1271:   case MAT_USE_HASH_TABLE:
1272:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1273:     break;
1274:   case MAT_SPD:
1275:   case MAT_SYMMETRIC:
1276:   case MAT_STRUCTURALLY_SYMMETRIC:
1277:   case MAT_HERMITIAN:
1278:   case MAT_SYMMETRY_ETERNAL:
1279:   case MAT_SUBMAT_SINGLEIS:
1280:   case MAT_STRUCTURE_ONLY:
1281:     /* These options are handled directly by MatSetOption() */
1282:     break;
1283:   default:
1284:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1285:   }
1286:   return(0);
1287: }

1289: /* used for both SeqBAIJ and SeqSBAIJ matrices */
1290: PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1291: {
1293:   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1294:   MatScalar      *aa_i;
1295:   PetscScalar    *v_i;

1298:   bs  = A->rmap->bs;
1299:   bs2 = bs*bs;
1300:   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);

1302:   bn  = row/bs;   /* Block number */
1303:   bp  = row % bs; /* Block Position */
1304:   M   = ai[bn+1] - ai[bn];
1305:   *nz = bs*M;

1307:   if (v) {
1308:     *v = 0;
1309:     if (*nz) {
1310:       PetscMalloc1(*nz,v);
1311:       for (i=0; i<M; i++) { /* for each block in the block row */
1312:         v_i  = *v + i*bs;
1313:         aa_i = aa + bs2*(ai[bn] + i);
1314:         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1315:       }
1316:     }
1317:   }

1319:   if (idx) {
1320:     *idx = 0;
1321:     if (*nz) {
1322:       PetscMalloc1(*nz,idx);
1323:       for (i=0; i<M; i++) { /* for each block in the block row */
1324:         idx_i = *idx + i*bs;
1325:         itmp  = bs*aj[ai[bn] + i];
1326:         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1327:       }
1328:     }
1329:   }
1330:   return(0);
1331: }

1333: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1334: {
1335:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1339:   MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);
1340:   return(0);
1341: }

1343: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1344: {

1348:   if (idx) {PetscFree(*idx);}
1349:   if (v)   {PetscFree(*v);}
1350:   return(0);
1351: }

1353: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1354: {
1355:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1356:   Mat            C;
1358:   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1359:   PetscInt       *rows,*cols,bs2=a->bs2;
1360:   MatScalar      *array;

1363:   if (reuse == MAT_INPLACE_MATRIX && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1364:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1365:     PetscCalloc1(1+nbs,&col);

1367:     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1368:     MatCreate(PetscObjectComm((PetscObject)A),&C);
1369:     MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);
1370:     MatSetType(C,((PetscObject)A)->type_name);
1371:     MatSeqBAIJSetPreallocation(C,bs,0,col);
1372:     PetscFree(col);
1373:   } else {
1374:     C = *B;
1375:   }

1377:   array = a->a;
1378:   PetscMalloc2(bs,&rows,bs,&cols);
1379:   for (i=0; i<mbs; i++) {
1380:     cols[0] = i*bs;
1381:     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1382:     len = ai[i+1] - ai[i];
1383:     for (j=0; j<len; j++) {
1384:       rows[0] = (*aj++)*bs;
1385:       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1386:       MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);
1387:       array += bs2;
1388:     }
1389:   }
1390:   PetscFree2(rows,cols);

1392:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1393:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);

1395:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1396:     *B = C;
1397:   } else {
1398:     MatHeaderMerge(A,&C);
1399:   }
1400:   return(0);
1401: }

1403: PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1404: {
1406:   Mat            Btrans;

1409:   *f   = PETSC_FALSE;
1410:   MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);
1411:   MatEqual_SeqBAIJ(B,Btrans,f);
1412:   MatDestroy(&Btrans);
1413:   return(0);
1414: }

1416: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1417: {
1418:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1420:   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1421:   int            fd;
1422:   PetscScalar    *aa;
1423:   FILE           *file;

1426:   PetscViewerBinaryGetDescriptor(viewer,&fd);
1427:   PetscMalloc1(4+A->rmap->N,&col_lens);
1428:   col_lens[0] = MAT_FILE_CLASSID;

1430:   col_lens[1] = A->rmap->N;
1431:   col_lens[2] = A->cmap->n;
1432:   col_lens[3] = a->nz*bs2;

1434:   /* store lengths of each row and write (including header) to file */
1435:   count = 0;
1436:   for (i=0; i<a->mbs; i++) {
1437:     for (j=0; j<bs; j++) {
1438:       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1439:     }
1440:   }
1441:   PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);
1442:   PetscFree(col_lens);

1444:   /* store column indices (zero start index) */
1445:   PetscMalloc1((a->nz+1)*bs2,&jj);
1446:   count = 0;
1447:   for (i=0; i<a->mbs; i++) {
1448:     for (j=0; j<bs; j++) {
1449:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1450:         for (l=0; l<bs; l++) {
1451:           jj[count++] = bs*a->j[k] + l;
1452:         }
1453:       }
1454:     }
1455:   }
1456:   PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1457:   PetscFree(jj);

1459:   /* store nonzero values */
1460:   PetscMalloc1((a->nz+1)*bs2,&aa);
1461:   count = 0;
1462:   for (i=0; i<a->mbs; i++) {
1463:     for (j=0; j<bs; j++) {
1464:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1465:         for (l=0; l<bs; l++) {
1466:           aa[count++] = a->a[bs2*k + l*bs + j];
1467:         }
1468:       }
1469:     }
1470:   }
1471:   PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1472:   PetscFree(aa);

1474:   PetscViewerBinaryGetInfoPointer(viewer,&file);
1475:   if (file) {
1476:     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1477:   }
1478:   return(0);
1479: }

1481: static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1482: {
1484:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1485:   PetscInt       i,bs = A->rmap->bs,k;

1488:   PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1489:   for (i=0; i<a->mbs; i++) {
1490:     PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);
1491:     for (k=a->i[i]; k<a->i[i+1]; k++) {
1492:       PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);
1493:     }
1494:     PetscViewerASCIIPrintf(viewer,"\n");
1495:   }
1496:   PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1497:   return(0);
1498: }

1500: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1501: {
1502:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1503:   PetscErrorCode    ierr;
1504:   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1505:   PetscViewerFormat format;

1508:   if (A->structure_only) {
1509:     MatView_SeqBAIJ_ASCII_structonly(A,viewer);
1510:     return(0);
1511:   }

1513:   PetscViewerGetFormat(viewer,&format);
1514:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1515:     PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);
1516:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1517:     const char *matname;
1518:     Mat        aij;
1519:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1520:     PetscObjectGetName((PetscObject)A,&matname);
1521:     PetscObjectSetName((PetscObject)aij,matname);
1522:     MatView(aij,viewer);
1523:     MatDestroy(&aij);
1524:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1525:       return(0);
1526:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1527:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1528:     for (i=0; i<a->mbs; i++) {
1529:       for (j=0; j<bs; j++) {
1530:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1531:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1532:           for (l=0; l<bs; l++) {
1533: #if defined(PETSC_USE_COMPLEX)
1534:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1535:               PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1536:                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1537:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1538:               PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1539:                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1540:             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1541:               PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));
1542:             }
1543: #else
1544:             if (a->a[bs2*k + l*bs + j] != 0.0) {
1545:               PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);
1546:             }
1547: #endif
1548:           }
1549:         }
1550:         PetscViewerASCIIPrintf(viewer,"\n");
1551:       }
1552:     }
1553:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1554:   } else {
1555:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1556:     for (i=0; i<a->mbs; i++) {
1557:       for (j=0; j<bs; j++) {
1558:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1559:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1560:           for (l=0; l<bs; l++) {
1561: #if defined(PETSC_USE_COMPLEX)
1562:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1563:               PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1564:                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1565:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1566:               PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1567:                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1568:             } else {
1569:               PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));
1570:             }
1571: #else
1572:             PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);
1573: #endif
1574:           }
1575:         }
1576:         PetscViewerASCIIPrintf(viewer,"\n");
1577:       }
1578:     }
1579:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1580:   }
1581:   PetscViewerFlush(viewer);
1582:   return(0);
1583: }

1585:  #include <petscdraw.h>
1586: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1587: {
1588:   Mat               A = (Mat) Aa;
1589:   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1590:   PetscErrorCode    ierr;
1591:   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1592:   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1593:   MatScalar         *aa;
1594:   PetscViewer       viewer;
1595:   PetscViewerFormat format;

1598:   PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1599:   PetscViewerGetFormat(viewer,&format);
1600:   PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);

1602:   /* loop over matrix elements drawing boxes */

1604:   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1605:     PetscDrawCollectiveBegin(draw);
1606:     /* Blue for negative, Cyan for zero and  Red for positive */
1607:     color = PETSC_DRAW_BLUE;
1608:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1609:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1610:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1611:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1612:         aa  = a->a + j*bs2;
1613:         for (k=0; k<bs; k++) {
1614:           for (l=0; l<bs; l++) {
1615:             if (PetscRealPart(*aa++) >=  0.) continue;
1616:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1617:           }
1618:         }
1619:       }
1620:     }
1621:     color = PETSC_DRAW_CYAN;
1622:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1623:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1624:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1625:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1626:         aa  = a->a + j*bs2;
1627:         for (k=0; k<bs; k++) {
1628:           for (l=0; l<bs; l++) {
1629:             if (PetscRealPart(*aa++) != 0.) continue;
1630:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1631:           }
1632:         }
1633:       }
1634:     }
1635:     color = PETSC_DRAW_RED;
1636:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1637:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1638:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1639:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1640:         aa  = a->a + j*bs2;
1641:         for (k=0; k<bs; k++) {
1642:           for (l=0; l<bs; l++) {
1643:             if (PetscRealPart(*aa++) <= 0.) continue;
1644:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1645:           }
1646:         }
1647:       }
1648:     }
1649:     PetscDrawCollectiveEnd(draw);
1650:   } else {
1651:     /* use contour shading to indicate magnitude of values */
1652:     /* first determine max of all nonzero values */
1653:     PetscReal minv = 0.0, maxv = 0.0;
1654:     PetscDraw popup;

1656:     for (i=0; i<a->nz*a->bs2; i++) {
1657:       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1658:     }
1659:     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1660:     PetscDrawGetPopup(draw,&popup);
1661:     PetscDrawScalePopup(popup,0.0,maxv);

1663:     PetscDrawCollectiveBegin(draw);
1664:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1665:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1666:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1667:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1668:         aa  = a->a + j*bs2;
1669:         for (k=0; k<bs; k++) {
1670:           for (l=0; l<bs; l++) {
1671:             MatScalar v = *aa++;
1672:             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1673:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1674:           }
1675:         }
1676:       }
1677:     }
1678:     PetscDrawCollectiveEnd(draw);
1679:   }
1680:   return(0);
1681: }

1683: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1684: {
1686:   PetscReal      xl,yl,xr,yr,w,h;
1687:   PetscDraw      draw;
1688:   PetscBool      isnull;

1691:   PetscViewerDrawGetDraw(viewer,0,&draw);
1692:   PetscDrawIsNull(draw,&isnull);
1693:   if (isnull) return(0);

1695:   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1696:   xr  += w;          yr += h;        xl = -w;     yl = -h;
1697:   PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1698:   PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1699:   PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1700:   PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);
1701:   PetscDrawSave(draw);
1702:   return(0);
1703: }

1705: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1706: {
1708:   PetscBool      iascii,isbinary,isdraw;

1711:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1712:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1713:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1714:   if (iascii) {
1715:     MatView_SeqBAIJ_ASCII(A,viewer);
1716:   } else if (isbinary) {
1717:     MatView_SeqBAIJ_Binary(A,viewer);
1718:   } else if (isdraw) {
1719:     MatView_SeqBAIJ_Draw(A,viewer);
1720:   } else {
1721:     Mat B;
1722:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1723:     MatView(B,viewer);
1724:     MatDestroy(&B);
1725:   }
1726:   return(0);
1727: }


1730: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1731: {
1732:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1733:   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1734:   PetscInt    *ai = a->i,*ailen = a->ilen;
1735:   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1736:   MatScalar   *ap,*aa = a->a;

1739:   for (k=0; k<m; k++) { /* loop over rows */
1740:     row = im[k]; brow = row/bs;
1741:     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1742:     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1743:     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1744:     nrow = ailen[brow];
1745:     for (l=0; l<n; l++) { /* loop over columns */
1746:       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1747:       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1748:       col  = in[l];
1749:       bcol = col/bs;
1750:       cidx = col%bs;
1751:       ridx = row%bs;
1752:       high = nrow;
1753:       low  = 0; /* assume unsorted */
1754:       while (high-low > 5) {
1755:         t = (low+high)/2;
1756:         if (rp[t] > bcol) high = t;
1757:         else             low  = t;
1758:       }
1759:       for (i=low; i<high; i++) {
1760:         if (rp[i] > bcol) break;
1761:         if (rp[i] == bcol) {
1762:           *v++ = ap[bs2*i+bs*cidx+ridx];
1763:           goto finished;
1764:         }
1765:       }
1766:       *v++ = 0.0;
1767: finished:;
1768:     }
1769:   }
1770:   return(0);
1771: }

1773: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1774: {
1775:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1776:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1777:   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1778:   PetscErrorCode    ierr;
1779:   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1780:   PetscBool         roworiented=a->roworiented;
1781:   const PetscScalar *value     = v;
1782:   MatScalar         *ap=NULL,*aa = a->a,*bap;

1785:   if (roworiented) {
1786:     stepval = (n-1)*bs;
1787:   } else {
1788:     stepval = (m-1)*bs;
1789:   }
1790:   for (k=0; k<m; k++) { /* loop over added rows */
1791:     row = im[k];
1792:     if (row < 0) continue;
1793: #if defined(PETSC_USE_DEBUG)
1794:     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1795: #endif
1796:     rp   = aj + ai[row];
1797:     if (!A->structure_only) ap = aa + bs2*ai[row];
1798:     rmax = imax[row];
1799:     nrow = ailen[row];
1800:     low  = 0;
1801:     high = nrow;
1802:     for (l=0; l<n; l++) { /* loop over added columns */
1803:       if (in[l] < 0) continue;
1804: #if defined(PETSC_USE_DEBUG)
1805:       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1806: #endif
1807:       col = in[l];
1808:       if (!A->structure_only) {
1809:         if (roworiented) {
1810:           value = v + (k*(stepval+bs) + l)*bs;
1811:         } else {
1812:           value = v + (l*(stepval+bs) + k)*bs;
1813:         }
1814:       }
1815:       if (col <= lastcol) low = 0;
1816:       else high = nrow;
1817:       lastcol = col;
1818:       while (high-low > 7) {
1819:         t = (low+high)/2;
1820:         if (rp[t] > col) high = t;
1821:         else             low  = t;
1822:       }
1823:       for (i=low; i<high; i++) {
1824:         if (rp[i] > col) break;
1825:         if (rp[i] == col) {
1826:           if (A->structure_only) goto noinsert2;
1827:           bap = ap +  bs2*i;
1828:           if (roworiented) {
1829:             if (is == ADD_VALUES) {
1830:               for (ii=0; ii<bs; ii++,value+=stepval) {
1831:                 for (jj=ii; jj<bs2; jj+=bs) {
1832:                   bap[jj] += *value++;
1833:                 }
1834:               }
1835:             } else {
1836:               for (ii=0; ii<bs; ii++,value+=stepval) {
1837:                 for (jj=ii; jj<bs2; jj+=bs) {
1838:                   bap[jj] = *value++;
1839:                 }
1840:               }
1841:             }
1842:           } else {
1843:             if (is == ADD_VALUES) {
1844:               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1845:                 for (jj=0; jj<bs; jj++) {
1846:                   bap[jj] += value[jj];
1847:                 }
1848:                 bap += bs;
1849:               }
1850:             } else {
1851:               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1852:                 for (jj=0; jj<bs; jj++) {
1853:                   bap[jj]  = value[jj];
1854:                 }
1855:                 bap += bs;
1856:               }
1857:             }
1858:           }
1859:           goto noinsert2;
1860:         }
1861:       }
1862:       if (nonew == 1) goto noinsert2;
1863:       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1864:       if (A->structure_only) {
1865:         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1866:       } else {
1867:         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1868:       }
1869:       N = nrow++ - 1; high++;
1870:       /* shift up all the later entries in this row */
1871:       for (ii=N; ii>=i; ii--) {
1872:         rp[ii+1] = rp[ii];
1873:         if (!A->structure_only) {
1874:           PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1875:         }
1876:       }
1877:       if (N >= i && !A->structure_only) {
1878:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1879:       }

1881:       rp[i] = col;
1882:       if (!A->structure_only) {
1883:         bap   = ap +  bs2*i;
1884:         if (roworiented) {
1885:           for (ii=0; ii<bs; ii++,value+=stepval) {
1886:             for (jj=ii; jj<bs2; jj+=bs) {
1887:               bap[jj] = *value++;
1888:             }
1889:           }
1890:         } else {
1891:           for (ii=0; ii<bs; ii++,value+=stepval) {
1892:             for (jj=0; jj<bs; jj++) {
1893:               *bap++ = *value++;
1894:             }
1895:           }
1896:         }
1897:       }
1898: noinsert2:;
1899:       low = i;
1900:     }
1901:     ailen[row] = nrow;
1902:   }
1903:   return(0);
1904: }

1906: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1907: {
1908:   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1909:   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1910:   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1912:   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1913:   MatScalar      *aa  = a->a,*ap;
1914:   PetscReal      ratio=0.6;

1917:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

1919:   if (m) rmax = ailen[0];
1920:   for (i=1; i<mbs; i++) {
1921:     /* move each row back by the amount of empty slots (fshift) before it*/
1922:     fshift += imax[i-1] - ailen[i-1];
1923:     rmax    = PetscMax(rmax,ailen[i]);
1924:     if (fshift) {
1925:       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1926:       N  = ailen[i];
1927:       for (j=0; j<N; j++) {
1928:         ip[j-fshift] = ip[j];
1929:         if (!A->structure_only) {
1930:           PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
1931:         }
1932:       }
1933:     }
1934:     ai[i] = ai[i-1] + ailen[i-1];
1935:   }
1936:   if (mbs) {
1937:     fshift += imax[mbs-1] - ailen[mbs-1];
1938:     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1939:   }

1941:   /* reset ilen and imax for each row */
1942:   a->nonzerorowcnt = 0;
1943:   if (A->structure_only) {
1944:     PetscFree2(a->imax,a->ilen);
1945:   } else { /* !A->structure_only */
1946:     for (i=0; i<mbs; i++) {
1947:       ailen[i] = imax[i] = ai[i+1] - ai[i];
1948:       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1949:     }
1950:   }
1951:   a->nz = ai[mbs];

1953:   /* diagonals may have moved, so kill the diagonal pointers */
1954:   a->idiagvalid = PETSC_FALSE;
1955:   if (fshift && a->diag) {
1956:     PetscFree(a->diag);
1957:     PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));
1958:     a->diag = 0;
1959:   }
1960:   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1961:   PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);
1962:   PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
1963:   PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);

1965:   A->info.mallocs    += a->reallocs;
1966:   a->reallocs         = 0;
1967:   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1968:   a->rmax             = rmax;

1970:   if (!A->structure_only) {
1971:     MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);
1972:   }
1973:   return(0);
1974: }

1976: /*
1977:    This function returns an array of flags which indicate the locations of contiguous
1978:    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1979:    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1980:    Assume: sizes should be long enough to hold all the values.
1981: */
1982: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1983: {
1984:   PetscInt  i,j,k,row;
1985:   PetscBool flg;

1988:   for (i=0,j=0; i<n; j++) {
1989:     row = idx[i];
1990:     if (row%bs!=0) { /* Not the begining of a block */
1991:       sizes[j] = 1;
1992:       i++;
1993:     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1994:       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1995:       i++;
1996:     } else { /* Begining of the block, so check if the complete block exists */
1997:       flg = PETSC_TRUE;
1998:       for (k=1; k<bs; k++) {
1999:         if (row+k != idx[i+k]) { /* break in the block */
2000:           flg = PETSC_FALSE;
2001:           break;
2002:         }
2003:       }
2004:       if (flg) { /* No break in the bs */
2005:         sizes[j] = bs;
2006:         i       += bs;
2007:       } else {
2008:         sizes[j] = 1;
2009:         i++;
2010:       }
2011:     }
2012:   }
2013:   *bs_max = j;
2014:   return(0);
2015: }

2017: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2018: {
2019:   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2020:   PetscErrorCode    ierr;
2021:   PetscInt          i,j,k,count,*rows;
2022:   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2023:   PetscScalar       zero = 0.0;
2024:   MatScalar         *aa;
2025:   const PetscScalar *xx;
2026:   PetscScalar       *bb;

2029:   /* fix right hand side if needed */
2030:   if (x && b) {
2031:     VecGetArrayRead(x,&xx);
2032:     VecGetArray(b,&bb);
2033:     for (i=0; i<is_n; i++) {
2034:       bb[is_idx[i]] = diag*xx[is_idx[i]];
2035:     }
2036:     VecRestoreArrayRead(x,&xx);
2037:     VecRestoreArray(b,&bb);
2038:   }

2040:   /* Make a copy of the IS and  sort it */
2041:   /* allocate memory for rows,sizes */
2042:   PetscMalloc2(is_n,&rows,2*is_n,&sizes);

2044:   /* copy IS values to rows, and sort them */
2045:   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2046:   PetscSortInt(is_n,rows);

2048:   if (baij->keepnonzeropattern) {
2049:     for (i=0; i<is_n; i++) sizes[i] = 1;
2050:     bs_max          = is_n;
2051:   } else {
2052:     MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
2053:     A->nonzerostate++;
2054:   }

2056:   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2057:     row = rows[j];
2058:     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2059:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2060:     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2061:     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2062:       if (diag != (PetscScalar)0.0) {
2063:         if (baij->ilen[row/bs] > 0) {
2064:           baij->ilen[row/bs]       = 1;
2065:           baij->j[baij->i[row/bs]] = row/bs;

2067:           PetscMemzero(aa,count*bs*sizeof(MatScalar));
2068:         }
2069:         /* Now insert all the diagonal values for this bs */
2070:         for (k=0; k<bs; k++) {
2071:           (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
2072:         }
2073:       } else { /* (diag == 0.0) */
2074:         baij->ilen[row/bs] = 0;
2075:       } /* end (diag == 0.0) */
2076:     } else { /* (sizes[i] != bs) */
2077: #if defined(PETSC_USE_DEBUG)
2078:       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2079: #endif
2080:       for (k=0; k<count; k++) {
2081:         aa[0] =  zero;
2082:         aa   += bs;
2083:       }
2084:       if (diag != (PetscScalar)0.0) {
2085:         (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
2086:       }
2087:     }
2088:   }

2090:   PetscFree2(rows,sizes);
2091:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2092:   return(0);
2093: }

2095: PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2096: {
2097:   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2098:   PetscErrorCode    ierr;
2099:   PetscInt          i,j,k,count;
2100:   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2101:   PetscScalar       zero = 0.0;
2102:   MatScalar         *aa;
2103:   const PetscScalar *xx;
2104:   PetscScalar       *bb;
2105:   PetscBool         *zeroed,vecs = PETSC_FALSE;

2108:   /* fix right hand side if needed */
2109:   if (x && b) {
2110:     VecGetArrayRead(x,&xx);
2111:     VecGetArray(b,&bb);
2112:     vecs = PETSC_TRUE;
2113:   }

2115:   /* zero the columns */
2116:   PetscCalloc1(A->rmap->n,&zeroed);
2117:   for (i=0; i<is_n; i++) {
2118:     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2119:     zeroed[is_idx[i]] = PETSC_TRUE;
2120:   }
2121:   for (i=0; i<A->rmap->N; i++) {
2122:     if (!zeroed[i]) {
2123:       row = i/bs;
2124:       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2125:         for (k=0; k<bs; k++) {
2126:           col = bs*baij->j[j] + k;
2127:           if (zeroed[col]) {
2128:             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2129:             if (vecs) bb[i] -= aa[0]*xx[col];
2130:             aa[0] = 0.0;
2131:           }
2132:         }
2133:       }
2134:     } else if (vecs) bb[i] = diag*xx[i];
2135:   }
2136:   PetscFree(zeroed);
2137:   if (vecs) {
2138:     VecRestoreArrayRead(x,&xx);
2139:     VecRestoreArray(b,&bb);
2140:   }

2142:   /* zero the rows */
2143:   for (i=0; i<is_n; i++) {
2144:     row   = is_idx[i];
2145:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2146:     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2147:     for (k=0; k<count; k++) {
2148:       aa[0] =  zero;
2149:       aa   += bs;
2150:     }
2151:     if (diag != (PetscScalar)0.0) {
2152:       (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);
2153:     }
2154:   }
2155:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2156:   return(0);
2157: }

2159: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2160: {
2161:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2162:   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2163:   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2164:   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2166:   PetscInt       ridx,cidx,bs2=a->bs2;
2167:   PetscBool      roworiented=a->roworiented;
2168:   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;

2171:   for (k=0; k<m; k++) { /* loop over added rows */
2172:     row  = im[k];
2173:     brow = row/bs;
2174:     if (row < 0) continue;
2175: #if defined(PETSC_USE_DEBUG)
2176:     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2177: #endif
2178:     rp   = aj + ai[brow];
2179:     if (!A->structure_only) ap = aa + bs2*ai[brow];
2180:     rmax = imax[brow];
2181:     nrow = ailen[brow];
2182:     low  = 0;
2183:     high = nrow;
2184:     for (l=0; l<n; l++) { /* loop over added columns */
2185:       if (in[l] < 0) continue;
2186: #if defined(PETSC_USE_DEBUG)
2187:       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2188: #endif
2189:       col  = in[l]; bcol = col/bs;
2190:       ridx = row % bs; cidx = col % bs;
2191:       if (!A->structure_only) {
2192:         if (roworiented) {
2193:           value = v[l + k*n];
2194:         } else {
2195:           value = v[k + l*m];
2196:         }
2197:       }
2198:       if (col <= lastcol) low = 0; else high = nrow;
2199:       lastcol = col;
2200:       while (high-low > 7) {
2201:         t = (low+high)/2;
2202:         if (rp[t] > bcol) high = t;
2203:         else              low  = t;
2204:       }
2205:       for (i=low; i<high; i++) {
2206:         if (rp[i] > bcol) break;
2207:         if (rp[i] == bcol) {
2208:           bap = ap +  bs2*i + bs*cidx + ridx;
2209:           if (!A->structure_only) {
2210:             if (is == ADD_VALUES) *bap += value;
2211:             else                  *bap  = value;
2212:           }
2213:           goto noinsert1;
2214:         }
2215:       }
2216:       if (nonew == 1) goto noinsert1;
2217:       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2218:       if (A->structure_only) {
2219:         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2220:       } else {
2221:         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2222:       }
2223:       N = nrow++ - 1; high++;
2224:       /* shift up all the later entries in this row */
2225:       for (ii=N; ii>=i; ii--) {
2226:         rp[ii+1] = rp[ii];
2227:         if (!A->structure_only) {
2228:           PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
2229:         }
2230:       }
2231:       if (N>=i && !A->structure_only) {
2232:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
2233:       }
2234:       rp[i]                      = bcol;
2235:       if (!A->structure_only) ap[bs2*i + bs*cidx + ridx] = value;
2236:       a->nz++;
2237:       A->nonzerostate++;
2238: noinsert1:;
2239:       low = i;
2240:     }
2241:     ailen[brow] = nrow;
2242:   }
2243:   return(0);
2244: }

2246: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2247: {
2248:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2249:   Mat            outA;
2251:   PetscBool      row_identity,col_identity;

2254:   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2255:   ISIdentity(row,&row_identity);
2256:   ISIdentity(col,&col_identity);
2257:   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");

2259:   outA            = inA;
2260:   inA->factortype = MAT_FACTOR_LU;
2261:   PetscFree(inA->solvertype);
2262:   PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);

2264:   MatMarkDiagonal_SeqBAIJ(inA);

2266:   PetscObjectReference((PetscObject)row);
2267:   ISDestroy(&a->row);
2268:   a->row = row;
2269:   PetscObjectReference((PetscObject)col);
2270:   ISDestroy(&a->col);
2271:   a->col = col;

2273:   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2274:   ISDestroy(&a->icol);
2275:   ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
2276:   PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);

2278:   MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));
2279:   if (!a->solve_work) {
2280:     PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);
2281:     PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));
2282:   }
2283:   MatLUFactorNumeric(outA,inA,info);
2284:   return(0);
2285: }

2287: PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2288: {
2289:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2290:   PetscInt    i,nz,mbs;

2293:   nz  = baij->maxnz;
2294:   mbs = baij->mbs;
2295:   for (i=0; i<nz; i++) {
2296:     baij->j[i] = indices[i];
2297:   }
2298:   baij->nz = nz;
2299:   for (i=0; i<mbs; i++) {
2300:     baij->ilen[i] = baij->imax[i];
2301:   }
2302:   return(0);
2303: }

2305: /*@
2306:     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2307:        in the matrix.

2309:   Input Parameters:
2310: +  mat - the SeqBAIJ matrix
2311: -  indices - the column indices

2313:   Level: advanced

2315:   Notes:
2316:     This can be called if you have precomputed the nonzero structure of the
2317:   matrix and want to provide it to the matrix object to improve the performance
2318:   of the MatSetValues() operation.

2320:     You MUST have set the correct numbers of nonzeros per row in the call to
2321:   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.

2323:     MUST be called before any calls to MatSetValues();

2325: @*/
2326: PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2327: {

2333:   PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));
2334:   return(0);
2335: }

2337: PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2338: {
2339:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2341:   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2342:   PetscReal      atmp;
2343:   PetscScalar    *x,zero = 0.0;
2344:   MatScalar      *aa;
2345:   PetscInt       ncols,brow,krow,kcol;

2348:   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2349:   bs  = A->rmap->bs;
2350:   aa  = a->a;
2351:   ai  = a->i;
2352:   aj  = a->j;
2353:   mbs = a->mbs;

2355:   VecSet(v,zero);
2356:   VecGetArray(v,&x);
2357:   VecGetLocalSize(v,&n);
2358:   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2359:   for (i=0; i<mbs; i++) {
2360:     ncols = ai[1] - ai[0]; ai++;
2361:     brow  = bs*i;
2362:     for (j=0; j<ncols; j++) {
2363:       for (kcol=0; kcol<bs; kcol++) {
2364:         for (krow=0; krow<bs; krow++) {
2365:           atmp = PetscAbsScalar(*aa);aa++;
2366:           row  = brow + krow;   /* row index */
2367:           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2368:         }
2369:       }
2370:       aj++;
2371:     }
2372:   }
2373:   VecRestoreArray(v,&x);
2374:   return(0);
2375: }

2377: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2378: {

2382:   /* If the two matrices have the same copy implementation, use fast copy. */
2383:   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2384:     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2385:     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2386:     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;

2388:     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2389:     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2390:     PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));
2391:     PetscObjectStateIncrease((PetscObject)B);
2392:   } else {
2393:     MatCopy_Basic(A,B,str);
2394:   }
2395:   return(0);
2396: }

2398: PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2399: {

2403:   MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);
2404:   return(0);
2405: }

2407: PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2408: {
2409:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;

2412:   *array = a->a;
2413:   return(0);
2414: }

2416: PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2417: {
2419:   return(0);
2420: }

2422: PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2423: {
2424:   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2425:   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2426:   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;

2430:   /* Set the number of nonzeros in the new matrix */
2431:   MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);
2432:   return(0);
2433: }

2435: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2436: {
2437:   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2439:   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2440:   PetscBLASInt   one=1;

2443:   if (str == SAME_NONZERO_PATTERN) {
2444:     PetscScalar  alpha = a;
2445:     PetscBLASInt bnz;
2446:     PetscBLASIntCast(x->nz*bs2,&bnz);
2447:     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2448:     PetscObjectStateIncrease((PetscObject)Y);
2449:   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2450:     MatAXPY_Basic(Y,a,X,str);
2451:   } else {
2452:     Mat      B;
2453:     PetscInt *nnz;
2454:     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2455:     PetscMalloc1(Y->rmap->N,&nnz);
2456:     MatCreate(PetscObjectComm((PetscObject)Y),&B);
2457:     PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
2458:     MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
2459:     MatSetBlockSizesFromMats(B,Y,Y);
2460:     MatSetType(B,(MatType) ((PetscObject)Y)->type_name);
2461:     MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);
2462:     MatSeqBAIJSetPreallocation(B,bs,0,nnz);
2463:     MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
2464:     MatHeaderReplace(Y,&B);
2465:     PetscFree(nnz);
2466:   }
2467:   return(0);
2468: }

2470: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2471: {
2472:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2473:   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2474:   MatScalar   *aa = a->a;

2477:   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2478:   return(0);
2479: }

2481: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2482: {
2483:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2484:   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2485:   MatScalar   *aa = a->a;

2488:   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2489:   return(0);
2490: }

2492: /*
2493:     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2494: */
2495: PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2496: {
2497:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2499:   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2500:   PetscInt       nz = a->i[m],row,*jj,mr,col;

2503:   *nn = n;
2504:   if (!ia) return(0);
2505:   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2506:   else {
2507:     PetscCalloc1(n+1,&collengths);
2508:     PetscMalloc1(n+1,&cia);
2509:     PetscMalloc1(nz+1,&cja);
2510:     jj   = a->j;
2511:     for (i=0; i<nz; i++) {
2512:       collengths[jj[i]]++;
2513:     }
2514:     cia[0] = oshift;
2515:     for (i=0; i<n; i++) {
2516:       cia[i+1] = cia[i] + collengths[i];
2517:     }
2518:     PetscMemzero(collengths,n*sizeof(PetscInt));
2519:     jj   = a->j;
2520:     for (row=0; row<m; row++) {
2521:       mr = a->i[row+1] - a->i[row];
2522:       for (i=0; i<mr; i++) {
2523:         col = *jj++;

2525:         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2526:       }
2527:     }
2528:     PetscFree(collengths);
2529:     *ia  = cia; *ja = cja;
2530:   }
2531:   return(0);
2532: }

2534: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2535: {

2539:   if (!ia) return(0);
2540:   PetscFree(*ia);
2541:   PetscFree(*ja);
2542:   return(0);
2543: }

2545: /*
2546:  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2547:  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2548:  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2549:  */
2550: PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2551: {
2552:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2554:   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2555:   PetscInt       nz = a->i[m],row,*jj,mr,col;
2556:   PetscInt       *cspidx;

2559:   *nn = n;
2560:   if (!ia) return(0);

2562:   PetscCalloc1(n+1,&collengths);
2563:   PetscMalloc1(n+1,&cia);
2564:   PetscMalloc1(nz+1,&cja);
2565:   PetscMalloc1(nz+1,&cspidx);
2566:   jj   = a->j;
2567:   for (i=0; i<nz; i++) {
2568:     collengths[jj[i]]++;
2569:   }
2570:   cia[0] = oshift;
2571:   for (i=0; i<n; i++) {
2572:     cia[i+1] = cia[i] + collengths[i];
2573:   }
2574:   PetscMemzero(collengths,n*sizeof(PetscInt));
2575:   jj   = a->j;
2576:   for (row=0; row<m; row++) {
2577:     mr = a->i[row+1] - a->i[row];
2578:     for (i=0; i<mr; i++) {
2579:       col = *jj++;
2580:       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2581:       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2582:     }
2583:   }
2584:   PetscFree(collengths);
2585:   *ia    = cia; *ja = cja;
2586:   *spidx = cspidx;
2587:   return(0);
2588: }

2590: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2591: {

2595:   MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);
2596:   PetscFree(*spidx);
2597:   return(0);
2598: }

2600: PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2601: {
2603:   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;

2606:   if (!Y->preallocated || !aij->nz) {
2607:     MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);
2608:   }
2609:   MatShift_Basic(Y,a);
2610:   return(0);
2611: }

2613: /* -------------------------------------------------------------------*/
2614: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2615:                                        MatGetRow_SeqBAIJ,
2616:                                        MatRestoreRow_SeqBAIJ,
2617:                                        MatMult_SeqBAIJ_N,
2618:                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2619:                                        MatMultTranspose_SeqBAIJ,
2620:                                        MatMultTransposeAdd_SeqBAIJ,
2621:                                        0,
2622:                                        0,
2623:                                        0,
2624:                                /* 10*/ 0,
2625:                                        MatLUFactor_SeqBAIJ,
2626:                                        0,
2627:                                        0,
2628:                                        MatTranspose_SeqBAIJ,
2629:                                /* 15*/ MatGetInfo_SeqBAIJ,
2630:                                        MatEqual_SeqBAIJ,
2631:                                        MatGetDiagonal_SeqBAIJ,
2632:                                        MatDiagonalScale_SeqBAIJ,
2633:                                        MatNorm_SeqBAIJ,
2634:                                /* 20*/ 0,
2635:                                        MatAssemblyEnd_SeqBAIJ,
2636:                                        MatSetOption_SeqBAIJ,
2637:                                        MatZeroEntries_SeqBAIJ,
2638:                                /* 24*/ MatZeroRows_SeqBAIJ,
2639:                                        0,
2640:                                        0,
2641:                                        0,
2642:                                        0,
2643:                                /* 29*/ MatSetUp_SeqBAIJ,
2644:                                        0,
2645:                                        0,
2646:                                        0,
2647:                                        0,
2648:                                /* 34*/ MatDuplicate_SeqBAIJ,
2649:                                        0,
2650:                                        0,
2651:                                        MatILUFactor_SeqBAIJ,
2652:                                        0,
2653:                                /* 39*/ MatAXPY_SeqBAIJ,
2654:                                        MatCreateSubMatrices_SeqBAIJ,
2655:                                        MatIncreaseOverlap_SeqBAIJ,
2656:                                        MatGetValues_SeqBAIJ,
2657:                                        MatCopy_SeqBAIJ,
2658:                                /* 44*/ 0,
2659:                                        MatScale_SeqBAIJ,
2660:                                        MatShift_SeqBAIJ,
2661:                                        0,
2662:                                        MatZeroRowsColumns_SeqBAIJ,
2663:                                /* 49*/ 0,
2664:                                        MatGetRowIJ_SeqBAIJ,
2665:                                        MatRestoreRowIJ_SeqBAIJ,
2666:                                        MatGetColumnIJ_SeqBAIJ,
2667:                                        MatRestoreColumnIJ_SeqBAIJ,
2668:                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2669:                                        0,
2670:                                        0,
2671:                                        0,
2672:                                        MatSetValuesBlocked_SeqBAIJ,
2673:                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2674:                                        MatDestroy_SeqBAIJ,
2675:                                        MatView_SeqBAIJ,
2676:                                        0,
2677:                                        0,
2678:                                /* 64*/ 0,
2679:                                        0,
2680:                                        0,
2681:                                        0,
2682:                                        0,
2683:                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2684:                                        0,
2685:                                        MatConvert_Basic,
2686:                                        0,
2687:                                        0,
2688:                                /* 74*/ 0,
2689:                                        MatFDColoringApply_BAIJ,
2690:                                        0,
2691:                                        0,
2692:                                        0,
2693:                                /* 79*/ 0,
2694:                                        0,
2695:                                        0,
2696:                                        0,
2697:                                        MatLoad_SeqBAIJ,
2698:                                /* 84*/ 0,
2699:                                        0,
2700:                                        0,
2701:                                        0,
2702:                                        0,
2703:                                /* 89*/ 0,
2704:                                        0,
2705:                                        0,
2706:                                        0,
2707:                                        0,
2708:                                /* 94*/ 0,
2709:                                        0,
2710:                                        0,
2711:                                        0,
2712:                                        0,
2713:                                /* 99*/ 0,
2714:                                        0,
2715:                                        0,
2716:                                        0,
2717:                                        0,
2718:                                /*104*/ 0,
2719:                                        MatRealPart_SeqBAIJ,
2720:                                        MatImaginaryPart_SeqBAIJ,
2721:                                        0,
2722:                                        0,
2723:                                /*109*/ 0,
2724:                                        0,
2725:                                        0,
2726:                                        0,
2727:                                        MatMissingDiagonal_SeqBAIJ,
2728:                                /*114*/ 0,
2729:                                        0,
2730:                                        0,
2731:                                        0,
2732:                                        0,
2733:                                /*119*/ 0,
2734:                                        0,
2735:                                        MatMultHermitianTranspose_SeqBAIJ,
2736:                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2737:                                        0,
2738:                                /*124*/ 0,
2739:                                        0,
2740:                                        MatInvertBlockDiagonal_SeqBAIJ,
2741:                                        0,
2742:                                        0,
2743:                                /*129*/ 0,
2744:                                        0,
2745:                                        0,
2746:                                        0,
2747:                                        0,
2748:                                /*134*/ 0,
2749:                                        0,
2750:                                        0,
2751:                                        0,
2752:                                        0,
2753:                                /*139*/ MatSetBlockSizes_Default,
2754:                                        0,
2755:                                        0,
2756:                                        MatFDColoringSetUp_SeqXAIJ,
2757:                                        0,
2758:                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ
2759: };

2761: PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2762: {
2763:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2764:   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;

2768:   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");

2770:   /* allocate space for values if not already there */
2771:   if (!aij->saved_values) {
2772:     PetscMalloc1(nz+1,&aij->saved_values);
2773:     PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));
2774:   }

2776:   /* copy values over */
2777:   PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
2778:   return(0);
2779: }

2781: PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2782: {
2783:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2785:   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;

2788:   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2789:   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");

2791:   /* copy values over */
2792:   PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
2793:   return(0);
2794: }

2796: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2797: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);

2799: static PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2800: {
2801:   Mat_SeqBAIJ    *b;
2803:   PetscInt       i,mbs,nbs,bs2;
2804:   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;

2807:   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2808:   if (nz == MAT_SKIP_ALLOCATION) {
2809:     skipallocation = PETSC_TRUE;
2810:     nz             = 0;
2811:   }

2813:   MatSetBlockSize(B,PetscAbs(bs));
2814:   PetscLayoutSetUp(B->rmap);
2815:   PetscLayoutSetUp(B->cmap);
2816:   PetscLayoutGetBlockSize(B->rmap,&bs);

2818:   B->preallocated = PETSC_TRUE;

2820:   mbs = B->rmap->n/bs;
2821:   nbs = B->cmap->n/bs;
2822:   bs2 = bs*bs;

2824:   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);

2826:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2827:   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2828:   if (nnz) {
2829:     for (i=0; i<mbs; i++) {
2830:       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2831:       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2832:     }
2833:   }

2835:   b    = (Mat_SeqBAIJ*)B->data;
2836:   PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2837:   PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);
2838:   PetscOptionsEnd();

2840:   if (!flg) {
2841:     switch (bs) {
2842:     case 1:
2843:       B->ops->mult    = MatMult_SeqBAIJ_1;
2844:       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2845:       break;
2846:     case 2:
2847:       B->ops->mult    = MatMult_SeqBAIJ_2;
2848:       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2849:       break;
2850:     case 3:
2851:       B->ops->mult    = MatMult_SeqBAIJ_3;
2852:       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2853:       break;
2854:     case 4:
2855:       B->ops->mult    = MatMult_SeqBAIJ_4;
2856:       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2857:       break;
2858:     case 5:
2859:       B->ops->mult    = MatMult_SeqBAIJ_5;
2860:       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2861:       break;
2862:     case 6:
2863:       B->ops->mult    = MatMult_SeqBAIJ_6;
2864:       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2865:       break;
2866:     case 7:
2867:       B->ops->mult    = MatMult_SeqBAIJ_7;
2868:       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2869:       break;
2870:     case 11:
2871:       B->ops->mult    = MatMult_SeqBAIJ_11;
2872:       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2873:       break;
2874:     case 15:
2875:       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2876:       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2877:       break;
2878:     default:
2879:       B->ops->mult    = MatMult_SeqBAIJ_N;
2880:       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2881:       break;
2882:     }
2883:   }
2884:   B->ops->sor = MatSOR_SeqBAIJ;
2885:   b->mbs = mbs;
2886:   b->nbs = nbs;
2887:   if (!skipallocation) {
2888:     if (!b->imax) {
2889:       PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);
2890:       PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));

2892:       b->free_imax_ilen = PETSC_TRUE;
2893:     }
2894:     /* b->ilen will count nonzeros in each block row so far. */
2895:     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2896:     if (!nnz) {
2897:       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2898:       else if (nz < 0) nz = 1;
2899:       for (i=0; i<mbs; i++) b->imax[i] = nz;
2900:       nz = nz*mbs;
2901:     } else {
2902:       nz = 0;
2903:       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2904:     }

2906:     /* allocate the matrix space */
2907:     MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
2908:     if (B->structure_only) {
2909:       PetscMalloc1(nz,&b->j);
2910:       PetscMalloc1(B->rmap->N+1,&b->i);
2911:       PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));
2912:     } else {
2913:       PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);
2914:       PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
2915:       PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
2916:     }
2917:     PetscMemzero(b->j,nz*sizeof(PetscInt));

2919:     if (B->structure_only) {
2920:       b->singlemalloc = PETSC_FALSE;
2921:       b->free_a       = PETSC_FALSE;
2922:     } else {
2923:       b->singlemalloc = PETSC_TRUE;
2924:       b->free_a       = PETSC_TRUE;
2925:     }
2926:     b->free_ij = PETSC_TRUE;

2928:     b->i[0] = 0;
2929:     for (i=1; i<mbs+1; i++) {
2930:       b->i[i] = b->i[i-1] + b->imax[i-1];
2931:     }

2933:   } else {
2934:     b->free_a  = PETSC_FALSE;
2935:     b->free_ij = PETSC_FALSE;
2936:   }

2938:   b->bs2              = bs2;
2939:   b->mbs              = mbs;
2940:   b->nz               = 0;
2941:   b->maxnz            = nz;
2942:   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2943:   B->was_assembled    = PETSC_FALSE;
2944:   B->assembled        = PETSC_FALSE;
2945:   if (realalloc) {MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);}
2946:   return(0);
2947: }

2949: PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2950: {
2951:   PetscInt       i,m,nz,nz_max=0,*nnz;
2952:   PetscScalar    *values=0;
2953:   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;

2957:   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2958:   PetscLayoutSetBlockSize(B->rmap,bs);
2959:   PetscLayoutSetBlockSize(B->cmap,bs);
2960:   PetscLayoutSetUp(B->rmap);
2961:   PetscLayoutSetUp(B->cmap);
2962:   PetscLayoutGetBlockSize(B->rmap,&bs);
2963:   m    = B->rmap->n/bs;

2965:   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2966:   PetscMalloc1(m+1, &nnz);
2967:   for (i=0; i<m; i++) {
2968:     nz = ii[i+1]- ii[i];
2969:     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2970:     nz_max = PetscMax(nz_max, nz);
2971:     nnz[i] = nz;
2972:   }
2973:   MatSeqBAIJSetPreallocation(B,bs,0,nnz);
2974:   PetscFree(nnz);

2976:   values = (PetscScalar*)V;
2977:   if (!values) {
2978:     PetscCalloc1(bs*bs*(nz_max+1),&values);
2979:   }
2980:   for (i=0; i<m; i++) {
2981:     PetscInt          ncols  = ii[i+1] - ii[i];
2982:     const PetscInt    *icols = jj + ii[i];
2983:     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2984:     if (!roworiented) {
2985:       MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);
2986:     } else {
2987:       PetscInt j;
2988:       for (j=0; j<ncols; j++) {
2989:         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2990:         MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);
2991:       }
2992:     }
2993:   }
2994:   if (!V) { PetscFree(values); }
2995:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2996:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2997:   MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2998:   return(0);
2999: }

3001: /*MC
3002:    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3003:    block sparse compressed row format.

3005:    Options Database Keys:
3006: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()

3008:   Level: beginner

3010: .seealso: MatCreateSeqBAIJ()
3011: M*/

3013: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);

3015: PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3016: {
3018:   PetscMPIInt    size;
3019:   Mat_SeqBAIJ    *b;

3022:   MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
3023:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");

3025:   PetscNewLog(B,&b);
3026:   B->data = (void*)b;
3027:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));

3029:   b->row          = 0;
3030:   b->col          = 0;
3031:   b->icol         = 0;
3032:   b->reallocs     = 0;
3033:   b->saved_values = 0;

3035:   b->roworiented        = PETSC_TRUE;
3036:   b->nonew              = 0;
3037:   b->diag               = 0;
3038:   B->spptr              = 0;
3039:   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3040:   b->keepnonzeropattern = PETSC_FALSE;

3042:   PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);
3043:   PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);
3044:   PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);
3045:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);
3046:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);
3047:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);
3048:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);
3049:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);
3050:   PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);
3051: #if defined(PETSC_HAVE_HYPRE)
3052:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_baij_hypre_C",MatConvert_AIJ_HYPRE);
3053: #endif
3054:   PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
3055:   return(0);
3056: }

3058: PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3059: {
3060:   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3062:   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;

3065:   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");

3067:   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3068:     c->imax           = a->imax;
3069:     c->ilen           = a->ilen;
3070:     c->free_imax_ilen = PETSC_FALSE;
3071:   } else {
3072:     PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);
3073:     PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));
3074:     for (i=0; i<mbs; i++) {
3075:       c->imax[i] = a->imax[i];
3076:       c->ilen[i] = a->ilen[i];
3077:     }
3078:     c->free_imax_ilen = PETSC_TRUE;
3079:   }

3081:   /* allocate the matrix space */
3082:   if (mallocmatspace) {
3083:     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3084:       PetscCalloc1(bs2*nz,&c->a);
3085:       PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));

3087:       c->i            = a->i;
3088:       c->j            = a->j;
3089:       c->singlemalloc = PETSC_FALSE;
3090:       c->free_a       = PETSC_TRUE;
3091:       c->free_ij      = PETSC_FALSE;
3092:       c->parent       = A;
3093:       C->preallocated = PETSC_TRUE;
3094:       C->assembled    = PETSC_TRUE;

3096:       PetscObjectReference((PetscObject)A);
3097:       MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3098:       MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3099:     } else {
3100:       PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);
3101:       PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));

3103:       c->singlemalloc = PETSC_TRUE;
3104:       c->free_a       = PETSC_TRUE;
3105:       c->free_ij      = PETSC_TRUE;

3107:       PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
3108:       if (mbs > 0) {
3109:         PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
3110:         if (cpvalues == MAT_COPY_VALUES) {
3111:           PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
3112:         } else {
3113:           PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
3114:         }
3115:       }
3116:       C->preallocated = PETSC_TRUE;
3117:       C->assembled    = PETSC_TRUE;
3118:     }
3119:   }

3121:   c->roworiented = a->roworiented;
3122:   c->nonew       = a->nonew;

3124:   PetscLayoutReference(A->rmap,&C->rmap);
3125:   PetscLayoutReference(A->cmap,&C->cmap);

3127:   c->bs2         = a->bs2;
3128:   c->mbs         = a->mbs;
3129:   c->nbs         = a->nbs;

3131:   if (a->diag) {
3132:     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3133:       c->diag      = a->diag;
3134:       c->free_diag = PETSC_FALSE;
3135:     } else {
3136:       PetscMalloc1(mbs+1,&c->diag);
3137:       PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));
3138:       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3139:       c->free_diag = PETSC_TRUE;
3140:     }
3141:   } else c->diag = 0;

3143:   c->nz         = a->nz;
3144:   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3145:   c->solve_work = NULL;
3146:   c->mult_work  = NULL;
3147:   c->sor_workt  = NULL;
3148:   c->sor_work   = NULL;

3150:   c->compressedrow.use   = a->compressedrow.use;
3151:   c->compressedrow.nrows = a->compressedrow.nrows;
3152:   if (a->compressedrow.use) {
3153:     i    = a->compressedrow.nrows;
3154:     PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);
3155:     PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));
3156:     PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));
3157:     PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));
3158:   } else {
3159:     c->compressedrow.use    = PETSC_FALSE;
3160:     c->compressedrow.i      = NULL;
3161:     c->compressedrow.rindex = NULL;
3162:   }
3163:   C->nonzerostate = A->nonzerostate;

3165:   PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
3166:   return(0);
3167: }

3169: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3170: {

3174:   MatCreate(PetscObjectComm((PetscObject)A),B);
3175:   MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);
3176:   MatSetType(*B,MATSEQBAIJ);
3177:   MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3178:   return(0);
3179: }

3181: PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3182: {
3183:   Mat_SeqBAIJ    *a;
3185:   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3186:   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3187:   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3188:   PetscInt       *masked,nmask,tmp,bs2,ishift;
3189:   PetscMPIInt    size;
3190:   int            fd;
3191:   PetscScalar    *aa;
3192:   MPI_Comm       comm;

3195:   /* force binary viewer to load .info file if it has not yet done so */
3196:   PetscViewerSetUp(viewer);
3197:   PetscObjectGetComm((PetscObject)viewer,&comm);
3198:   PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");
3199:   PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);
3200:   PetscOptionsEnd();
3201:   if (bs < 0) bs = 1;
3202:   bs2  = bs*bs;

3204:   MPI_Comm_size(comm,&size);
3205:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3206:   PetscViewerBinaryGetDescriptor(viewer,&fd);
3207:   PetscBinaryRead(fd,header,4,PETSC_INT);
3208:   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3209:   M = header[1]; N = header[2]; nz = header[3];

3211:   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3212:   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");

3214:   /*
3215:      This code adds extra rows to make sure the number of rows is
3216:     divisible by the blocksize
3217:   */
3218:   mbs        = M/bs;
3219:   extra_rows = bs - M + bs*(mbs);
3220:   if (extra_rows == bs) extra_rows = 0;
3221:   else mbs++;
3222:   if (extra_rows) {
3223:     PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
3224:   }

3226:   /* Set global sizes if not already set */
3227:   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3228:     MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3229:   } else { /* Check if the matrix global sizes are correct */
3230:     MatGetSize(newmat,&rows,&cols);
3231:     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3232:       MatGetLocalSize(newmat,&rows,&cols);
3233:     }
3234:     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3235:   }

3237:   /* read in row lengths */
3238:   PetscMalloc1(M+extra_rows,&rowlengths);
3239:   PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
3240:   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;

3242:   /* read in column indices */
3243:   PetscMalloc1(nz+extra_rows,&jj);
3244:   PetscBinaryRead(fd,jj,nz,PETSC_INT);
3245:   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;

3247:   /* loop over row lengths determining block row lengths */
3248:   PetscCalloc1(mbs,&browlengths);
3249:   PetscMalloc2(mbs,&mask,mbs,&masked);
3250:   PetscMemzero(mask,mbs*sizeof(PetscInt));
3251:   rowcount = 0;
3252:   nzcount  = 0;
3253:   for (i=0; i<mbs; i++) {
3254:     nmask = 0;
3255:     for (j=0; j<bs; j++) {
3256:       kmax = rowlengths[rowcount];
3257:       for (k=0; k<kmax; k++) {
3258:         tmp = jj[nzcount++]/bs;
3259:         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3260:       }
3261:       rowcount++;
3262:     }
3263:     browlengths[i] += nmask;
3264:     /* zero out the mask elements we set */
3265:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3266:   }

3268:   /* Do preallocation  */
3269:   MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);
3270:   a    = (Mat_SeqBAIJ*)newmat->data;

3272:   /* set matrix "i" values */
3273:   a->i[0] = 0;
3274:   for (i=1; i<= mbs; i++) {
3275:     a->i[i]      = a->i[i-1] + browlengths[i-1];
3276:     a->ilen[i-1] = browlengths[i-1];
3277:   }
3278:   a->nz = 0;
3279:   for (i=0; i<mbs; i++) a->nz += browlengths[i];

3281:   /* read in nonzero values */
3282:   PetscMalloc1(nz+extra_rows,&aa);
3283:   PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
3284:   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;

3286:   /* set "a" and "j" values into matrix */
3287:   nzcount = 0; jcount = 0;
3288:   for (i=0; i<mbs; i++) {
3289:     nzcountb = nzcount;
3290:     nmask    = 0;
3291:     for (j=0; j<bs; j++) {
3292:       kmax = rowlengths[i*bs+j];
3293:       for (k=0; k<kmax; k++) {
3294:         tmp = jj[nzcount++]/bs;
3295:         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3296:       }
3297:     }
3298:     /* sort the masked values */
3299:     PetscSortInt(nmask,masked);

3301:     /* set "j" values into matrix */
3302:     maskcount = 1;
3303:     for (j=0; j<nmask; j++) {
3304:       a->j[jcount++]  = masked[j];
3305:       mask[masked[j]] = maskcount++;
3306:     }
3307:     /* set "a" values into matrix */
3308:     ishift = bs2*a->i[i];
3309:     for (j=0; j<bs; j++) {
3310:       kmax = rowlengths[i*bs+j];
3311:       for (k=0; k<kmax; k++) {
3312:         tmp       = jj[nzcountb]/bs;
3313:         block     = mask[tmp] - 1;
3314:         point     = jj[nzcountb] - bs*tmp;
3315:         idx       = ishift + bs2*block + j + bs*point;
3316:         a->a[idx] = (MatScalar)aa[nzcountb++];
3317:       }
3318:     }
3319:     /* zero out the mask elements we set */
3320:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3321:   }
3322:   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");

3324:   PetscFree(rowlengths);
3325:   PetscFree(browlengths);
3326:   PetscFree(aa);
3327:   PetscFree(jj);
3328:   PetscFree2(mask,masked);

3330:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
3331:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
3332:   return(0);
3333: }

3335: /*@C
3336:    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3337:    compressed row) format.  For good matrix assembly performance the
3338:    user should preallocate the matrix storage by setting the parameter nz
3339:    (or the array nnz).  By setting these parameters accurately, performance
3340:    during matrix assembly can be increased by more than a factor of 50.

3342:    Collective on MPI_Comm

3344:    Input Parameters:
3345: +  comm - MPI communicator, set to PETSC_COMM_SELF
3346: .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3347:           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3348: .  m - number of rows
3349: .  n - number of columns
3350: .  nz - number of nonzero blocks  per block row (same for all rows)
3351: -  nnz - array containing the number of nonzero blocks in the various block rows
3352:          (possibly different for each block row) or NULL

3354:    Output Parameter:
3355: .  A - the matrix

3357:    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3358:    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3359:    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

3361:    Options Database Keys:
3362: .   -mat_no_unroll - uses code that does not unroll the loops in the
3363:                      block calculations (much slower)
3364: .    -mat_block_size - size of the blocks to use

3366:    Level: intermediate

3368:    Notes:
3369:    The number of rows and columns must be divisible by blocksize.

3371:    If the nnz parameter is given then the nz parameter is ignored

3373:    A nonzero block is any block that as 1 or more nonzeros in it

3375:    The block AIJ format is fully compatible with standard Fortran 77
3376:    storage.  That is, the stored row and column indices can begin at
3377:    either one (as in Fortran) or zero.  See the users' manual for details.

3379:    Specify the preallocated storage with either nz or nnz (not both).
3380:    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3381:    allocation.  See Users-Manual: ch_mat for details.
3382:    matrices.

3384: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3385: @*/
3386: PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3387: {

3391:   MatCreate(comm,A);
3392:   MatSetSizes(*A,m,n,m,n);
3393:   MatSetType(*A,MATSEQBAIJ);
3394:   MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);
3395:   return(0);
3396: }

3398: /*@C
3399:    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3400:    per row in the matrix. For good matrix assembly performance the
3401:    user should preallocate the matrix storage by setting the parameter nz
3402:    (or the array nnz).  By setting these parameters accurately, performance
3403:    during matrix assembly can be increased by more than a factor of 50.

3405:    Collective on MPI_Comm

3407:    Input Parameters:
3408: +  B - the matrix
3409: .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3410:           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3411: .  nz - number of block nonzeros per block row (same for all rows)
3412: -  nnz - array containing the number of block nonzeros in the various block rows
3413:          (possibly different for each block row) or NULL

3415:    Options Database Keys:
3416: .   -mat_no_unroll - uses code that does not unroll the loops in the
3417:                      block calculations (much slower)
3418: .   -mat_block_size - size of the blocks to use

3420:    Level: intermediate

3422:    Notes:
3423:    If the nnz parameter is given then the nz parameter is ignored

3425:    You can call MatGetInfo() to get information on how effective the preallocation was;
3426:    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3427:    You can also run with the option -info and look for messages with the string
3428:    malloc in them to see if additional memory allocation was needed.

3430:    The block AIJ format is fully compatible with standard Fortran 77
3431:    storage.  That is, the stored row and column indices can begin at
3432:    either one (as in Fortran) or zero.  See the users' manual for details.

3434:    Specify the preallocated storage with either nz or nnz (not both).
3435:    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3436:    allocation.  See Users-Manual: ch_mat for details.

3438: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3439: @*/
3440: PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3441: {

3448:   PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
3449:   return(0);
3450: }

3452: /*@C
3453:    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3454:    (the default sequential PETSc format).

3456:    Collective on MPI_Comm

3458:    Input Parameters:
3459: +  B - the matrix
3460: .  i - the indices into j for the start of each local row (starts with zero)
3461: .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3462: -  v - optional values in the matrix

3464:    Level: developer

3466:    Notes:
3467:    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3468:    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3469:    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3470:    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3471:    block column and the second index is over columns within a block.

3473: .keywords: matrix, aij, compressed row, sparse

3475: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3476: @*/
3477: PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3478: {

3485:   PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
3486:   return(0);
3487: }


3490: /*@
3491:      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.

3493:      Collective on MPI_Comm

3495:    Input Parameters:
3496: +  comm - must be an MPI communicator of size 1
3497: .  bs - size of block
3498: .  m - number of rows
3499: .  n - number of columns
3500: .  i - row indices
3501: .  j - column indices
3502: -  a - matrix values

3504:    Output Parameter:
3505: .  mat - the matrix

3507:    Level: advanced

3509:    Notes:
3510:        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3511:     once the matrix is destroyed

3513:        You cannot set new nonzero locations into this matrix, that will generate an error.

3515:        The i and j indices are 0 based

3517:        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).

3519:       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3520:       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3521:       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3522:       with column-major ordering within blocks.

3524: .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()

3526: @*/
3527: PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3528: {
3530:   PetscInt       ii;
3531:   Mat_SeqBAIJ    *baij;

3534:   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3535:   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");

3537:   MatCreate(comm,mat);
3538:   MatSetSizes(*mat,m,n,m,n);
3539:   MatSetType(*mat,MATSEQBAIJ);
3540:   MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);
3541:   baij = (Mat_SeqBAIJ*)(*mat)->data;
3542:   PetscMalloc2(m,&baij->imax,m,&baij->ilen);
3543:   PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));

3545:   baij->i = i;
3546:   baij->j = j;
3547:   baij->a = a;

3549:   baij->singlemalloc = PETSC_FALSE;
3550:   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3551:   baij->free_a       = PETSC_FALSE;
3552:   baij->free_ij      = PETSC_FALSE;

3554:   for (ii=0; ii<m; ii++) {
3555:     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3556: #if defined(PETSC_USE_DEBUG)
3557:     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3558: #endif
3559:   }
3560: #if defined(PETSC_USE_DEBUG)
3561:   for (ii=0; ii<baij->i[m]; ii++) {
3562:     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3563:     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3564:   }
3565: #endif

3567:   MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3568:   MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3569:   return(0);
3570: }

3572: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3573: {
3575:   PetscMPIInt    size;

3578:   MPI_Comm_size(comm,&size);
3579:   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3580:     MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);
3581:   } else {
3582:     MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);
3583:   }
3584:   return(0);
3585: }