Actual source code: baij.c

  1: #define PETSCMAT_DLL

  3: /*
  4:     Defines the basic matrix operations for the BAIJ (compressed row)
  5:   matrix storage format.
  6: */
 7:  #include ../src/mat/impls/baij/seq/baij.h
 8:  #include ../src/inline/spops.h
 9:  #include petscsys.h

 11:  #include ../src/inline/ilu.h

 15: /*@
 16:   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.

 18:   Collective on Mat

 20:   Input Parameters:
 21: . mat - the matrix

 23:   Level: advanced
 24: @*/
 25: PetscErrorCode  MatSeqBAIJInvertBlockDiagonal(Mat mat)
 26: {
 27:   PetscErrorCode ierr,(*f)(Mat);

 31:   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
 32:   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

 34:   PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);
 35:   if (f) {
 36:     (*f)(mat);
 37:   } else {
 38:     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
 39:   }
 40:   return(0);
 41: }

 46: PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A)
 47: {
 48:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
 50:   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs;
 51:   MatScalar      *v = a->a,*odiag,*diag,*mdiag;
 52:   PetscReal      shift = 0.0;

 55:   if (a->idiagvalid) return(0);
 56:   MatMarkDiagonal_SeqBAIJ(A);
 57:   diag_offset = a->diag;
 58:   if (!a->idiag) {
 59:     PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);
 60:   }
 61:   diag  = a->idiag;
 62:   mdiag = a->idiag+bs*bs*mbs;
 63:   /* factor and invert each block */
 64:   switch (bs){
 65:     case 2:
 66:       for (i=0; i<mbs; i++) {
 67:         odiag   = v + 4*diag_offset[i];
 68:         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 69:         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 70:         Kernel_A_gets_inverse_A_2(diag,shift);
 71:         diag    += 4;
 72:         mdiag   += 4;
 73:       }
 74:       break;
 75:     case 3:
 76:       for (i=0; i<mbs; i++) {
 77:         odiag    = v + 9*diag_offset[i];
 78:         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 79:         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
 80:         diag[8]  = odiag[8];
 81:         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 82:         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
 83:         mdiag[8] = odiag[8];
 84:         Kernel_A_gets_inverse_A_3(diag,shift);
 85:         diag    += 9;
 86:         mdiag   += 9;
 87:       }
 88:       break;
 89:     case 4:
 90:       for (i=0; i<mbs; i++) {
 91:         odiag  = v + 16*diag_offset[i];
 92:         PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
 93:         PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
 94:         Kernel_A_gets_inverse_A_4(diag,shift);
 95:         diag  += 16;
 96:         mdiag += 16;
 97:       }
 98:       break;
 99:     case 5:
100:       for (i=0; i<mbs; i++) {
101:         odiag = v + 25*diag_offset[i];
102:         PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
103:         PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
104:         Kernel_A_gets_inverse_A_5(diag,shift);
105:         diag  += 25;
106:         mdiag += 25;
107:       }
108:       break;
109:     default:
110:       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
111:   }
112:   a->idiagvalid = PETSC_TRUE;
113:   return(0);
114: }

119: PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
120: {
121:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
122:   PetscScalar        *x,x1,s1;
123:   const PetscScalar  *b;
124:   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
125:   PetscErrorCode     ierr;
126:   PetscInt           m = a->mbs,i,i2,nz,idx;
127:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

130:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
131:   its = its*lits;
132:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
133:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
134:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
135:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
136:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

138:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

140:   diag  = a->diag;
141:   idiag = a->idiag;
142:   VecGetArray(xx,&x);
143:   VecGetArray(bb,(PetscScalar**)&b);

145:   if (flag & SOR_ZERO_INITIAL_GUESS) {
146:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
147:       x[0] = b[0]*idiag[0];
148:       i2     = 1;
149:       idiag += 1;
150:       for (i=1; i<m; i++) {
151:         v     = aa + ai[i];
152:         vi    = aj + ai[i];
153:         nz    = diag[i] - ai[i];
154:         s1    = b[i2];
155:         while (nz--) {
156:           idx  = (*vi++);
157:           x1   = x[idx];
158:           s1  -= v[0]*x1;
159:           v   += 1;
160:         }
161:         x[i2]   = idiag[0]*s1;
162:         idiag   += 1;
163:         i2      += 1;
164:       }
165:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166:       PetscLogFlops(a->nz);
167:     }
168:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170:       i2    = 0;
171:       mdiag = a->idiag+a->mbs;
172:       for (i=0; i<m; i++) {
173:         x1      = x[i2];
174:         x[i2]   = mdiag[0]*x1;
175:         mdiag  += 1;
176:         i2     += 1;
177:       }
178:       PetscLogFlops(m);
179:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
180:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
181:     }
182:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
183:       idiag   = a->idiag+a->mbs - 1;
184:       i2      = m - 1;
185:       x1      = x[i2];
186:       x[i2]   = idiag[0]*x1;
187:       idiag -= 1;
188:       i2    -= 1;
189:       for (i=m-2; i>=0; i--) {
190:         v     = aa + (diag[i]+1);
191:         vi    = aj + diag[i] + 1;
192:         nz    = ai[i+1] - diag[i] - 1;
193:         s1    = x[i2];
194:         while (nz--) {
195:           idx  = (*vi++);
196:           x1   = x[idx];
197:           s1  -= v[0]*x1;
198:           v   += 1;
199:         }
200:         x[i2]   = idiag[0]*s1;
201:         idiag   -= 2;
202:         i2      -= 1;
203:       }
204:       PetscLogFlops(a->nz);
205:     }
206:   } else {
207:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
208:   }
209:   VecRestoreArray(xx,&x);
210:   VecRestoreArray(bb,(PetscScalar**)&b);
211:   return(0);
212: }

216: PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
217: {
218:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
219:   PetscScalar        *x,x1,x2,s1,s2;
220:   const PetscScalar  *b;
221:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
222:   PetscErrorCode     ierr;
223:   PetscInt           m = a->mbs,i,i2,nz,idx;
224:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

227:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
228:   its = its*lits;
229:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
230:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
231:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
232:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
233:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

235:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

237:   diag  = a->diag;
238:   idiag = a->idiag;
239:   VecGetArray(xx,&x);
240:   VecGetArray(bb,(PetscScalar**)&b);

242:   if (flag & SOR_ZERO_INITIAL_GUESS) {
243:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
244:       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
245:       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
246:       i2     = 2;
247:       idiag += 4;
248:       for (i=1; i<m; i++) {
249:         v     = aa + 4*ai[i];
250:         vi    = aj + ai[i];
251:         nz    = diag[i] - ai[i];
252:         s1    = b[i2]; s2 = b[i2+1];
253:         while (nz--) {
254:           idx  = 2*(*vi++);
255:           x1   = x[idx]; x2 = x[1+idx];
256:           s1  -= v[0]*x1 + v[2]*x2;
257:           s2  -= v[1]*x1 + v[3]*x2;
258:           v   += 4;
259:         }
260:         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
261:         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
262:         idiag   += 4;
263:         i2      += 2;
264:       }
265:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
266:       PetscLogFlops(4*(a->nz));
267:     }
268:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
269:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
270:       i2    = 0;
271:       mdiag = a->idiag+4*a->mbs;
272:       for (i=0; i<m; i++) {
273:         x1      = x[i2]; x2 = x[i2+1];
274:         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
275:         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
276:         mdiag  += 4;
277:         i2     += 2;
278:       }
279:       PetscLogFlops(6*m);
280:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
281:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
282:     }
283:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
284:       idiag   = a->idiag+4*a->mbs - 4;
285:       i2      = 2*m - 2;
286:       x1      = x[i2]; x2 = x[i2+1];
287:       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
288:       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
289:       idiag -= 4;
290:       i2    -= 2;
291:       for (i=m-2; i>=0; i--) {
292:         v     = aa + 4*(diag[i]+1);
293:         vi    = aj + diag[i] + 1;
294:         nz    = ai[i+1] - diag[i] - 1;
295:         s1    = x[i2]; s2 = x[i2+1];
296:         while (nz--) {
297:           idx  = 2*(*vi++);
298:           x1   = x[idx]; x2 = x[1+idx];
299:           s1  -= v[0]*x1 + v[2]*x2;
300:           s2  -= v[1]*x1 + v[3]*x2;
301:           v   += 4;
302:         }
303:         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
304:         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
305:         idiag   -= 4;
306:         i2      -= 2;
307:       }
308:       PetscLogFlops(4*(a->nz));
309:     }
310:   } else {
311:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
312:   }
313:   VecRestoreArray(xx,&x);
314:   VecRestoreArray(bb,(PetscScalar**)&b);
315:   return(0);
316: }

320: PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
321: {
322:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
323:   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
324:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
325:   const PetscScalar  *b;
326:   PetscErrorCode     ierr;
327:   PetscInt           m = a->mbs,i,i2,nz,idx;
328:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

331:   its = its*lits;
332:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
333:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
334:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
335:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
336:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
337:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

339:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

341:   diag  = a->diag;
342:   idiag = a->idiag;
343:   VecGetArray(xx,&x);
344:   VecGetArray(bb,(PetscScalar**)&b);

346:   if (flag & SOR_ZERO_INITIAL_GUESS) {
347:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
348:       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
349:       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
350:       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
351:       i2     = 3;
352:       idiag += 9;
353:       for (i=1; i<m; i++) {
354:         v     = aa + 9*ai[i];
355:         vi    = aj + ai[i];
356:         nz    = diag[i] - ai[i];
357:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
358:         while (nz--) {
359:           idx  = 3*(*vi++);
360:           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
361:           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
362:           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
363:           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
364:           v   += 9;
365:         }
366:         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
367:         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
368:         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
369:         idiag   += 9;
370:         i2      += 3;
371:       }
372:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
373:       PetscLogFlops(9*(a->nz));
374:     }
375:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
376:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
377:       i2    = 0;
378:       mdiag = a->idiag+9*a->mbs;
379:       for (i=0; i<m; i++) {
380:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
381:         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
382:         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
383:         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
384:         mdiag  += 9;
385:         i2     += 3;
386:       }
387:       PetscLogFlops(15*m);
388:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
389:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
390:     }
391:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
392:       idiag   = a->idiag+9*a->mbs - 9;
393:       i2      = 3*m - 3;
394:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
395:       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
396:       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
397:       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
398:       idiag -= 9;
399:       i2    -= 3;
400:       for (i=m-2; i>=0; i--) {
401:         v     = aa + 9*(diag[i]+1);
402:         vi    = aj + diag[i] + 1;
403:         nz    = ai[i+1] - diag[i] - 1;
404:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
405:         while (nz--) {
406:           idx  = 3*(*vi++);
407:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
408:           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
409:           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
410:           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
411:           v   += 9;
412:         }
413:         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
414:         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
415:         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
416:         idiag   -= 9;
417:         i2      -= 3;
418:       }
419:       PetscLogFlops(9*(a->nz));
420:     }
421:   } else {
422:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
423:   }
424:   VecRestoreArray(xx,&x);
425:   VecRestoreArray(bb,(PetscScalar**)&b);
426:   return(0);
427: }

431: PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
432: {
433:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
434:   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
435:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
436:   const PetscScalar  *b;
437:   PetscErrorCode     ierr;
438:   PetscInt           m = a->mbs,i,i2,nz,idx;
439:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

442:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
443:   its = its*lits;
444:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
445:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
446:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
447:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
448:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

450:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

452:   diag  = a->diag;
453:   idiag = a->idiag;
454:   VecGetArray(xx,&x);
455:   VecGetArray(bb,(PetscScalar**)&b);

457:   if (flag & SOR_ZERO_INITIAL_GUESS) {
458:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
459:       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
460:       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
461:       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
462:       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
463:       i2     = 4;
464:       idiag += 16;
465:       for (i=1; i<m; i++) {
466:         v     = aa + 16*ai[i];
467:         vi    = aj + ai[i];
468:         nz    = diag[i] - ai[i];
469:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
470:         while (nz--) {
471:           idx  = 4*(*vi++);
472:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
473:           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
474:           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
475:           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
476:           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
477:           v   += 16;
478:         }
479:         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
480:         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
481:         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
482:         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
483:         idiag   += 16;
484:         i2      += 4;
485:       }
486:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
487:       PetscLogFlops(16*(a->nz));
488:     }
489:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
490:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
491:       i2    = 0;
492:       mdiag = a->idiag+16*a->mbs;
493:       for (i=0; i<m; i++) {
494:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
495:         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
496:         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
497:         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
498:         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
499:         mdiag  += 16;
500:         i2     += 4;
501:       }
502:       PetscLogFlops(28*m);
503:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
504:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
505:     }
506:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
507:       idiag   = a->idiag+16*a->mbs - 16;
508:       i2      = 4*m - 4;
509:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
510:       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
511:       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
512:       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
513:       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
514:       idiag -= 16;
515:       i2    -= 4;
516:       for (i=m-2; i>=0; i--) {
517:         v     = aa + 16*(diag[i]+1);
518:         vi    = aj + diag[i] + 1;
519:         nz    = ai[i+1] - diag[i] - 1;
520:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
521:         while (nz--) {
522:           idx  = 4*(*vi++);
523:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
524:           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
525:           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
526:           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
527:           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
528:           v   += 16;
529:         }
530:         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
531:         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
532:         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
533:         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
534:         idiag   -= 16;
535:         i2      -= 4;
536:       }
537:       PetscLogFlops(16*(a->nz));
538:     }
539:   } else {
540:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
541:   }
542:   VecRestoreArray(xx,&x);
543:   VecRestoreArray(bb,(PetscScalar**)&b);
544:   return(0);
545: }

549: PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
550: {
551:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
552:   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
553:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
554:   const PetscScalar  *b;
555:   PetscErrorCode     ierr;
556:   PetscInt           m = a->mbs,i,i2,nz,idx;
557:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

560:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
561:   its = its*lits;
562:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
563:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
564:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
565:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
566:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

568:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

570:   diag  = a->diag;
571:   idiag = a->idiag;
572:   VecGetArray(xx,&x);
573:   VecGetArray(bb,(PetscScalar**)&b);

575:   if (flag & SOR_ZERO_INITIAL_GUESS) {
576:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
577:       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
578:       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
579:       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
580:       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
581:       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
582:       i2     = 5;
583:       idiag += 25;
584:       for (i=1; i<m; i++) {
585:         v     = aa + 25*ai[i];
586:         vi    = aj + ai[i];
587:         nz    = diag[i] - ai[i];
588:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
589:         while (nz--) {
590:           idx  = 5*(*vi++);
591:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
592:           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
593:           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
594:           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
595:           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
596:           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
597:           v   += 25;
598:         }
599:         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
600:         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
601:         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
602:         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
603:         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
604:         idiag   += 25;
605:         i2      += 5;
606:       }
607:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
608:       PetscLogFlops(25*(a->nz));
609:     }
610:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
611:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
612:       i2    = 0;
613:       mdiag = a->idiag+25*a->mbs;
614:       for (i=0; i<m; i++) {
615:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
616:         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
617:         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
618:         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
619:         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
620:         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
621:         mdiag  += 25;
622:         i2     += 5;
623:       }
624:       PetscLogFlops(45*m);
625:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
626:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
627:     }
628:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
629:       idiag   = a->idiag+25*a->mbs - 25;
630:       i2      = 5*m - 5;
631:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
632:       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
633:       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
634:       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
635:       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
636:       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
637:       idiag -= 25;
638:       i2    -= 5;
639:       for (i=m-2; i>=0; i--) {
640:         v     = aa + 25*(diag[i]+1);
641:         vi    = aj + diag[i] + 1;
642:         nz    = ai[i+1] - diag[i] - 1;
643:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
644:         while (nz--) {
645:           idx  = 5*(*vi++);
646:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
647:           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
648:           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
649:           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
650:           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
651:           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
652:           v   += 25;
653:         }
654:         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
655:         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
656:         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
657:         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
658:         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
659:         idiag   -= 25;
660:         i2      -= 5;
661:       }
662:       PetscLogFlops(25*(a->nz));
663:     }
664:   } else {
665:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
666:   }
667:   VecRestoreArray(xx,&x);
668:   VecRestoreArray(bb,(PetscScalar**)&b);
669:   return(0);
670: }

674: PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
675: {
676:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
677:   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
678:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
679:   const PetscScalar  *b;
680:   PetscErrorCode     ierr;
681:   PetscInt           m = a->mbs,i,i2,nz,idx;
682:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

685:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
686:   its = its*lits;
687:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
688:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
689:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
690:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
691:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

693:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

695:   diag  = a->diag;
696:   idiag = a->idiag;
697:   VecGetArray(xx,&x);
698:   VecGetArray(bb,(PetscScalar**)&b);

700:   if (flag & SOR_ZERO_INITIAL_GUESS) {
701:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
702:       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
703:       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
704:       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
705:       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
706:       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
707:       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
708:       i2     = 6;
709:       idiag += 36;
710:       for (i=1; i<m; i++) {
711:         v     = aa + 36*ai[i];
712:         vi    = aj + ai[i];
713:         nz    = diag[i] - ai[i];
714:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
715:         while (nz--) {
716:           idx  = 6*(*vi++);
717:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
718:           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
719:           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
720:           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
721:           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
722:           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
723:           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
724:           v   += 36;
725:         }
726:         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
727:         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
728:         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
729:         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
730:         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
731:         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
732:         idiag   += 36;
733:         i2      += 6;
734:       }
735:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
736:       PetscLogFlops(36*(a->nz));
737:     }
738:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
739:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
740:       i2    = 0;
741:       mdiag = a->idiag+36*a->mbs;
742:       for (i=0; i<m; i++) {
743:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
744:         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
745:         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
746:         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
747:         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
748:         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
749:         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
750:         mdiag  += 36;
751:         i2     += 6;
752:       }
753:       PetscLogFlops(60*m);
754:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
755:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
756:     }
757:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
758:       idiag   = a->idiag+36*a->mbs - 36;
759:       i2      = 6*m - 6;
760:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
761:       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
762:       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
763:       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
764:       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
765:       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
766:       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
767:       idiag -= 36;
768:       i2    -= 6;
769:       for (i=m-2; i>=0; i--) {
770:         v     = aa + 36*(diag[i]+1);
771:         vi    = aj + diag[i] + 1;
772:         nz    = ai[i+1] - diag[i] - 1;
773:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
774:         while (nz--) {
775:           idx  = 6*(*vi++);
776:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
777:           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
778:           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
779:           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
780:           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
781:           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
782:           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
783:           v   += 36;
784:         }
785:         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
786:         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
787:         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
788:         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
789:         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
790:         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
791:         idiag   -= 36;
792:         i2      -= 6;
793:       }
794:       PetscLogFlops(36*(a->nz));
795:     }
796:   } else {
797:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
798:   }
799:   VecRestoreArray(xx,&x);
800:   VecRestoreArray(bb,(PetscScalar**)&b);
801:   return(0);
802: }

806: PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
807: {
808:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
809:   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
810:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
811:   const PetscScalar  *b;
812:   PetscErrorCode     ierr;
813:   PetscInt           m = a->mbs,i,i2,nz,idx;
814:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

817:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
818:   its = its*lits;
819:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
820:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
821:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
822:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
823:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

825:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

827:   diag  = a->diag;
828:   idiag = a->idiag;
829:   VecGetArray(xx,&x);
830:   VecGetArray(bb,(PetscScalar**)&b);

832:   if (flag & SOR_ZERO_INITIAL_GUESS) {
833:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
834:       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
835:       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
836:       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
837:       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
838:       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
839:       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
840:       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
841:       i2     = 7;
842:       idiag += 49;
843:       for (i=1; i<m; i++) {
844:         v     = aa + 49*ai[i];
845:         vi    = aj + ai[i];
846:         nz    = diag[i] - ai[i];
847:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
848:         while (nz--) {
849:           idx  = 7*(*vi++);
850:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
851:           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
852:           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
853:           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
854:           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
855:           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
856:           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
857:           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
858:           v   += 49;
859:         }
860:         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
861:         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
862:         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
863:         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
864:         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
865:         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
866:         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
867:         idiag   += 49;
868:         i2      += 7;
869:       }
870:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
871:       PetscLogFlops(49*(a->nz));
872:     }
873:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
874:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
875:       i2    = 0;
876:       mdiag = a->idiag+49*a->mbs;
877:       for (i=0; i<m; i++) {
878:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
879:         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
880:         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
881:         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
882:         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
883:         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
884:         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
885:         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
886:         mdiag  += 36;
887:         i2     += 6;
888:       }
889:       PetscLogFlops(93*m);
890:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
891:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
892:     }
893:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
894:       idiag   = a->idiag+49*a->mbs - 49;
895:       i2      = 7*m - 7;
896:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
897:       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
898:       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
899:       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
900:       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
901:       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
902:       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
903:       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
904:       idiag -= 49;
905:       i2    -= 7;
906:       for (i=m-2; i>=0; i--) {
907:         v     = aa + 49*(diag[i]+1);
908:         vi    = aj + diag[i] + 1;
909:         nz    = ai[i+1] - diag[i] - 1;
910:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
911:         while (nz--) {
912:           idx  = 7*(*vi++);
913:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
914:           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
915:           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
916:           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
917:           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
918:           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
919:           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
920:           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
921:           v   += 49;
922:         }
923:         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
924:         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
925:         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
926:         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
927:         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
928:         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
929:         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
930:         idiag   -= 49;
931:         i2      -= 7;
932:       }
933:       PetscLogFlops(49*(a->nz));
934:     }
935:   } else {
936:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
937:   }
938:   VecRestoreArray(xx,&x);
939:   VecRestoreArray(bb,(PetscScalar**)&b);
940:   return(0);
941: }

943: /*
944:     Special version for direct calls from Fortran (Used in PETSc-fun3d)
945: */
946: #if defined(PETSC_HAVE_FORTRAN_CAPS)
947: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
948: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
949: #define matsetvaluesblocked4_ matsetvaluesblocked4
950: #endif

955: void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
956: {
957:   Mat               A = *AA;
958:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
959:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
960:   PetscInt          *ai=a->i,*ailen=a->ilen;
961:   PetscInt          *aj=a->j,stepval,lastcol = -1;
962:   const PetscScalar *value = v;
963:   MatScalar         *ap,*aa = a->a,*bap;

966:   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
967:   stepval = (n-1)*4;
968:   for (k=0; k<m; k++) { /* loop over added rows */
969:     row  = im[k];
970:     rp   = aj + ai[row];
971:     ap   = aa + 16*ai[row];
972:     nrow = ailen[row];
973:     low  = 0;
974:     high = nrow;
975:     for (l=0; l<n; l++) { /* loop over added columns */
976:       col = in[l];
977:       if (col <= lastcol) low = 0; else high = nrow;
978:       lastcol = col;
979:       value = v + k*(stepval+4 + l)*4;
980:       while (high-low > 7) {
981:         t = (low+high)/2;
982:         if (rp[t] > col) high = t;
983:         else             low  = t;
984:       }
985:       for (i=low; i<high; i++) {
986:         if (rp[i] > col) break;
987:         if (rp[i] == col) {
988:           bap  = ap +  16*i;
989:           for (ii=0; ii<4; ii++,value+=stepval) {
990:             for (jj=ii; jj<16; jj+=4) {
991:               bap[jj] += *value++;
992:             }
993:           }
994:           goto noinsert2;
995:         }
996:       }
997:       N = nrow++ - 1;
998:       high++; /* added new column index thus must search to one higher than before */
999:       /* shift up all the later entries in this row */
1000:       for (ii=N; ii>=i; ii--) {
1001:         rp[ii+1] = rp[ii];
1002:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1003:       }
1004:       if (N >= i) {
1005:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1006:       }
1007:       rp[i] = col;
1008:       bap   = ap +  16*i;
1009:       for (ii=0; ii<4; ii++,value+=stepval) {
1010:         for (jj=ii; jj<16; jj+=4) {
1011:           bap[jj] = *value++;
1012:         }
1013:       }
1014:       noinsert2:;
1015:       low = i;
1016:     }
1017:     ailen[row] = nrow;
1018:   }
1019:   PetscFunctionReturnVoid();
1020: }

1023: #if defined(PETSC_HAVE_FORTRAN_CAPS)
1024: #define matsetvalues4_ MATSETVALUES4
1025: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1026: #define matsetvalues4_ matsetvalues4
1027: #endif

1032: void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1033: {
1034:   Mat         A = *AA;
1035:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1036:   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1037:   PetscInt    *ai=a->i,*ailen=a->ilen;
1038:   PetscInt    *aj=a->j,brow,bcol;
1039:   PetscInt    ridx,cidx,lastcol = -1;
1040:   MatScalar   *ap,value,*aa=a->a,*bap;
1041: 
1043:   for (k=0; k<m; k++) { /* loop over added rows */
1044:     row  = im[k]; brow = row/4;
1045:     rp   = aj + ai[brow];
1046:     ap   = aa + 16*ai[brow];
1047:     nrow = ailen[brow];
1048:     low  = 0;
1049:     high = nrow;
1050:     for (l=0; l<n; l++) { /* loop over added columns */
1051:       col = in[l]; bcol = col/4;
1052:       ridx = row % 4; cidx = col % 4;
1053:       value = v[l + k*n];
1054:       if (col <= lastcol) low = 0; else high = nrow;
1055:       lastcol = col;
1056:       while (high-low > 7) {
1057:         t = (low+high)/2;
1058:         if (rp[t] > bcol) high = t;
1059:         else              low  = t;
1060:       }
1061:       for (i=low; i<high; i++) {
1062:         if (rp[i] > bcol) break;
1063:         if (rp[i] == bcol) {
1064:           bap  = ap +  16*i + 4*cidx + ridx;
1065:           *bap += value;
1066:           goto noinsert1;
1067:         }
1068:       }
1069:       N = nrow++ - 1;
1070:       high++; /* added new column thus must search to one higher than before */
1071:       /* shift up all the later entries in this row */
1072:       for (ii=N; ii>=i; ii--) {
1073:         rp[ii+1] = rp[ii];
1074:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1075:       }
1076:       if (N>=i) {
1077:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1078:       }
1079:       rp[i]                    = bcol;
1080:       ap[16*i + 4*cidx + ridx] = value;
1081:       noinsert1:;
1082:       low = i;
1083:     }
1084:     ailen[brow] = nrow;
1085:   }
1086:   PetscFunctionReturnVoid();
1087: }

1090: /*
1091:      Checks for missing diagonals
1092: */
1095: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1096: {
1097:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1099:   PetscInt       *diag,*jj = a->j,i;

1102:   MatMarkDiagonal_SeqBAIJ(A);
1103:   *missing = PETSC_FALSE;
1104:   diag     = a->diag;
1105:   for (i=0; i<a->mbs; i++) {
1106:     if (jj[diag[i]] != i) {
1107:       *missing  = PETSC_TRUE;
1108:       if (d) *d = i;
1109:     }
1110:   }
1111:   return(0);
1112: }

1116: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1117: {
1118:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1120:   PetscInt       i,j,m = a->mbs;

1123:   if (!a->diag) {
1124:     PetscMalloc(m*sizeof(PetscInt),&a->diag);
1125:   }
1126:   for (i=0; i<m; i++) {
1127:     a->diag[i] = a->i[i+1];
1128:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1129:       if (a->j[j] == i) {
1130:         a->diag[i] = j;
1131:         break;
1132:       }
1133:     }
1134:   }
1135:   return(0);
1136: }


1139: EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);

1143: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1144: {
1145:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1147:   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt;
1148:   PetscInt       *tia, *tja;

1151:   *nn = n;
1152:   if (!ia) return(0);
1153:   if (symmetric) {
1154:     MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);
1155:     nz   = tia[n];
1156:   } else {
1157:     tia = a->i; tja = a->j;
1158:   }
1159: 
1160:   if (!blockcompressed && bs > 1) {
1161:     (*nn) *= bs;
1162:     nbs    = bs;
1163:     /* malloc & create the natural set of indices */
1164:     PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);
1165:     if (n) {
1166:       (*ia)[0] = 0;
1167:       for (j=1; j<bs; j++) {
1168:         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1169:       }
1170:     }

1172:     for (i=1; i<n; i++) {
1173:       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1174:       for (j=1; j<bs; j++) {
1175:         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1176:       }
1177:     }
1178:     if (n) {
1179:       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1180:     }

1182:     if (ja) {
1183:       PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);
1184:       cnt = 0;
1185:       for (i=0; i<n; i++) {
1186:         for (j=0; j<bs; j++) {
1187:           for (k=tia[i]; k<tia[i+1]; k++) {
1188:             for (l=0; l<bs; l++) {
1189:               (*ja)[cnt++] = bs*tja[k] + l;
1190:             }
1191:           }
1192:         }
1193:       }
1194:     }

1196:     n     *= bs;
1197:     nz *= bs*bs;
1198:     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1199:       PetscFree(tia);
1200:       PetscFree(tja);
1201:     }
1202:   } else {
1203:     *ia = tia;
1204:     if (ja) *ja = tja;
1205:   }
1206:   if (oshift == 1) {
1207:     for (i=0; i<n+nbs; i++) (*ia)[i]++;
1208:     if (ja) for (i=0; i<nz; i++) (*ja)[i]++;
1209:   }
1210:   return(0);
1211: }

1215: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1216: {
1217:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1219:   PetscInt       i,n = a->mbs,nz = a->i[n];

1222:   if (!ia) return(0);
1223:   if (!blockcompressed && A->rmap->bs > 1) {
1224:     PetscFree(*ia);
1225:     if (ja) {PetscFree(*ja);}
1226:   } else if (symmetric) {
1227:     PetscFree(*ia);
1228:     if (ja) {PetscFree(*ja);}
1229:   } else if (oshift == 1) { /* blockcompressed */
1230:     for (i=0; i<n+1; i++) a->i[i]--;
1231:     if (ja) {for (i=0; i<nz; i++) a->j[i]--;}
1232:   }
1233:   return(0);
1234: }

1238: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1239: {
1240:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1244: #if defined(PETSC_USE_LOG)
1245:   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1246: #endif
1247:   MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
1248:   if (a->row) {
1249:     ISDestroy(a->row);
1250:   }
1251:   if (a->col) {
1252:     ISDestroy(a->col);
1253:   }
1254:   PetscFree(a->diag);
1255:   PetscFree(a->idiag);
1256:   PetscFree2(a->imax,a->ilen);
1257:   PetscFree(a->solve_work);
1258:   PetscFree(a->mult_work);
1259:   if (a->icol) {ISDestroy(a->icol);}
1260:   PetscFree(a->saved_values);
1261:   PetscFree(a->xtoy);
1262:   if (a->compressedrow.use){PetscFree(a->compressedrow.i);}

1264:   if (a->sbaijMat) {MatDestroy(a->sbaijMat);}
1265:   PetscFree(a);

1267:   PetscObjectChangeTypeName((PetscObject)A,0);
1268:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);
1269:   PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);
1270:   PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);
1271:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);
1272:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);
1273:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);
1274:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);
1275:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);
1276:   return(0);
1277: }

1281: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1282: {
1283:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1287:   switch (op) {
1288:   case MAT_ROW_ORIENTED:
1289:     a->roworiented    = flg;
1290:     break;
1291:   case MAT_KEEP_ZEROED_ROWS:
1292:     a->keepzeroedrows = flg;
1293:     break;
1294:   case MAT_NEW_NONZERO_LOCATIONS:
1295:     a->nonew          = (flg ? 0 : 1);
1296:     break;
1297:   case MAT_NEW_NONZERO_LOCATION_ERR:
1298:     a->nonew          = (flg ? -1 : 0);
1299:     break;
1300:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1301:     a->nonew          = (flg ? -2 : 0);
1302:     break;
1303:   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1304:     a->nounused       = (flg ? -1 : 0);
1305:     break;
1306:   case MAT_NEW_DIAGONALS:
1307:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1308:   case MAT_USE_HASH_TABLE:
1309:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1310:     break;
1311:   case MAT_SYMMETRIC:
1312:   case MAT_STRUCTURALLY_SYMMETRIC:
1313:   case MAT_HERMITIAN:
1314:   case MAT_SYMMETRY_ETERNAL:
1315:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1316:     break;
1317:   default:
1318:     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1319:   }
1320:   return(0);
1321: }

1325: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1326: {
1327:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1329:   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1330:   MatScalar      *aa,*aa_i;
1331:   PetscScalar    *v_i;

1334:   bs  = A->rmap->bs;
1335:   ai  = a->i;
1336:   aj  = a->j;
1337:   aa  = a->a;
1338:   bs2 = a->bs2;
1339: 
1340:   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1341: 
1342:   bn  = row/bs;   /* Block number */
1343:   bp  = row % bs; /* Block Position */
1344:   M   = ai[bn+1] - ai[bn];
1345:   *nz = bs*M;
1346: 
1347:   if (v) {
1348:     *v = 0;
1349:     if (*nz) {
1350:       PetscMalloc((*nz)*sizeof(PetscScalar),v);
1351:       for (i=0; i<M; i++) { /* for each block in the block row */
1352:         v_i  = *v + i*bs;
1353:         aa_i = aa + bs2*(ai[bn] + i);
1354:         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1355:       }
1356:     }
1357:   }

1359:   if (idx) {
1360:     *idx = 0;
1361:     if (*nz) {
1362:       PetscMalloc((*nz)*sizeof(PetscInt),idx);
1363:       for (i=0; i<M; i++) { /* for each block in the block row */
1364:         idx_i = *idx + i*bs;
1365:         itmp  = bs*aj[ai[bn] + i];
1366:         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1367:       }
1368:     }
1369:   }
1370:   return(0);
1371: }

1375: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1376: {

1380:   if (idx) {PetscFree(*idx);}
1381:   if (v)   {PetscFree(*v);}
1382:   return(0);
1383: }

1387: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1388: {
1389:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1390:   Mat            C;
1392:   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1393:   PetscInt       *rows,*cols,bs2=a->bs2;
1394:   MatScalar      *array;

1397:   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1398:   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1399:     PetscMalloc((1+nbs)*sizeof(PetscInt),&col);
1400:     PetscMemzero(col,(1+nbs)*sizeof(PetscInt));

1402:     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1403:     MatCreate(((PetscObject)A)->comm,&C);
1404:     MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);
1405:     MatSetType(C,((PetscObject)A)->type_name);
1406:     MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);
1407:     PetscFree(col);
1408:   } else {
1409:     C = *B;
1410:   }

1412:   array = a->a;
1413:   PetscMalloc(2*bs*sizeof(PetscInt),&rows);
1414:   cols = rows + bs;
1415:   for (i=0; i<mbs; i++) {
1416:     cols[0] = i*bs;
1417:     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1418:     len = ai[i+1] - ai[i];
1419:     for (j=0; j<len; j++) {
1420:       rows[0] = (*aj++)*bs;
1421:       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1422:       MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
1423:       array += bs2;
1424:     }
1425:   }
1426:   PetscFree(rows);
1427: 
1428:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1429:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1430: 
1431:   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1432:     *B = C;
1433:   } else {
1434:     MatHeaderCopy(A,C);
1435:   }
1436:   return(0);
1437: }

1441: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1442: {
1443:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1445:   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1446:   int            fd;
1447:   PetscScalar    *aa;
1448:   FILE           *file;

1451:   PetscViewerBinaryGetDescriptor(viewer,&fd);
1452:   PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);
1453:   col_lens[0] = MAT_FILE_COOKIE;

1455:   col_lens[1] = A->rmap->N;
1456:   col_lens[2] = A->cmap->n;
1457:   col_lens[3] = a->nz*bs2;

1459:   /* store lengths of each row and write (including header) to file */
1460:   count = 0;
1461:   for (i=0; i<a->mbs; i++) {
1462:     for (j=0; j<bs; j++) {
1463:       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1464:     }
1465:   }
1466:   PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);
1467:   PetscFree(col_lens);

1469:   /* store column indices (zero start index) */
1470:   PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);
1471:   count = 0;
1472:   for (i=0; i<a->mbs; i++) {
1473:     for (j=0; j<bs; j++) {
1474:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1475:         for (l=0; l<bs; l++) {
1476:           jj[count++] = bs*a->j[k] + l;
1477:         }
1478:       }
1479:     }
1480:   }
1481:   PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1482:   PetscFree(jj);

1484:   /* store nonzero values */
1485:   PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
1486:   count = 0;
1487:   for (i=0; i<a->mbs; i++) {
1488:     for (j=0; j<bs; j++) {
1489:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1490:         for (l=0; l<bs; l++) {
1491:           aa[count++] = a->a[bs2*k + l*bs + j];
1492:         }
1493:       }
1494:     }
1495:   }
1496:   PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1497:   PetscFree(aa);

1499:   PetscViewerBinaryGetInfoPointer(viewer,&file);
1500:   if (file) {
1501:     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1502:   }
1503:   return(0);
1504: }

1508: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1509: {
1510:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1511:   PetscErrorCode    ierr;
1512:   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1513:   PetscViewerFormat format;

1516:   PetscViewerGetFormat(viewer,&format);
1517:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1518:     PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);
1519:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1520:     Mat aij;
1521:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1522:     MatView(aij,viewer);
1523:     MatDestroy(aij);
1524:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1525:      return(0);
1526:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1527:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1528:     for (i=0; i<a->mbs; i++) {
1529:       for (j=0; j<bs; j++) {
1530:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1531:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1532:           for (l=0; l<bs; l++) {
1533: #if defined(PETSC_USE_COMPLEX)
1534:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1535:               PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1536:                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1537:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1538:               PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1539:                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1540:             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1541:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1542:             }
1543: #else
1544:             if (a->a[bs2*k + l*bs + j] != 0.0) {
1545:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1546:             }
1547: #endif
1548:           }
1549:         }
1550:         PetscViewerASCIIPrintf(viewer,"\n");
1551:       }
1552:     }
1553:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1554:   } else {
1555:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1556:     for (i=0; i<a->mbs; i++) {
1557:       for (j=0; j<bs; j++) {
1558:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1559:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1560:           for (l=0; l<bs; l++) {
1561: #if defined(PETSC_USE_COMPLEX)
1562:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1563:               PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1564:                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1565:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1566:               PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1567:                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1568:             } else {
1569:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1570:             }
1571: #else
1572:             PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1573: #endif
1574:           }
1575:         }
1576:         PetscViewerASCIIPrintf(viewer,"\n");
1577:       }
1578:     }
1579:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1580:   }
1581:   PetscViewerFlush(viewer);
1582:   return(0);
1583: }

1587: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1588: {
1589:   Mat            A = (Mat) Aa;
1590:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1592:   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1593:   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1594:   MatScalar      *aa;
1595:   PetscViewer    viewer;


1599:   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1600:   PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);

1602:   PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);

1604:   /* loop over matrix elements drawing boxes */
1605:   color = PETSC_DRAW_BLUE;
1606:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1607:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1608:       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1609:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1610:       aa = a->a + j*bs2;
1611:       for (k=0; k<bs; k++) {
1612:         for (l=0; l<bs; l++) {
1613:           if (PetscRealPart(*aa++) >=  0.) continue;
1614:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1615:         }
1616:       }
1617:     }
1618:   }
1619:   color = PETSC_DRAW_CYAN;
1620:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1621:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1622:       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1623:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1624:       aa = a->a + j*bs2;
1625:       for (k=0; k<bs; k++) {
1626:         for (l=0; l<bs; l++) {
1627:           if (PetscRealPart(*aa++) != 0.) continue;
1628:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1629:         }
1630:       }
1631:     }
1632:   }

1634:   color = PETSC_DRAW_RED;
1635:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1636:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1637:       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1638:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1639:       aa = a->a + j*bs2;
1640:       for (k=0; k<bs; k++) {
1641:         for (l=0; l<bs; l++) {
1642:           if (PetscRealPart(*aa++) <= 0.) continue;
1643:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1644:         }
1645:       }
1646:     }
1647:   }
1648:   return(0);
1649: }

1653: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1654: {
1656:   PetscReal      xl,yl,xr,yr,w,h;
1657:   PetscDraw      draw;
1658:   PetscTruth     isnull;


1662:   PetscViewerDrawGetDraw(viewer,0,&draw);
1663:   PetscDrawIsNull(draw,&isnull); if (isnull) return(0);

1665:   PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1666:   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1667:   xr += w;    yr += h;  xl = -w;     yl = -h;
1668:   PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1669:   PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1670:   PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
1671:   return(0);
1672: }

1676: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1677: {
1679:   PetscTruth     iascii,isbinary,isdraw;

1682:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1683:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1684:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1685:   if (iascii){
1686:     MatView_SeqBAIJ_ASCII(A,viewer);
1687:   } else if (isbinary) {
1688:     MatView_SeqBAIJ_Binary(A,viewer);
1689:   } else if (isdraw) {
1690:     MatView_SeqBAIJ_Draw(A,viewer);
1691:   } else {
1692:     Mat B;
1693:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1694:     MatView(B,viewer);
1695:     MatDestroy(B);
1696:   }
1697:   return(0);
1698: }


1703: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1704: {
1705:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1706:   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1707:   PetscInt    *ai = a->i,*ailen = a->ilen;
1708:   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1709:   MatScalar   *ap,*aa = a->a;

1712:   for (k=0; k<m; k++) { /* loop over rows */
1713:     row  = im[k]; brow = row/bs;
1714:     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1715:     if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1716:     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1717:     nrow = ailen[brow];
1718:     for (l=0; l<n; l++) { /* loop over columns */
1719:       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1720:       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1721:       col  = in[l] ;
1722:       bcol = col/bs;
1723:       cidx = col%bs;
1724:       ridx = row%bs;
1725:       high = nrow;
1726:       low  = 0; /* assume unsorted */
1727:       while (high-low > 5) {
1728:         t = (low+high)/2;
1729:         if (rp[t] > bcol) high = t;
1730:         else             low  = t;
1731:       }
1732:       for (i=low; i<high; i++) {
1733:         if (rp[i] > bcol) break;
1734:         if (rp[i] == bcol) {
1735:           *v++ = ap[bs2*i+bs*cidx+ridx];
1736:           goto finished;
1737:         }
1738:       }
1739:       *v++ = 0.0;
1740:       finished:;
1741:     }
1742:   }
1743:   return(0);
1744: }

1746: #define CHUNKSIZE 10
1749: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1750: {
1751:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1752:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1753:   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1754:   PetscErrorCode    ierr;
1755:   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1756:   PetscTruth        roworiented=a->roworiented;
1757:   const PetscScalar *value = v;
1758:   MatScalar         *ap,*aa = a->a,*bap;

1761:   if (roworiented) {
1762:     stepval = (n-1)*bs;
1763:   } else {
1764:     stepval = (m-1)*bs;
1765:   }
1766:   for (k=0; k<m; k++) { /* loop over added rows */
1767:     row  = im[k];
1768:     if (row < 0) continue;
1769: #if defined(PETSC_USE_DEBUG)  
1770:     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1771: #endif
1772:     rp   = aj + ai[row];
1773:     ap   = aa + bs2*ai[row];
1774:     rmax = imax[row];
1775:     nrow = ailen[row];
1776:     low  = 0;
1777:     high = nrow;
1778:     for (l=0; l<n; l++) { /* loop over added columns */
1779:       if (in[l] < 0) continue;
1780: #if defined(PETSC_USE_DEBUG)  
1781:       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1782: #endif
1783:       col = in[l];
1784:       if (roworiented) {
1785:         value = v + k*(stepval+bs)*bs + l*bs;
1786:       } else {
1787:         value = v + l*(stepval+bs)*bs + k*bs;
1788:       }
1789:       if (col <= lastcol) low = 0; else high = nrow;
1790:       lastcol = col;
1791:       while (high-low > 7) {
1792:         t = (low+high)/2;
1793:         if (rp[t] > col) high = t;
1794:         else             low  = t;
1795:       }
1796:       for (i=low; i<high; i++) {
1797:         if (rp[i] > col) break;
1798:         if (rp[i] == col) {
1799:           bap  = ap +  bs2*i;
1800:           if (roworiented) {
1801:             if (is == ADD_VALUES) {
1802:               for (ii=0; ii<bs; ii++,value+=stepval) {
1803:                 for (jj=ii; jj<bs2; jj+=bs) {
1804:                   bap[jj] += *value++;
1805:                 }
1806:               }
1807:             } else {
1808:               for (ii=0; ii<bs; ii++,value+=stepval) {
1809:                 for (jj=ii; jj<bs2; jj+=bs) {
1810:                   bap[jj] = *value++;
1811:                 }
1812:               }
1813:             }
1814:           } else {
1815:             if (is == ADD_VALUES) {
1816:               for (ii=0; ii<bs; ii++,value+=stepval) {
1817:                 for (jj=0; jj<bs; jj++) {
1818:                   *bap++ += *value++;
1819:                 }
1820:               }
1821:             } else {
1822:               for (ii=0; ii<bs; ii++,value+=stepval) {
1823:                 for (jj=0; jj<bs; jj++) {
1824:                   *bap++  = *value++;
1825:                 }
1826:               }
1827:             }
1828:           }
1829:           goto noinsert2;
1830:         }
1831:       }
1832:       if (nonew == 1) goto noinsert2;
1833:       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1834:       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1835:       N = nrow++ - 1; high++;
1836:       /* shift up all the later entries in this row */
1837:       for (ii=N; ii>=i; ii--) {
1838:         rp[ii+1] = rp[ii];
1839:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1840:       }
1841:       if (N >= i) {
1842:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1843:       }
1844:       rp[i] = col;
1845:       bap   = ap +  bs2*i;
1846:       if (roworiented) {
1847:         for (ii=0; ii<bs; ii++,value+=stepval) {
1848:           for (jj=ii; jj<bs2; jj+=bs) {
1849:             bap[jj] = *value++;
1850:           }
1851:         }
1852:       } else {
1853:         for (ii=0; ii<bs; ii++,value+=stepval) {
1854:           for (jj=0; jj<bs; jj++) {
1855:             *bap++  = *value++;
1856:           }
1857:         }
1858:       }
1859:       noinsert2:;
1860:       low = i;
1861:     }
1862:     ailen[row] = nrow;
1863:   }
1864:   return(0);
1865: }

1869: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1870: {
1871:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1872:   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1873:   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
1875:   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1876:   MatScalar      *aa = a->a,*ap;
1877:   PetscReal      ratio=0.6;

1880:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

1882:   if (m) rmax = ailen[0];
1883:   for (i=1; i<mbs; i++) {
1884:     /* move each row back by the amount of empty slots (fshift) before it*/
1885:     fshift += imax[i-1] - ailen[i-1];
1886:     rmax   = PetscMax(rmax,ailen[i]);
1887:     if (fshift) {
1888:       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1889:       N = ailen[i];
1890:       for (j=0; j<N; j++) {
1891:         ip[j-fshift] = ip[j];
1892:         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
1893:       }
1894:     }
1895:     ai[i] = ai[i-1] + ailen[i-1];
1896:   }
1897:   if (mbs) {
1898:     fshift += imax[mbs-1] - ailen[mbs-1];
1899:     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1900:   }
1901:   /* reset ilen and imax for each row */
1902:   for (i=0; i<mbs; i++) {
1903:     ailen[i] = imax[i] = ai[i+1] - ai[i];
1904:   }
1905:   a->nz = ai[mbs];

1907:   /* diagonals may have moved, so kill the diagonal pointers */
1908:   a->idiagvalid = PETSC_FALSE;
1909:   if (fshift && a->diag) {
1910:     PetscFree(a->diag);
1911:     PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));
1912:     a->diag = 0;
1913:   }
1914:   if (fshift && a->nounused == -1) {
1915:     SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1916:   }
1917:   PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);
1918:   PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
1919:   PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);
1920:   a->reallocs          = 0;
1921:   A->info.nz_unneeded  = (PetscReal)fshift*bs2;

1923:   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1924:   if (a->compressedrow.use){
1925:     Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);
1926:   }

1928:   A->same_nonzero = PETSC_TRUE;
1929:   return(0);
1930: }

1932: /* 
1933:    This function returns an array of flags which indicate the locations of contiguous
1934:    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1935:    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1936:    Assume: sizes should be long enough to hold all the values.
1937: */
1940: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1941: {
1942:   PetscInt   i,j,k,row;
1943:   PetscTruth flg;

1946:   for (i=0,j=0; i<n; j++) {
1947:     row = idx[i];
1948:     if (row%bs!=0) { /* Not the begining of a block */
1949:       sizes[j] = 1;
1950:       i++;
1951:     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1952:       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1953:       i++;
1954:     } else { /* Begining of the block, so check if the complete block exists */
1955:       flg = PETSC_TRUE;
1956:       for (k=1; k<bs; k++) {
1957:         if (row+k != idx[i+k]) { /* break in the block */
1958:           flg = PETSC_FALSE;
1959:           break;
1960:         }
1961:       }
1962:       if (flg) { /* No break in the bs */
1963:         sizes[j] = bs;
1964:         i+= bs;
1965:       } else {
1966:         sizes[j] = 1;
1967:         i++;
1968:       }
1969:     }
1970:   }
1971:   *bs_max = j;
1972:   return(0);
1973: }
1974: 
1977: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1978: {
1979:   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1981:   PetscInt       i,j,k,count,*rows;
1982:   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1983:   PetscScalar    zero = 0.0;
1984:   MatScalar      *aa;

1987:   /* Make a copy of the IS and  sort it */
1988:   /* allocate memory for rows,sizes */
1989:   PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);
1990:   sizes = rows + is_n;

1992:   /* copy IS values to rows, and sort them */
1993:   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1994:   PetscSortInt(is_n,rows);
1995:   if (baij->keepzeroedrows) {
1996:     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1997:     bs_max = is_n;
1998:     A->same_nonzero = PETSC_TRUE;
1999:   } else {
2000:     MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
2001:     A->same_nonzero = PETSC_FALSE;
2002:   }

2004:   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2005:     row   = rows[j];
2006:     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2007:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2008:     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2009:     if (sizes[i] == bs && !baij->keepzeroedrows) {
2010:       if (diag != 0.0) {
2011:         if (baij->ilen[row/bs] > 0) {
2012:           baij->ilen[row/bs]       = 1;
2013:           baij->j[baij->i[row/bs]] = row/bs;
2014:           PetscMemzero(aa,count*bs*sizeof(MatScalar));
2015:         }
2016:         /* Now insert all the diagonal values for this bs */
2017:         for (k=0; k<bs; k++) {
2018:           (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
2019:         }
2020:       } else { /* (diag == 0.0) */
2021:         baij->ilen[row/bs] = 0;
2022:       } /* end (diag == 0.0) */
2023:     } else { /* (sizes[i] != bs) */
2024: #if defined (PETSC_USE_DEBUG)
2025:       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2026: #endif
2027:       for (k=0; k<count; k++) {
2028:         aa[0] =  zero;
2029:         aa    += bs;
2030:       }
2031:       if (diag != 0.0) {
2032:         (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
2033:       }
2034:     }
2035:   }

2037:   PetscFree(rows);
2038:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2039:   return(0);
2040: }

2044: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2045: {
2046:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2047:   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2048:   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2049:   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2051:   PetscInt       ridx,cidx,bs2=a->bs2;
2052:   PetscTruth     roworiented=a->roworiented;
2053:   MatScalar      *ap,value,*aa=a->a,*bap;

2056:   for (k=0; k<m; k++) { /* loop over added rows */
2057:     row  = im[k];
2058:     brow = row/bs;
2059:     if (row < 0) continue;
2060: #if defined(PETSC_USE_DEBUG)  
2061:     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2062: #endif
2063:     rp   = aj + ai[brow];
2064:     ap   = aa + bs2*ai[brow];
2065:     rmax = imax[brow];
2066:     nrow = ailen[brow];
2067:     low  = 0;
2068:     high = nrow;
2069:     for (l=0; l<n; l++) { /* loop over added columns */
2070:       if (in[l] < 0) continue;
2071: #if defined(PETSC_USE_DEBUG)  
2072:       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2073: #endif
2074:       col = in[l]; bcol = col/bs;
2075:       ridx = row % bs; cidx = col % bs;
2076:       if (roworiented) {
2077:         value = v[l + k*n];
2078:       } else {
2079:         value = v[k + l*m];
2080:       }
2081:       if (col <= lastcol) low = 0; else high = nrow;
2082:       lastcol = col;
2083:       while (high-low > 7) {
2084:         t = (low+high)/2;
2085:         if (rp[t] > bcol) high = t;
2086:         else              low  = t;
2087:       }
2088:       for (i=low; i<high; i++) {
2089:         if (rp[i] > bcol) break;
2090:         if (rp[i] == bcol) {
2091:           bap  = ap +  bs2*i + bs*cidx + ridx;
2092:           if (is == ADD_VALUES) *bap += value;
2093:           else                  *bap  = value;
2094:           goto noinsert1;
2095:         }
2096:       }
2097:       if (nonew == 1) goto noinsert1;
2098:       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2099:       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2100:       N = nrow++ - 1; high++;
2101:       /* shift up all the later entries in this row */
2102:       for (ii=N; ii>=i; ii--) {
2103:         rp[ii+1] = rp[ii];
2104:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
2105:       }
2106:       if (N>=i) {
2107:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
2108:       }
2109:       rp[i]                      = bcol;
2110:       ap[bs2*i + bs*cidx + ridx] = value;
2111:       a->nz++;
2112:       noinsert1:;
2113:       low = i;
2114:     }
2115:     ailen[brow] = nrow;
2116:   }
2117:   A->same_nonzero = PETSC_FALSE;
2118:   return(0);
2119: }

2121: EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth);

2125: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2126: {
2127:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2128:   Mat            outA;
2130:   PetscTruth     row_identity,col_identity;

2133:   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2134:   ISIdentity(row,&row_identity);
2135:   ISIdentity(col,&col_identity);
2136:   if (!row_identity || !col_identity) {
2137:     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2138:   }

2140:   outA          = inA;
2141:   inA->factor   = MAT_FACTOR_LU;

2143:   MatMarkDiagonal_SeqBAIJ(inA);

2145:   PetscObjectReference((PetscObject)row);
2146:   if (a->row) { ISDestroy(a->row); }
2147:   a->row = row;
2148:   PetscObjectReference((PetscObject)col);
2149:   if (a->col) { ISDestroy(a->col); }
2150:   a->col = col;
2151: 
2152:   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2153:   if (a->icol) {
2154:     ISDestroy(a->icol);
2155:   }
2156:   ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
2157:   PetscLogObjectParent(inA,a->icol);
2158: 
2159:   MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));
2160:   if (!a->solve_work) {
2161:     PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);
2162:     PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));
2163:   }
2164:   MatLUFactorNumeric(outA,inA,info);

2166:   return(0);
2167: }

2172: PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2173: {
2174:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2175:   PetscInt    i,nz,nbs;

2178:   nz  = baij->maxnz/baij->bs2;
2179:   nbs = baij->nbs;
2180:   for (i=0; i<nz; i++) {
2181:     baij->j[i] = indices[i];
2182:   }
2183:   baij->nz = nz;
2184:   for (i=0; i<nbs; i++) {
2185:     baij->ilen[i] = baij->imax[i];
2186:   }

2188:   return(0);
2189: }

2194: /*@
2195:     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2196:        in the matrix.

2198:   Input Parameters:
2199: +  mat - the SeqBAIJ matrix
2200: -  indices - the column indices

2202:   Level: advanced

2204:   Notes:
2205:     This can be called if you have precomputed the nonzero structure of the 
2206:   matrix and want to provide it to the matrix object to improve the performance
2207:   of the MatSetValues() operation.

2209:     You MUST have set the correct numbers of nonzeros per row in the call to 
2210:   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.

2212:     MUST be called before any calls to MatSetValues();

2214: @*/
2215: PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2216: {
2217:   PetscErrorCode ierr,(*f)(Mat,PetscInt *);

2222:   PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);
2223:   if (f) {
2224:     (*f)(mat,indices);
2225:   } else {
2226:     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2227:   }
2228:   return(0);
2229: }

2233: PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2234: {
2235:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2237:   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2238:   PetscReal      atmp;
2239:   PetscScalar    *x,zero = 0.0;
2240:   MatScalar      *aa;
2241:   PetscInt       ncols,brow,krow,kcol;

2244:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2245:   bs   = A->rmap->bs;
2246:   aa   = a->a;
2247:   ai   = a->i;
2248:   aj   = a->j;
2249:   mbs  = a->mbs;

2251:   VecSet(v,zero);
2252:   VecGetArray(v,&x);
2253:   VecGetLocalSize(v,&n);
2254:   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2255:   for (i=0; i<mbs; i++) {
2256:     ncols = ai[1] - ai[0]; ai++;
2257:     brow  = bs*i;
2258:     for (j=0; j<ncols; j++){
2259:       for (kcol=0; kcol<bs; kcol++){
2260:         for (krow=0; krow<bs; krow++){
2261:           atmp = PetscAbsScalar(*aa);aa++;
2262:           row = brow + krow;    /* row index */
2263:           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2264:           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2265:         }
2266:       }
2267:       aj++;
2268:     }
2269:   }
2270:   VecRestoreArray(v,&x);
2271:   return(0);
2272: }

2276: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2277: {

2281:   /* If the two matrices have the same copy implementation, use fast copy. */
2282:   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2283:     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2284:     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;

2286:     if (a->i[A->rmap->N] != b->i[B->rmap->N]) {
2287:       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2288:     }
2289:     PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));
2290:   } else {
2291:     MatCopy_Basic(A,B,str);
2292:   }
2293:   return(0);
2294: }

2298: PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2299: {

2303:    MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);
2304:   return(0);
2305: }

2309: PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2310: {
2311:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2313:   *array = a->a;
2314:   return(0);
2315: }

2319: PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2320: {
2322:   return(0);
2323: }

2325:  #include petscblaslapack.h
2328: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2329: {
2330:   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2332:   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2333:   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);

2336:   if (str == SAME_NONZERO_PATTERN) {
2337:     PetscScalar alpha = a;
2338:     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2339:   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2340:     if (y->xtoy && y->XtoY != X) {
2341:       PetscFree(y->xtoy);
2342:       MatDestroy(y->XtoY);
2343:     }
2344:     if (!y->xtoy) { /* get xtoy */
2345:       MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);
2346:       y->XtoY = X;
2347:       PetscObjectReference((PetscObject)X);
2348:     }
2349:     bs2 = bs*bs;
2350:     for (i=0; i<x->nz; i++) {
2351:       j = 0;
2352:       while (j < bs2){
2353:         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2354:         j++;
2355:       }
2356:     }
2357:     PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
2358:   } else {
2359:     MatAXPY_Basic(Y,a,X,str);
2360:   }
2361:   return(0);
2362: }

2366: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2367: {
2368:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2369:   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2370:   MatScalar      *aa = a->a;

2373:   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2374:   return(0);
2375: }

2379: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2380: {
2381:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2382:   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2383:   MatScalar      *aa = a->a;

2386:   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2387:   return(0);
2388: }


2391: /* -------------------------------------------------------------------*/
2392: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2393:        MatGetRow_SeqBAIJ,
2394:        MatRestoreRow_SeqBAIJ,
2395:        MatMult_SeqBAIJ_N,
2396: /* 4*/ MatMultAdd_SeqBAIJ_N,
2397:        MatMultTranspose_SeqBAIJ,
2398:        MatMultTransposeAdd_SeqBAIJ,
2399:        0,
2400:        0,
2401:        0,
2402: /*10*/ 0,
2403:        MatLUFactor_SeqBAIJ,
2404:        0,
2405:        0,
2406:        MatTranspose_SeqBAIJ,
2407: /*15*/ MatGetInfo_SeqBAIJ,
2408:        MatEqual_SeqBAIJ,
2409:        MatGetDiagonal_SeqBAIJ,
2410:        MatDiagonalScale_SeqBAIJ,
2411:        MatNorm_SeqBAIJ,
2412: /*20*/ 0,
2413:        MatAssemblyEnd_SeqBAIJ,
2414:        0,
2415:        MatSetOption_SeqBAIJ,
2416:        MatZeroEntries_SeqBAIJ,
2417: /*25*/ MatZeroRows_SeqBAIJ,
2418:        0,
2419:        0,
2420:        0,
2421:        0,
2422: /*30*/ MatSetUpPreallocation_SeqBAIJ,
2423:        0,
2424:        0,
2425:        MatGetArray_SeqBAIJ,
2426:        MatRestoreArray_SeqBAIJ,
2427: /*35*/ MatDuplicate_SeqBAIJ,
2428:        0,
2429:        0,
2430:        MatILUFactor_SeqBAIJ,
2431:        0,
2432: /*40*/ MatAXPY_SeqBAIJ,
2433:        MatGetSubMatrices_SeqBAIJ,
2434:        MatIncreaseOverlap_SeqBAIJ,
2435:        MatGetValues_SeqBAIJ,
2436:        MatCopy_SeqBAIJ,
2437: /*45*/ 0,
2438:        MatScale_SeqBAIJ,
2439:        0,
2440:        0,
2441:        0,
2442: /*50*/ 0,
2443:        MatGetRowIJ_SeqBAIJ,
2444:        MatRestoreRowIJ_SeqBAIJ,
2445:        0,
2446:        0,
2447: /*55*/ 0,
2448:        0,
2449:        0,
2450:        0,
2451:        MatSetValuesBlocked_SeqBAIJ,
2452: /*60*/ MatGetSubMatrix_SeqBAIJ,
2453:        MatDestroy_SeqBAIJ,
2454:        MatView_SeqBAIJ,
2455:        0,
2456:        0,
2457: /*65*/ 0,
2458:        0,
2459:        0,
2460:        0,
2461:        0,
2462: /*70*/ MatGetRowMaxAbs_SeqBAIJ,
2463:        0,
2464:        MatConvert_Basic,
2465:        0,
2466:        0,
2467: /*75*/ 0,
2468:        0,
2469:        0,
2470:        0,
2471:        0,
2472: /*80*/ 0,
2473:        0,
2474:        0,
2475:        0,
2476:        MatLoad_SeqBAIJ,
2477: /*85*/ 0,
2478:        0,
2479:        0,
2480:        0,
2481:        0,
2482: /*90*/ 0,
2483:        0,
2484:        0,
2485:        0,
2486:        0,
2487: /*95*/ 0,
2488:        0,
2489:        0,
2490:        0,
2491:        0,
2492: /*100*/0,
2493:        0,
2494:        0,
2495:        0,
2496:        0,
2497: /*105*/0,
2498:        MatRealPart_SeqBAIJ,
2499:        MatImaginaryPart_SeqBAIJ,
2500:        0,
2501:        0,
2502: /*110*/0,
2503:        0,
2504:        0,
2505:        0,
2506:        MatMissingDiagonal_SeqBAIJ
2507: /*115*/
2508: };

2513: PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2514: {
2515:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2516:   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;

2520:   if (aij->nonew != 1) {
2521:     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2522:   }

2524:   /* allocate space for values if not already there */
2525:   if (!aij->saved_values) {
2526:     PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
2527:   }

2529:   /* copy values over */
2530:   PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
2531:   return(0);
2532: }

2538: PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2539: {
2540:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2542:   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;

2545:   if (aij->nonew != 1) {
2546:     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2547:   }
2548:   if (!aij->saved_values) {
2549:     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2550:   }

2552:   /* copy values over */
2553:   PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
2554:   return(0);
2555: }


2566: PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2567: {
2568:   Mat_SeqBAIJ    *b;
2570:   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
2571:   PetscTruth     flg,skipallocation = PETSC_FALSE;


2575:   if (nz == MAT_SKIP_ALLOCATION) {
2576:     skipallocation = PETSC_TRUE;
2577:     nz             = 0;
2578:   }

2580:   if (bs < 0) {
2581:     PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");
2582:       PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);
2583:     PetscOptionsEnd();
2584:     bs   = PetscAbs(bs);
2585:   }
2586:   if (nnz && newbs != bs) {
2587:     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2588:   }
2589:   bs   = newbs;

2591:   PetscMapSetBlockSize(B->rmap,bs);
2592:   PetscMapSetBlockSize(B->cmap,bs);
2593:   PetscMapSetUp(B->rmap);
2594:   PetscMapSetUp(B->cmap);

2596:   B->preallocated = PETSC_TRUE;

2598:   mbs  = B->rmap->n/bs;
2599:   nbs  = B->cmap->n/bs;
2600:   bs2  = bs*bs;

2602:   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) {
2603:     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2604:   }

2606:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2607:   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2608:   if (nnz) {
2609:     for (i=0; i<mbs; i++) {
2610:       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2611:       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2612:     }
2613:   }

2615:   b       = (Mat_SeqBAIJ*)B->data;
2616:   PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2617:     PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);
2618:   PetscOptionsEnd();

2620:   if (!flg) {
2621:     switch (bs) {
2622:     case 1:
2623:       B->ops->mult            = MatMult_SeqBAIJ_1;
2624:       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2625:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_1;
2626:       break;
2627:     case 2:
2628:       B->ops->mult            = MatMult_SeqBAIJ_2;
2629:       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2630:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2631:       break;
2632:     case 3:
2633:       B->ops->mult            = MatMult_SeqBAIJ_3;
2634:       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2635:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2636:       break;
2637:     case 4:
2638:       B->ops->mult            = MatMult_SeqBAIJ_4;
2639:       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2640:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2641:       break;
2642:     case 5:
2643:       B->ops->mult            = MatMult_SeqBAIJ_5;
2644:       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2645:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2646:       break;
2647:     case 6:
2648:       B->ops->mult            = MatMult_SeqBAIJ_6;
2649:       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2650:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_6;
2651:       break;
2652:     case 7:
2653:       B->ops->mult            = MatMult_SeqBAIJ_7;
2654:       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2655:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_7;
2656:       break;
2657:     default:
2658:       B->ops->mult            = MatMult_SeqBAIJ_N;
2659:       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2660:       break;
2661:     }
2662:   }
2663:   B->rmap->bs      = bs;
2664:   b->mbs     = mbs;
2665:   b->nbs     = nbs;
2666:   if (!skipallocation) {
2667:     if (!b->imax) {
2668:       PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);
2669:     }
2670:     /* b->ilen will count nonzeros in each block row so far. */
2671:     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2672:     if (!nnz) {
2673:       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2674:       else if (nz <= 0)        nz = 1;
2675:       for (i=0; i<mbs; i++) b->imax[i] = nz;
2676:       nz = nz*mbs;
2677:     } else {
2678:       nz = 0;
2679:       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2680:     }

2682:     /* allocate the matrix space */
2683:     MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
2684:     PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);
2685:     PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
2686:     PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
2687:     PetscMemzero(b->j,nz*sizeof(PetscInt));
2688:     b->singlemalloc = PETSC_TRUE;
2689:     b->i[0] = 0;
2690:     for (i=1; i<mbs+1; i++) {
2691:       b->i[i] = b->i[i-1] + b->imax[i-1];
2692:     }
2693:     b->free_a     = PETSC_TRUE;
2694:     b->free_ij    = PETSC_TRUE;
2695:   } else {
2696:     b->free_a     = PETSC_FALSE;
2697:     b->free_ij    = PETSC_FALSE;
2698:   }

2700:   B->rmap->bs          = bs;
2701:   b->bs2              = bs2;
2702:   b->mbs              = mbs;
2703:   b->nz               = 0;
2704:   b->maxnz            = nz*bs2;
2705:   B->info.nz_unneeded = (PetscReal)b->maxnz;
2706:   return(0);
2707: }

2713: PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2714: {
2715:   PetscInt       i,m,nz,nz_max=0,*nnz;
2716:   PetscScalar    *values=0;


2721:   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);

2723:   PetscMapSetBlockSize(B->rmap,bs);
2724:   PetscMapSetBlockSize(B->cmap,bs);
2725:   PetscMapSetUp(B->rmap);
2726:   PetscMapSetUp(B->cmap);
2727:   m = B->rmap->n/bs;

2729:   if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
2730:   PetscMalloc((m+1) * sizeof(PetscInt), &nnz);
2731:   for(i=0; i<m; i++) {
2732:     nz = ii[i+1]- ii[i];
2733:     if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
2734:     nz_max = PetscMax(nz_max, nz);
2735:     nnz[i] = nz;
2736:   }
2737:   MatSeqBAIJSetPreallocation(B,bs,0,nnz);
2738:   PetscFree(nnz);

2740:   values = (PetscScalar*)V;
2741:   if (!values) {
2742:     PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);
2743:     PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));
2744:   }
2745:   for (i=0; i<m; i++) {
2746:     PetscInt          ncols  = ii[i+1] - ii[i];
2747:     const PetscInt    *icols = jj + ii[i];
2748:     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2749:     MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);
2750:   }
2751:   if (!V) { PetscFree(values); }
2752:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2753:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

2755:   return(0);
2756: }



2765: /*MC
2766:    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 
2767:    block sparse compressed row format.

2769:    Options Database Keys:
2770: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()

2772:   Level: beginner

2774: .seealso: MatCreateSeqBAIJ()
2775: M*/


2781: PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
2782: {
2784:   PetscMPIInt    size;
2785:   Mat_SeqBAIJ    *b;

2788:   MPI_Comm_size(((PetscObject)B)->comm,&size);
2789:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");

2791:   PetscNewLog(B,Mat_SeqBAIJ,&b);
2792:   B->data = (void*)b;
2793:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
2794:   B->mapping          = 0;
2795:   b->row              = 0;
2796:   b->col              = 0;
2797:   b->icol             = 0;
2798:   b->reallocs         = 0;
2799:   b->saved_values     = 0;

2801:   b->roworiented      = PETSC_TRUE;
2802:   b->nonew            = 0;
2803:   b->diag             = 0;
2804:   b->solve_work       = 0;
2805:   b->mult_work        = 0;
2806:   B->spptr            = 0;
2807:   B->info.nz_unneeded = (PetscReal)b->maxnz;
2808:   b->keepzeroedrows   = PETSC_FALSE;
2809:   b->xtoy              = 0;
2810:   b->XtoY              = 0;
2811:   b->compressedrow.use     = PETSC_FALSE;
2812:   b->compressedrow.nrows   = 0;
2813:   b->compressedrow.i       = PETSC_NULL;
2814:   b->compressedrow.rindex  = PETSC_NULL;
2815:   b->compressedrow.checked = PETSC_FALSE;
2816:   B->same_nonzero          = PETSC_FALSE;

2818:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqbaij_petsc_C",
2819:                                      "MatGetFactorAvailable_seqbaij_petsc",
2820:                                      MatGetFactorAvailable_seqbaij_petsc);
2821:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C",
2822:                                      "MatGetFactor_seqbaij_petsc",
2823:                                      MatGetFactor_seqbaij_petsc);
2824:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2825:                                      "MatInvertBlockDiagonal_SeqBAIJ",
2826:                                       MatInvertBlockDiagonal_SeqBAIJ);
2827:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2828:                                      "MatStoreValues_SeqBAIJ",
2829:                                       MatStoreValues_SeqBAIJ);
2830:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2831:                                      "MatRetrieveValues_SeqBAIJ",
2832:                                       MatRetrieveValues_SeqBAIJ);
2833:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2834:                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2835:                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);
2836:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2837:                                      "MatConvert_SeqBAIJ_SeqAIJ",
2838:                                       MatConvert_SeqBAIJ_SeqAIJ);
2839:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2840:                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2841:                                       MatConvert_SeqBAIJ_SeqSBAIJ);
2842:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2843:                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2844:                                       MatSeqBAIJSetPreallocation_SeqBAIJ);
2845:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
2846:                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
2847:                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);
2848:   PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
2849:   return(0);
2850: }

2855: PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues)
2856: {
2857:   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
2859:   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;

2862:   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");

2864:   PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);
2865:   for (i=0; i<mbs; i++) {
2866:     c->imax[i] = a->imax[i];
2867:     c->ilen[i] = a->ilen[i];
2868:   }

2870:   /* allocate the matrix space */
2871:   PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);
2872:   c->singlemalloc = PETSC_TRUE;
2873:   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
2874:   if (mbs > 0) {
2875:     PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
2876:     if (cpvalues == MAT_COPY_VALUES) {
2877:       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
2878:     } else {
2879:       PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
2880:     }
2881:   }
2882:   c->roworiented = a->roworiented;
2883:   c->nonew       = a->nonew;
2884:   PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);
2885:   PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);
2886:   c->bs2         = a->bs2;
2887:   c->mbs         = a->mbs;
2888:   c->nbs         = a->nbs;

2890:   if (a->diag) {
2891:     PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);
2892:     PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));
2893:     for (i=0; i<mbs; i++) {
2894:       c->diag[i] = a->diag[i];
2895:     }
2896:   } else c->diag        = 0;
2897:   c->nz                 = a->nz;
2898:   c->maxnz              = a->maxnz;
2899:   c->solve_work         = 0;
2900:   c->mult_work          = 0;
2901:   c->free_a             = PETSC_TRUE;
2902:   c->free_ij            = PETSC_TRUE;
2903:   C->preallocated       = PETSC_TRUE;
2904:   C->assembled          = PETSC_TRUE;

2906:   c->compressedrow.use     = a->compressedrow.use;
2907:   c->compressedrow.nrows   = a->compressedrow.nrows;
2908:   c->compressedrow.checked = a->compressedrow.checked;
2909:   if ( a->compressedrow.checked && a->compressedrow.use){
2910:     i = a->compressedrow.nrows;
2911:     PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);
2912:     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2913:     PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));
2914:     PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));
2915:   } else {
2916:     c->compressedrow.use    = PETSC_FALSE;
2917:     c->compressedrow.i      = PETSC_NULL;
2918:     c->compressedrow.rindex = PETSC_NULL;
2919:   }
2920:   C->same_nonzero = A->same_nonzero;
2921:   PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
2922:   return(0);
2923: }

2927: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2928: {

2932:   MatCreate(((PetscObject)A)->comm,B);
2933:   MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);
2934:   MatSetType(*B,MATSEQBAIJ);
2935:   MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues);
2936:   return(0);
2937: }

2941: PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
2942: {
2943:   Mat_SeqBAIJ    *a;
2944:   Mat            B;
2946:   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2947:   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2948:   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2949:   PetscInt       *masked,nmask,tmp,bs2,ishift;
2950:   PetscMPIInt    size;
2951:   int            fd;
2952:   PetscScalar    *aa;
2953:   MPI_Comm       comm = ((PetscObject)viewer)->comm;

2956:   PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");
2957:     PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);
2958:   PetscOptionsEnd();
2959:   bs2  = bs*bs;

2961:   MPI_Comm_size(comm,&size);
2962:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2963:   PetscViewerBinaryGetDescriptor(viewer,&fd);
2964:   PetscBinaryRead(fd,header,4,PETSC_INT);
2965:   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2966:   M = header[1]; N = header[2]; nz = header[3];

2968:   if (header[3] < 0) {
2969:     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2970:   }

2972:   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

2974:   /* 
2975:      This code adds extra rows to make sure the number of rows is 
2976:     divisible by the blocksize
2977:   */
2978:   mbs        = M/bs;
2979:   extra_rows = bs - M + bs*(mbs);
2980:   if (extra_rows == bs) extra_rows = 0;
2981:   else                  mbs++;
2982:   if (extra_rows) {
2983:     PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
2984:   }

2986:   /* read in row lengths */
2987:   PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);
2988:   PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
2989:   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;

2991:   /* read in column indices */
2992:   PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);
2993:   PetscBinaryRead(fd,jj,nz,PETSC_INT);
2994:   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;

2996:   /* loop over row lengths determining block row lengths */
2997:   PetscMalloc(mbs*sizeof(PetscInt),&browlengths);
2998:   PetscMemzero(browlengths,mbs*sizeof(PetscInt));
2999:   PetscMalloc(2*mbs*sizeof(PetscInt),&mask);
3000:   PetscMemzero(mask,mbs*sizeof(PetscInt));
3001:   masked   = mask + mbs;
3002:   rowcount = 0; nzcount = 0;
3003:   for (i=0; i<mbs; i++) {
3004:     nmask = 0;
3005:     for (j=0; j<bs; j++) {
3006:       kmax = rowlengths[rowcount];
3007:       for (k=0; k<kmax; k++) {
3008:         tmp = jj[nzcount++]/bs;
3009:         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3010:       }
3011:       rowcount++;
3012:     }
3013:     browlengths[i] += nmask;
3014:     /* zero out the mask elements we set */
3015:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3016:   }

3018:   /* create our matrix */
3019:   MatCreate(comm,&B);
3020:   MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3021:   MatSetType(B,type);
3022:   MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);
3023:   a = (Mat_SeqBAIJ*)B->data;

3025:   /* set matrix "i" values */
3026:   a->i[0] = 0;
3027:   for (i=1; i<= mbs; i++) {
3028:     a->i[i]      = a->i[i-1] + browlengths[i-1];
3029:     a->ilen[i-1] = browlengths[i-1];
3030:   }
3031:   a->nz         = 0;
3032:   for (i=0; i<mbs; i++) a->nz += browlengths[i];

3034:   /* read in nonzero values */
3035:   PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
3036:   PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
3037:   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;

3039:   /* set "a" and "j" values into matrix */
3040:   nzcount = 0; jcount = 0;
3041:   for (i=0; i<mbs; i++) {
3042:     nzcountb = nzcount;
3043:     nmask    = 0;
3044:     for (j=0; j<bs; j++) {
3045:       kmax = rowlengths[i*bs+j];
3046:       for (k=0; k<kmax; k++) {
3047:         tmp = jj[nzcount++]/bs;
3048:         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3049:       }
3050:     }
3051:     /* sort the masked values */
3052:     PetscSortInt(nmask,masked);

3054:     /* set "j" values into matrix */
3055:     maskcount = 1;
3056:     for (j=0; j<nmask; j++) {
3057:       a->j[jcount++]  = masked[j];
3058:       mask[masked[j]] = maskcount++;
3059:     }
3060:     /* set "a" values into matrix */
3061:     ishift = bs2*a->i[i];
3062:     for (j=0; j<bs; j++) {
3063:       kmax = rowlengths[i*bs+j];
3064:       for (k=0; k<kmax; k++) {
3065:         tmp       = jj[nzcountb]/bs ;
3066:         block     = mask[tmp] - 1;
3067:         point     = jj[nzcountb] - bs*tmp;
3068:         idx       = ishift + bs2*block + j + bs*point;
3069:         a->a[idx] = (MatScalar)aa[nzcountb++];
3070:       }
3071:     }
3072:     /* zero out the mask elements we set */
3073:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3074:   }
3075:   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");

3077:   PetscFree(rowlengths);
3078:   PetscFree(browlengths);
3079:   PetscFree(aa);
3080:   PetscFree(jj);
3081:   PetscFree(mask);

3083:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3084:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3085:   MatView_Private(B);

3087:   *A = B;
3088:   return(0);
3089: }

3093: /*@C
3094:    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3095:    compressed row) format.  For good matrix assembly performance the
3096:    user should preallocate the matrix storage by setting the parameter nz
3097:    (or the array nnz).  By setting these parameters accurately, performance
3098:    during matrix assembly can be increased by more than a factor of 50.

3100:    Collective on MPI_Comm

3102:    Input Parameters:
3103: +  comm - MPI communicator, set to PETSC_COMM_SELF
3104: .  bs - size of block
3105: .  m - number of rows
3106: .  n - number of columns
3107: .  nz - number of nonzero blocks  per block row (same for all rows)
3108: -  nnz - array containing the number of nonzero blocks in the various block rows 
3109:          (possibly different for each block row) or PETSC_NULL

3111:    Output Parameter:
3112: .  A - the matrix 

3114:    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3115:    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3116:    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

3118:    Options Database Keys:
3119: .   -mat_no_unroll - uses code that does not unroll the loops in the 
3120:                      block calculations (much slower)
3121: .    -mat_block_size - size of the blocks to use

3123:    Level: intermediate

3125:    Notes:
3126:    The number of rows and columns must be divisible by blocksize.

3128:    If the nnz parameter is given then the nz parameter is ignored

3130:    A nonzero block is any block that as 1 or more nonzeros in it

3132:    The block AIJ format is fully compatible with standard Fortran 77
3133:    storage.  That is, the stored row and column indices can begin at
3134:    either one (as in Fortran) or zero.  See the users' manual for details.

3136:    Specify the preallocated storage with either nz or nnz (not both).
3137:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
3138:    allocation.  For additional details, see the users manual chapter on
3139:    matrices.

3141: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3142: @*/
3143: PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3144: {
3146: 
3148:   MatCreate(comm,A);
3149:   MatSetSizes(*A,m,n,m,n);
3150:   MatSetType(*A,MATSEQBAIJ);
3151:   MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
3152:   return(0);
3153: }

3157: /*@C
3158:    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3159:    per row in the matrix. For good matrix assembly performance the
3160:    user should preallocate the matrix storage by setting the parameter nz
3161:    (or the array nnz).  By setting these parameters accurately, performance
3162:    during matrix assembly can be increased by more than a factor of 50.

3164:    Collective on MPI_Comm

3166:    Input Parameters:
3167: +  A - the matrix
3168: .  bs - size of block
3169: .  nz - number of block nonzeros per block row (same for all rows)
3170: -  nnz - array containing the number of block nonzeros in the various block rows 
3171:          (possibly different for each block row) or PETSC_NULL

3173:    Options Database Keys:
3174: .   -mat_no_unroll - uses code that does not unroll the loops in the 
3175:                      block calculations (much slower)
3176: .    -mat_block_size - size of the blocks to use

3178:    Level: intermediate

3180:    Notes:
3181:    If the nnz parameter is given then the nz parameter is ignored

3183:    You can call MatGetInfo() to get information on how effective the preallocation was;
3184:    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3185:    You can also run with the option -info and look for messages with the string 
3186:    malloc in them to see if additional memory allocation was needed.

3188:    The block AIJ format is fully compatible with standard Fortran 77
3189:    storage.  That is, the stored row and column indices can begin at
3190:    either one (as in Fortran) or zero.  See the users' manual for details.

3192:    Specify the preallocated storage with either nz or nnz (not both).
3193:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
3194:    allocation.  For additional details, see the users manual chapter on
3195:    matrices.

3197: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3198: @*/
3199: PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3200: {
3201:   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);

3204:   PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);
3205:   if (f) {
3206:     (*f)(B,bs,nz,nnz);
3207:   }
3208:   return(0);
3209: }

3213: /*@C
3214:    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3215:    (the default sequential PETSc format).  

3217:    Collective on MPI_Comm

3219:    Input Parameters:
3220: +  A - the matrix 
3221: .  i - the indices into j for the start of each local row (starts with zero)
3222: .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3223: -  v - optional values in the matrix

3225:    Level: developer

3227: .keywords: matrix, aij, compressed row, sparse

3229: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3230: @*/
3231: PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3232: {
3233:   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);

3236:   PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);
3237:   if (f) {
3238:     (*f)(B,bs,i,j,v);
3239:   }
3240:   return(0);
3241: }


3246: /*@
3247:      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 
3248:               (upper triangular entries in CSR format) provided by the user.

3250:      Collective on MPI_Comm

3252:    Input Parameters:
3253: +  comm - must be an MPI communicator of size 1
3254: .  bs - size of block
3255: .  m - number of rows
3256: .  n - number of columns
3257: .  i - row indices
3258: .  j - column indices
3259: -  a - matrix values

3261:    Output Parameter:
3262: .  mat - the matrix

3264:    Level: intermediate

3266:    Notes:
3267:        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3268:     once the matrix is destroyed

3270:        You cannot set new nonzero locations into this matrix, that will generate an error.

3272:        The i and j indices are 0 based

3274: .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()

3276: @*/
3277: PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3278: {
3280:   PetscInt       ii;
3281:   Mat_SeqBAIJ    *baij;

3284:   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3285:   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3286: 
3287:   MatCreate(comm,mat);
3288:   MatSetSizes(*mat,m,n,m,n);
3289:   MatSetType(*mat,MATSEQBAIJ);
3290:   MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);
3291:   baij = (Mat_SeqBAIJ*)(*mat)->data;
3292:   PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);

3294:   baij->i = i;
3295:   baij->j = j;
3296:   baij->a = a;
3297:   baij->singlemalloc = PETSC_FALSE;
3298:   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3299:   baij->free_a       = PETSC_FALSE;
3300:   baij->free_ij       = PETSC_FALSE;

3302:   for (ii=0; ii<m; ii++) {
3303:     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3304: #if defined(PETSC_USE_DEBUG)
3305:     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3306: #endif    
3307:   }
3308: #if defined(PETSC_USE_DEBUG)
3309:   for (ii=0; ii<baij->i[m]; ii++) {
3310:     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3311:     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3312:   }
3313: #endif    

3315:   MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3316:   MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3317:   return(0);
3318: }