Actual source code: bdfact.c

  2: /* Block diagonal matrix format - factorization and triangular solves */

 4:  #include src/mat/impls/bdiag/seq/bdiag.h
 5:  #include src/inline/ilu.h

  9: PetscErrorCode MatILUFactorSymbolic_SeqBDiag(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
 10: {
 11:   PetscTruth     idn;

 15:   if (A->m != A->n) SETERRQ(PETSC_ERR_SUP,"Matrix must be square");
 16:   if (isrow) {
 17:     ISIdentity(isrow,&idn);
 18:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
 19:   }
 20:   if (iscol) {
 21:     ISIdentity(iscol,&idn);
 22:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
 23:   }
 24:   if (info->levels != 0) {
 25:     SETERRQ(PETSC_ERR_SUP,"Only ILU(0) is supported");
 26:   }
 27:   MatConvert(A,MATSAME,B);

 29:   /* Must set to zero for repeated calls with different nonzero structure */
 30:   (*B)->factor = 0;
 31:   return(0);
 32: }

 36: PetscErrorCode MatILUFactor_SeqBDiag(Mat A,IS isrow,IS iscol,MatFactorInfo *info)
 37: {
 38:   PetscTruth     idn;

 42:   /* For now, no fill is allocated in symbolic factorization phase, so we
 43:      directly use the input matrix for numeric factorization. */
 44:   if (A->m != A->n) SETERRQ(PETSC_ERR_SUP,"Matrix must be square");
 45:   if (isrow) {
 46:     ISIdentity(isrow,&idn);
 47:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
 48:   }
 49:   if (iscol) {
 50:     ISIdentity(iscol,&idn);
 51:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
 52:   }
 53:   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only ILU(0) is supported");
 54:   MatLUFactorNumeric(A,&A);
 55:   return(0);
 56: }

 58: /* --------------------------------------------------------------------------*/
 61: PetscErrorCode MatLUFactorNumeric_SeqBDiag_N(Mat A,Mat *B)
 62: {
 63:   Mat            C = *B;
 64:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)C->data,*a1 = (Mat_SeqBDiag*)A->data;
 65:   PetscInt       k,d,d2,dgk,elim_row,elim_col,bs = A->bs,knb,knb2,bs2 = bs*bs;
 67:   PetscInt       dnum,nd = a->nd,mblock = a->mblock,nblock = a->nblock;
 68:   PetscInt       *diag = a->diag, m = A->m,mainbd = a->mainbd,*dgptr,len,i;
 69:   PetscScalar    **dv = a->diagv,*dd = dv[mainbd],*v_work;
 70:   PetscScalar    *multiplier;

 73:   /* Copy input matrix to factored matrix if we've already factored the
 74:      matrix before AND the nonzero structure remains the same.  This is done
 75:      in symbolic factorization the first time through, but there's no symbolic
 76:      factorization for successive calls with same matrix sparsity structure. */
 77:   if (C->factor == FACTOR_LU) {
 78:     for (i=0; i<a->nd; i++) {
 79:       len = a->bdlen[i] * bs2 * sizeof(PetscScalar);
 80:       d   = diag[i];
 81:       if (d > 0) {
 82:         PetscMemcpy(dv[i]+bs2*d,a1->diagv[i]+bs2*d,len);
 83:       } else {
 84:         PetscMemcpy(dv[i],a1->diagv[i],len);
 85:       }
 86:     }
 87:   }

 89:   if (!a->pivot) {
 90:     PetscMalloc((m+1)*sizeof(PetscInt),&a->pivot);
 91:     PetscLogObjectMemory(C,m*sizeof(PetscInt));
 92:   }
 93:   PetscMalloc((bs2+bs+1)*sizeof(PetscScalar),&v_work);
 94:   multiplier = v_work + bs;
 95:   PetscMalloc((mblock+nblock+1)*sizeof(PetscInt),&dgptr);
 96:   PetscMemzero(dgptr,(mblock+nblock)*sizeof(PetscInt));
 97:   for (k=0; k<nd; k++) dgptr[diag[k]+mblock] = k+1;
 98:   for (k=0; k<mblock; k++) { /* k = block pivot_row */
 99:     knb = k*bs; knb2 = knb*bs;
100:     /* invert the diagonal block */
101:     Kernel_A_gets_inverse_A(bs,dd+knb2,a->pivot+knb,v_work);
102:     for (d=mainbd-1; d>=0; d--) {
103:       elim_row = k + diag[d];
104:       if (elim_row < mblock) { /* sweep down */
105:         /* dv[d][knb2]: test if entire block is zero? */
106:         Kernel_A_gets_A_times_B(bs,&dv[d][elim_row*bs2],dd+knb2,multiplier);
107:         for (d2=d+1; d2<nd; d2++) {
108:           elim_col = elim_row - diag[d2];
109:           if (elim_col >=0 && elim_col < nblock) {
110:             dgk = k - elim_col;
111:             if ((dnum = dgptr[dgk+mblock])) {
112:               Kernel_A_gets_A_minus_B_times_C(bs,&dv[d2][elim_row*bs2],
113:                              &dv[d][elim_row*bs2],&dv[dnum-1][knb2]);
114:             }
115:           }
116:         }
117:       }
118:     }
119:   }
120:   PetscFree(dgptr);
121:   PetscFree(v_work);
122:   if (!a->solvework) {
123:     PetscMalloc(bs*sizeof(PetscScalar),&a->solvework);
124:   }
125:   C->factor = FACTOR_LU;
126:   return(0);
127: }

131: PetscErrorCode MatLUFactorNumeric_SeqBDiag_1(Mat A,Mat *B)
132: {
133:   Mat            C = *B;
134:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)C->data,*a1 = (Mat_SeqBDiag*)A->data;
136:   PetscInt       k,d,d2,dgk,elim_row,elim_col,dnum,nd = a->nd,i,len;
137:   PetscInt       *diag = a->diag,n = A->n,m = A->m,mainbd = a->mainbd,*dgptr;
138:   PetscScalar    **dv = a->diagv,*dd = dv[mainbd],mult;

141:   /* Copy input matrix to factored matrix if we've already factored the
142:      matrix before AND the nonzero structure remains the same.  This is done
143:      in symbolic factorization the first time through, but there's no symbolic
144:      factorization for successive calls with same matrix sparsity structure. */
145:   if (C->factor == FACTOR_LU) {
146:     for (i=0; i<nd; i++) {
147:       len = a->bdlen[i] * sizeof(PetscScalar);
148:       d   = diag[i];
149:       if (d > 0) {
150:         PetscMemcpy(dv[i]+d,a1->diagv[i]+d,len);
151:       } else {
152:         PetscMemcpy(dv[i],a1->diagv[i],len);
153:       }
154:     }
155:   }

157:   PetscMalloc((m+n+1)*sizeof(PetscInt),&dgptr);
158:   PetscMemzero(dgptr,(m+n)*sizeof(PetscInt));
159:   for (k=0; k<nd; k++) dgptr[diag[k]+m] = k+1;
160:   for (k=0; k<m; k++) { /* k = pivot_row */
161:     dd[k] = 1.0/dd[k];
162:     for (d=mainbd-1; d>=0; d--) {
163:       elim_row = k + diag[d];
164:       if (elim_row < m) { /* sweep down */
165:         if (dv[d][elim_row] != 0.0) {
166:           dv[d][elim_row] *= dd[k];
167:           mult = dv[d][elim_row];
168:           for (d2=d+1; d2<nd; d2++) {
169:             elim_col = elim_row - diag[d2];
170:             dgk = k - elim_col;
171:             if (elim_col >=0 && elim_col < n) {
172:               if ((dnum = dgptr[dgk+m])) {
173:                 dv[d2][elim_row] -= mult * dv[dnum-1][k];
174:               }
175:             }
176:           }
177:         }
178:       }
179:     }
180:   }
181:   PetscFree(dgptr);
182:   C->factor = FACTOR_LU;
183:   return(0);
184: }

186: /* -----------------------------------------------------------------*/

190: PetscErrorCode MatSolve_SeqBDiag_1(Mat A,Vec xx,Vec yy)
191: {
192:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
194:   PetscInt       i,d,loc,mainbd = a->mainbd;
195:   PetscInt       n = A->n,m = A->m,*diag = a->diag,col;
196:   PetscScalar    *x,*y,*dd = a->diagv[mainbd],sum,**dv = a->diagv;

199:   VecGetArray(xx,&x);
200:   VecGetArray(yy,&y);
201:   /* forward solve the lower triangular part */
202:   for (i=0; i<m; i++) {
203:     sum = x[i];
204:     for (d=0; d<mainbd; d++) {
205:       loc = i - diag[d];
206:       if (loc >= 0) sum -= dv[d][i] * y[loc];
207:     }
208:     y[i] = sum;
209:   }
210:   /* backward solve the upper triangular part */
211:   for (i=m-1; i>=0; i--) {
212:     sum = y[i];
213:     for (d=mainbd+1; d<a->nd; d++) {
214:       col = i - diag[d];
215:       if (col < n) sum -= dv[d][i] * y[col];
216:     }
217:     y[i] = sum*dd[i];
218:   }
219:   VecRestoreArray(xx,&x);
220:   VecRestoreArray(yy,&y);
221:   PetscLogFlops(2*a->nz - A->n);
222:   return(0);
223: }

227: PetscErrorCode MatSolve_SeqBDiag_2(Mat A,Vec xx,Vec yy)
228: {
229:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
230:   PetscInt       i,d,loc,mainbd = a->mainbd;
231:   PetscInt       mblock = a->mblock,nblock = a->nblock,inb,inb2;
233:   PetscInt       m = A->m,*diag = a->diag,col;
234:   PetscScalar    *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
235:   PetscScalar    w0,w1,sum0,sum1;

238:   VecGetArray(xx,&x);
239:   VecGetArray(yy,&y);
240:   PetscMemcpy(y,x,m*sizeof(PetscScalar));

242:   /* forward solve the lower triangular part */
243:   if (mainbd != 0) {
244:     inb = 0;
245:     for (i=0; i<mblock; i++) {
246:       sum0 = sum1 = 0.0;
247:       for (d=0; d<mainbd; d++) {
248:         loc = 2*(i - diag[d]);
249:         if (loc >= 0) {
250:           dvt = &dv[d][4*i];
251:           w0 = y[loc]; w1 = y[loc+1];
252:           sum0 += dvt[0]*w0 + dvt[2]*w1;
253:           sum1 += dvt[1]*w0 + dvt[3]*w1;
254:         }
255:       }
256:       y[inb] -= sum0; y[inb+1] -= sum1;

258:       inb += 2;
259:     }
260:   }
261:   /* backward solve the upper triangular part */
262:   inb = 2*(mblock-1); inb2 = 2*inb;
263:   for (i=mblock-1; i>=0; i--) {
264:     sum0 = y[inb]; sum1 = y[inb+1];
265:     for (d=mainbd+1; d<a->nd; d++) {
266:       col = 2*(i - diag[d]);
267:       if (col < 2*nblock) {
268:         dvt = &dv[d][4*i];
269:         w0 = y[col]; w1 = y[col+1];
270:         sum0 -= dvt[0]*w0 + dvt[2]*w1;
271:         sum1 -= dvt[1]*w0 + dvt[3]*w1;
272:       }
273:     }
274:     dvt = dd+inb2;
275:     y[inb]   = dvt[0]*sum0 + dvt[2]*sum1;
276:     y[inb+1] = dvt[1]*sum0 + dvt[3]*sum1;
277:     inb -= 2; inb2 -= 4;
278:   }
279:   VecRestoreArray(xx,&x);
280:   VecRestoreArray(yy,&y);
281:   PetscLogFlops(2*a->nz - A->n);
282:   return(0);
283: }

287: PetscErrorCode MatSolve_SeqBDiag_3(Mat A,Vec xx,Vec yy)
288: {
289:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
290:   PetscInt       i,d,loc,mainbd = a->mainbd;
291:   PetscInt       mblock = a->mblock,nblock = a->nblock,inb,inb2;
293:   PetscInt       m = A->m,*diag = a->diag,col;
294:   PetscScalar    *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
295:   PetscScalar    w0,w1,w2,sum0,sum1,sum2;

298:   VecGetArray(xx,&x);
299:   VecGetArray(yy,&y);
300:   PetscMemcpy(y,x,m*sizeof(PetscScalar));

302:   /* forward solve the lower triangular part */
303:   if (mainbd != 0) {
304:     inb = 0;
305:     for (i=0; i<mblock; i++) {
306:       sum0 = sum1 = sum2 = 0.0;
307:       for (d=0; d<mainbd; d++) {
308:         loc = 3*(i - diag[d]);
309:         if (loc >= 0) {
310:           dvt = &dv[d][9*i];
311:           w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];
312:           sum0 += dvt[0]*w0 + dvt[3]*w1 + dvt[6]*w2;
313:           sum1 += dvt[1]*w0 + dvt[4]*w1 + dvt[7]*w2;
314:           sum2 += dvt[2]*w0 + dvt[5]*w1 + dvt[8]*w2;
315:         }
316:       }
317:       y[inb] -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;
318:       inb += 3;
319:     }
320:   }
321:   /* backward solve the upper triangular part */
322:   inb = 3*(mblock-1); inb2 = 3*inb;
323:   for (i=mblock-1; i>=0; i--) {
324:     sum0 = y[inb]; sum1 = y[inb+1]; sum2 =  y[inb+2];
325:     for (d=mainbd+1; d<a->nd; d++) {
326:       col = 3*(i - diag[d]);
327:       if (col < 3*nblock) {
328:         dvt = &dv[d][9*i];
329:         w0 = y[col]; w1 = y[col+1];w2 = y[col+2];
330:         sum0 -= dvt[0]*w0 + dvt[3]*w1 + dvt[6]*w2;
331:         sum1 -= dvt[1]*w0 + dvt[4]*w1 + dvt[7]*w2;
332:         sum2 -= dvt[2]*w0 + dvt[5]*w1 + dvt[8]*w2;
333:       }
334:     }
335:     dvt = dd+inb2;
336:     y[inb]   = dvt[0]*sum0 + dvt[3]*sum1 + dvt[6]*sum2;
337:     y[inb+1] = dvt[1]*sum0 + dvt[4]*sum1 + dvt[7]*sum2;
338:     y[inb+2] = dvt[2]*sum0 + dvt[5]*sum1 + dvt[8]*sum2;
339:     inb -= 3; inb2 -= 9;
340:   }
341:   VecRestoreArray(xx,&x);
342:   VecRestoreArray(yy,&y);
343:   PetscLogFlops(2*a->nz - A->n);
344:   return(0);
345: }

349: PetscErrorCode MatSolve_SeqBDiag_4(Mat A,Vec xx,Vec yy)
350: {
351:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
352:   PetscInt       i,d,loc,mainbd = a->mainbd;
353:   PetscInt       mblock = a->mblock,nblock = a->nblock,inb,inb2;
355:   PetscInt       m = A->m,*diag = a->diag,col;
356:   PetscScalar    *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
357:   PetscScalar    w0,w1,w2,w3,sum0,sum1,sum2,sum3;

360:   VecGetArray(xx,&x);
361:   VecGetArray(yy,&y);
362:   PetscMemcpy(y,x,m*sizeof(PetscScalar));

364:   /* forward solve the lower triangular part */
365:   if (mainbd != 0) {
366:     inb = 0;
367:     for (i=0; i<mblock; i++) {
368:       sum0 = sum1 = sum2 = sum3 = 0.0;
369:       for (d=0; d<mainbd; d++) {
370:         loc = 4*(i - diag[d]);
371:         if (loc >= 0) {
372:           dvt = &dv[d][16*i];
373:           w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];w3 = y[loc+3];
374:           sum0 += dvt[0]*w0 + dvt[4]*w1 + dvt[8]*w2  + dvt[12]*w3;
375:           sum1 += dvt[1]*w0 + dvt[5]*w1 + dvt[9]*w2  + dvt[13]*w3;
376:           sum2 += dvt[2]*w0 + dvt[6]*w1 + dvt[10]*w2 + dvt[14]*w3;
377:           sum3 += dvt[3]*w0 + dvt[7]*w1 + dvt[11]*w2 + dvt[15]*w3;
378:         }
379:       }
380:       y[inb] -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;y[inb+3] -= sum3;
381:       inb += 4;
382:     }
383:   }
384:   /* backward solve the upper triangular part */
385:   inb = 4*(mblock-1); inb2 = 4*inb;
386:   for (i=mblock-1; i>=0; i--) {
387:     sum0 = y[inb]; sum1 = y[inb+1]; sum2 =  y[inb+2]; sum3 =  y[inb+3];
388:     for (d=mainbd+1; d<a->nd; d++) {
389:       col = 4*(i - diag[d]);
390:       if (col < 4*nblock) {
391:         dvt = &dv[d][16*i];
392:         w0 = y[col]; w1 = y[col+1];w2 = y[col+2];w3 = y[col+3];
393:         sum0 -= dvt[0]*w0 + dvt[4]*w1 + dvt[8]*w2  + dvt[12]*w3;
394:         sum1 -= dvt[1]*w0 + dvt[5]*w1 + dvt[9]*w2  + dvt[13]*w3;
395:         sum2 -= dvt[2]*w0 + dvt[6]*w1 + dvt[10]*w2 + dvt[14]*w3;
396:         sum3 -= dvt[3]*w0 + dvt[7]*w1 + dvt[11]*w2 + dvt[15]*w3;
397:       }
398:     }
399:     dvt = dd+inb2;
400:     y[inb]   = dvt[0]*sum0 + dvt[4]*sum1 + dvt[8]*sum2 + dvt[12]*sum3;
401:     y[inb+1] = dvt[1]*sum0 + dvt[5]*sum1 + dvt[9]*sum2 + dvt[13]*sum3;
402:     y[inb+2] = dvt[2]*sum0 + dvt[6]*sum1 + dvt[10]*sum2 + dvt[14]*sum3;
403:     y[inb+3] = dvt[3]*sum0 + dvt[7]*sum1 + dvt[11]*sum2 + dvt[15]*sum3;
404:     inb -= 4; inb2 -= 16;
405:   }
406:   VecRestoreArray(xx,&x);
407:   VecRestoreArray(yy,&y);
408:   PetscLogFlops(2*a->nz - A->n);
409:   return(0);
410: }

414: PetscErrorCode MatSolve_SeqBDiag_5(Mat A,Vec xx,Vec yy)
415: {
416:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
417:   PetscInt       i,d,loc,mainbd = a->mainbd;
418:   PetscInt       mblock = a->mblock,nblock = a->nblock,inb,inb2;
420:   PetscInt       m = A->m,*diag = a->diag,col;
421:   PetscScalar    *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
422:   PetscScalar    w0,w1,w2,w3,w4,sum0,sum1,sum2,sum3,sum4;

425:   VecGetArray(xx,&x);
426:   VecGetArray(yy,&y);
427:   PetscMemcpy(y,x,m*sizeof(PetscScalar));

429:   /* forward solve the lower triangular part */
430:   if (mainbd != 0) {
431:     inb = 0;
432:     for (i=0; i<mblock; i++) {
433:       sum0 = sum1 = sum2 = sum3 = sum4 = 0.0;
434:       for (d=0; d<mainbd; d++) {
435:         loc = 5*(i - diag[d]);
436:         if (loc >= 0) {
437:           dvt = &dv[d][25*i];
438:           w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];w3 = y[loc+3];w4 = y[loc+4];
439:           sum0 += dvt[0]*w0 + dvt[5]*w1 + dvt[10]*w2 + dvt[15]*w3 + dvt[20]*w4;
440:           sum1 += dvt[1]*w0 + dvt[6]*w1 + dvt[11]*w2 + dvt[16]*w3 + dvt[21]*w4;
441:           sum2 += dvt[2]*w0 + dvt[7]*w1 + dvt[12]*w2 + dvt[17]*w3 + dvt[22]*w4;
442:           sum3 += dvt[3]*w0 + dvt[8]*w1 + dvt[13]*w2 + dvt[18]*w3 + dvt[23]*w4;
443:           sum4 += dvt[4]*w0 + dvt[9]*w1 + dvt[14]*w2 + dvt[19]*w3 + dvt[24]*w4;
444:         }
445:       }
446:       y[inb]   -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;y[inb+3] -= sum3;
447:       y[inb+4] -= sum4;
448:       inb += 5;
449:     }
450:   }
451:   /* backward solve the upper triangular part */
452:   inb = 5*(mblock-1); inb2 = 5*inb;
453:   for (i=mblock-1; i>=0; i--) {
454:     sum0 = y[inb];sum1 = y[inb+1];sum2 = y[inb+2];sum3 = y[inb+3];sum4 = y[inb+4];
455:     for (d=mainbd+1; d<a->nd; d++) {
456:       col = 5*(i - diag[d]);
457:       if (col < 5*nblock) {
458:         dvt = &dv[d][25*i];
459:         w0 = y[col]; w1 = y[col+1];w2 = y[col+2];w3 = y[col+3];w4 = y[col+4];
460:         sum0 -= dvt[0]*w0 + dvt[5]*w1 + dvt[10]*w2 + dvt[15]*w3 + dvt[20]*w4;
461:         sum1 -= dvt[1]*w0 + dvt[6]*w1 + dvt[11]*w2 + dvt[16]*w3 + dvt[21]*w4;
462:         sum2 -= dvt[2]*w0 + dvt[7]*w1 + dvt[12]*w2 + dvt[17]*w3 + dvt[22]*w4;
463:         sum3 -= dvt[3]*w0 + dvt[8]*w1 + dvt[13]*w2 + dvt[18]*w3 + dvt[23]*w4;
464:         sum4 -= dvt[4]*w0 + dvt[9]*w1 + dvt[14]*w2 + dvt[19]*w3 + dvt[24]*w4;
465:       }
466:     }
467:     dvt = dd+inb2;
468:     y[inb]   = dvt[0]*sum0 + dvt[5]*sum1 + dvt[10]*sum2 + dvt[15]*sum3
469:                + dvt[20]*sum4;
470:     y[inb+1] = dvt[1]*sum0 + dvt[6]*sum1 + dvt[11]*sum2 + dvt[16]*sum3
471:                + dvt[21]*sum4;
472:     y[inb+2] = dvt[2]*sum0 + dvt[7]*sum1 + dvt[12]*sum2 + dvt[17]*sum3
473:                + dvt[22]*sum4;
474:     y[inb+3] = dvt[3]*sum0 + dvt[8]*sum1 + dvt[13]*sum2 + dvt[18]*sum3
475:                + dvt[23]*sum4;
476:     y[inb+4] = dvt[4]*sum0 + dvt[9]*sum1 + dvt[14]*sum2 + dvt[19]*sum3
477:                + dvt[24]*sum4;
478:     inb -= 5; inb2 -= 25;
479:   }
480:   VecRestoreArray(xx,&x);
481:   VecRestoreArray(yy,&y);
482:   PetscLogFlops(2*a->nz - A->n);
483:   return(0);
484: }

488: PetscErrorCode MatSolve_SeqBDiag_N(Mat A,Vec xx,Vec yy)
489: {
490:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
491:   PetscInt       i,d,loc,mainbd = a->mainbd;
492:   PetscInt       mblock = a->mblock,nblock = a->nblock,inb,inb2;
494:   PetscInt       bs = A->bs,m = A->m,*diag = a->diag,col,bs2 = bs*bs;
495:   PetscScalar    *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv;
496:   PetscScalar    *work = a->solvework;

499:   VecGetArray(xx,&x);
500:   VecGetArray(yy,&y);
501:   PetscMemcpy(y,x,m*sizeof(PetscScalar));

503:   /* forward solve the lower triangular part */
504:   if (mainbd != 0) {
505:     inb = 0;
506:     for (i=0; i<mblock; i++) {
507:       for (d=0; d<mainbd; d++) {
508:         loc = i - diag[d];
509:         if (loc >= 0) {
510:           Kernel_v_gets_v_minus_A_times_w(bs,y+inb,&dv[d][i*bs2],y+loc*bs);
511:         }
512:       }
513:       inb += bs;
514:     }
515:   }
516:   /* backward solve the upper triangular part */
517:   inb = bs*(mblock-1); inb2 = inb*bs;
518:   for (i=mblock-1; i>=0; i--) {
519:     for (d=mainbd+1; d<a->nd; d++) {
520:       col = i - diag[d];
521:       if (col < nblock) {
522:         Kernel_v_gets_v_minus_A_times_w(bs,y+inb,&dv[d][inb2],y+col*bs);
523:       }
524:     }
525:     Kernel_w_gets_A_times_v(bs,y+inb,dd+inb2,work);
526:     PetscMemcpy(y+inb,work,bs*sizeof(PetscScalar));
527:     inb -= bs; inb2 -= bs2;
528:   }
529:   VecRestoreArray(xx,&x);
530:   VecRestoreArray(yy,&y);
531:   PetscLogFlops(2*a->nz - A->n);
532:   return(0);
533: }