Actual source code: sbaijfact.c

 2:  #include src/mat/impls/baij/seq/baij.h
 3:  #include src/mat/impls/sbaij/seq/sbaij.h
 4:  #include src/inline/ilu.h
 5:  #include include/petscis.h

  7: #if !defined(PETSC_USE_COMPLEX)
  8: /* 
  9:   input:
 10:    F -- numeric factor 
 11:   output:
 12:    nneg, nzero, npos: matrix inertia 
 13: */

 17: PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F,PetscInt *nneig,PetscInt *nzero,PetscInt *npos)
 18: {
 19:   Mat_SeqSBAIJ *fact_ptr = (Mat_SeqSBAIJ*)F->data;
 20:   PetscScalar  *dd = fact_ptr->a;
 21:   PetscInt     mbs=fact_ptr->mbs,bs=F->bs,i,nneig_tmp,npos_tmp,*fi = fact_ptr->i;

 24:   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"No support for bs: %D >1 yet",bs);
 25:   nneig_tmp = 0; npos_tmp = 0;
 26:   for (i=0; i<mbs; i++){
 27:     if (PetscRealPart(dd[*fi]) > 0.0){
 28:       npos_tmp++;
 29:     } else if (PetscRealPart(dd[*fi]) < 0.0){
 30:       nneig_tmp++;
 31:     }
 32:     fi++;
 33:   }
 34:   if (nneig) *nneig = nneig_tmp;
 35:   if (npos)  *npos  = npos_tmp;
 36:   if (nzero) *nzero = mbs - nneig_tmp - npos_tmp;

 38:   return(0);
 39: }
 40: #endif /* !defined(PETSC_USE_COMPLEX) */

 42: /* Using Modified Sparse Row (MSR) storage.
 43: See page 85, "Iterative Methods ..." by Saad. */
 44: /*
 45:     Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
 46: */
 47: /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */
 50: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *B)
 51: {
 52:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b;
 54:   PetscInt       *rip,i,mbs = a->mbs,*ai,*aj;
 55:   PetscInt       *jutmp,bs = A->bs,bs2=a->bs2;
 56:   PetscInt       m,reallocs = 0,prow;
 57:   PetscInt       *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
 58:   PetscInt       *il,ili,nextprow;
 59:   PetscReal      f = info->fill;
 60:   PetscTruth     perm_identity;

 63:   /* check whether perm is the identity mapping */
 64:   ISIdentity(perm,&perm_identity);

 66:   /* -- inplace factorization, i.e., use sbaij for *B -- */
 67:   if (perm_identity && bs==1 ){
 68:     if (!perm_identity) a->permute = PETSC_TRUE;
 69: 
 70:   ISGetIndices(perm,&rip);
 71: 
 72:   if (perm_identity){ /* without permutation */
 73:     ai = a->i; aj = a->j;
 74:   } else {            /* non-trivial permutation */
 75:     MatReorderingSeqSBAIJ(A,perm);
 76:     ai = a->inew; aj = a->jnew;
 77:   }
 78: 
 79:   /* initialization */
 80:   PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);
 81:   umax  = (PetscInt)(f*ai[mbs] + 1);
 82:   PetscMalloc(umax*sizeof(PetscInt),&ju);
 83:   iu[0] = 0;
 84:   juidx = 0; /* index for ju */
 85:   PetscMalloc((3*mbs+1)*sizeof(PetscInt),&jl); /* linked list for getting pivot row */
 86:   q     = jl + mbs;   /* linked list for col index of active row */
 87:   il    = q  + mbs;
 88:   for (i=0; i<mbs; i++){
 89:     jl[i] = mbs;
 90:     q[i]  = 0;
 91:     il[i] = 0;
 92:   }

 94:   /* for each row k */
 95:   for (k=0; k<mbs; k++){
 96:     nzk  = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
 97:     q[k] = mbs;
 98:     /* initialize nonzero structure of k-th row to row rip[k] of A */
 99:     jmin = ai[rip[k]] +1; /* exclude diag[k] */
100:     jmax = ai[rip[k]+1];
101:     for (j=jmin; j<jmax; j++){
102:       vj = rip[aj[j]]; /* col. value */
103:       if(vj > k){
104:         qm = k;
105:         do {
106:           m  = qm; qm = q[m];
107:         } while(qm < vj);
108:         if (qm == vj) {
109:           SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n");
110:         }
111:         nzk++;
112:         q[m]  = vj;
113:         q[vj] = qm;
114:       } /* if(vj > k) */
115:     } /* for (j=jmin; j<jmax; j++) */

117:     /* modify nonzero structure of k-th row by computing fill-in
118:        for each row i to be merged in */
119:     prow = k;
120:     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
121: 
122:     while (prow < k){
123:       nextprow = jl[prow];
124: 
125:       /* merge row prow into k-th row */
126:       ili = il[prow];
127:       jmin = ili + 1;  /* points to 2nd nzero entry in U(prow,k:mbs-1) */
128:       jmax = iu[prow+1];
129:       qm = k;
130:       for (j=jmin; j<jmax; j++){
131:         vj = ju[j];
132:         do {
133:           m = qm; qm = q[m];
134:         } while (qm < vj);
135:         if (qm != vj){  /* a fill */
136:           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
137:         }
138:       } /* end of for (j=jmin; j<jmax; j++) */
139:       if (jmin < jmax){
140:         il[prow] = jmin;
141:         j = ju[jmin];
142:         jl[prow] = jl[j]; jl[j] = prow;  /* update jl */
143:       }
144:       prow = nextprow;
145:     }
146: 
147:     /* update il and jl */
148:     if (nzk > 0){
149:       i = q[k]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
150:       jl[k] = jl[i]; jl[i] = k;
151:       il[k] = iu[k] + 1;
152:     }
153:     iu[k+1] = iu[k] + nzk + 1;  /* include diag[k] */

155:     /* allocate more space to ju if needed */
156:     if (iu[k+1] > umax) {
157:       /* estimate how much additional space we will need */
158:       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
159:       /* just double the memory each time */
160:       maxadd = umax;
161:       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
162:       umax += maxadd;

164:       /* allocate a longer ju */
165:       PetscMalloc(umax*sizeof(PetscInt),&jutmp);
166:       PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));
167:       PetscFree(ju);
168:       ju   = jutmp;
169:       reallocs++; /* count how many times we realloc */
170:     }

172:     /* save nonzero structure of k-th row in ju */
173:     ju[juidx++] = k; /* diag[k] */
174:     i = k;
175:     while (nzk --) {
176:       i           = q[i];
177:       ju[juidx++] = i;
178:     }
179:   }

181:   if (ai[mbs] != 0) {
182:     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
183:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
184:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
185:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af);
186:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n");
187:   } else {
188:      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
189:   }

191:   ISRestoreIndices(perm,&rip);
192:   /* PetscFree(q); */
193:   PetscFree(jl);

195:   /* put together the new matrix */
196:   MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);
197:   MatSetType(*B,A->type_name);
198:   MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);

200:   /* PetscLogObjectParent(*B,iperm); */
201:   b = (Mat_SeqSBAIJ*)(*B)->data;
202:   PetscFree(b->imax);
203:   b->singlemalloc = PETSC_FALSE;
204:   /* the next line frees the default space generated by the Create() */
205:   PetscFree(b->a);
206:   PetscFree(b->ilen);
207:   PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);
208:   b->j    = ju;
209:   b->i    = iu;
210:   b->diag = 0;
211:   b->ilen = 0;
212:   b->imax = 0;
213:   b->row  = perm;
214:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
215:   PetscObjectReference((PetscObject)perm);
216:   b->icol = perm;
217:   PetscObjectReference((PetscObject)perm);
218:   PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);
219:   /* In b structure:  Free imax, ilen, old a, old j.  
220:      Allocate idnew, solve_work, new a, new j */
221:   PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
222:   b->maxnz = b->nz = iu[mbs];
223: 
224:   (*B)->factor                 = FACTOR_CHOLESKY;
225:   (*B)->info.factor_mallocs    = reallocs;
226:   (*B)->info.fill_ratio_given  = f;
227:   if (ai[mbs] != 0) {
228:     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
229:   } else {
230:     (*B)->info.fill_ratio_needed = 0.0;
231:   }


234:   (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
235:   (*B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
236:   (*B)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
237:   PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n");
238: 
239:   return(0);
240:   }
241:   /* -----------  end of new code --------------------*/


244:   if (!perm_identity) a->permute = PETSC_TRUE;
245: 
246:   ISGetIndices(perm,&rip);
247: 
248:   if (perm_identity){ /* without permutation */
249:     ai = a->i; aj = a->j;
250:   } else {            /* non-trivial permutation */
251:     MatReorderingSeqSBAIJ(A,perm);
252:     ai = a->inew; aj = a->jnew;
253:   }
254: 
255:   /* initialization */
256:   PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);
257:   umax  = (PetscInt)(f*ai[mbs] + 1); umax += mbs + 1;
258:   PetscMalloc(umax*sizeof(PetscInt),&ju);
259:   iu[0] = mbs+1;
260:   juidx = mbs + 1; /* index for ju */
261:   PetscMalloc(2*mbs*sizeof(PetscInt),&jl); /* linked list for pivot row */
262:   q     = jl + mbs;   /* linked list for col index */
263:   for (i=0; i<mbs; i++){
264:     jl[i] = mbs;
265:     q[i] = 0;
266:   }

268:   /* for each row k */
269:   for (k=0; k<mbs; k++){
270:     for (i=0; i<mbs; i++) q[i] = 0;  /* to be removed! */
271:     nzk  = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
272:     q[k] = mbs;
273:     /* initialize nonzero structure of k-th row to row rip[k] of A */
274:     jmin = ai[rip[k]] +1; /* exclude diag[k] */
275:     jmax = ai[rip[k]+1];
276:     for (j=jmin; j<jmax; j++){
277:       vj = rip[aj[j]]; /* col. value */
278:       if(vj > k){
279:         qm = k;
280:         do {
281:           m  = qm; qm = q[m];
282:         } while(qm < vj);
283:         if (qm == vj) {
284:           SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n");
285:         }
286:         nzk++;
287:         q[m]  = vj;
288:         q[vj] = qm;
289:       } /* if(vj > k) */
290:     } /* for (j=jmin; j<jmax; j++) */

292:     /* modify nonzero structure of k-th row by computing fill-in
293:        for each row i to be merged in */
294:     prow = k;
295:     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
296: 
297:     while (prow < k){
298:       /* merge row prow into k-th row */
299:       jmin = iu[prow] + 1; jmax = iu[prow+1];
300:       qm = k;
301:       for (j=jmin; j<jmax; j++){
302:         vj = ju[j];
303:         do {
304:           m = qm; qm = q[m];
305:         } while (qm < vj);
306:         if (qm != vj){
307:          nzk++; q[m] = vj; q[vj] = qm; qm = vj;
308:         }
309:       }
310:       prow = jl[prow]; /* next pivot row */
311:     }
312: 
313:     /* add k to row list for first nonzero element in k-th row */
314:     if (nzk > 0){
315:       i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
316:       jl[k] = jl[i]; jl[i] = k;
317:     }
318:     iu[k+1] = iu[k] + nzk;

320:     /* allocate more space to ju if needed */
321:     if (iu[k+1] > umax) {
322:       /* estimate how much additional space we will need */
323:       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
324:       /* just double the memory each time */
325:       maxadd = umax;
326:       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
327:       umax += maxadd;

329:       /* allocate a longer ju */
330:       PetscMalloc(umax*sizeof(PetscInt),&jutmp);
331:       PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));
332:       PetscFree(ju);
333:       ju   = jutmp;
334:       reallocs++; /* count how many times we realloc */
335:     }

337:     /* save nonzero structure of k-th row in ju */
338:     i=k;
339:     while (nzk --) {
340:       i           = q[i];
341:       ju[juidx++] = i;
342:     }
343:   }

345:   if (ai[mbs] != 0) {
346:     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
347:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
348:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
349:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af);
350:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n");
351:   } else {
352:      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
353:   }

355:   ISRestoreIndices(perm,&rip);
356:   /* PetscFree(q); */
357:   PetscFree(jl);

359:   /* put together the new matrix */
360:   MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);
361:   MatSetType(*B,A->type_name);
362:   MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);

364:   /* PetscLogObjectParent(*B,iperm); */
365:   b = (Mat_SeqSBAIJ*)(*B)->data;
366:   PetscFree(b->imax);
367:   b->singlemalloc = PETSC_FALSE;
368:   /* the next line frees the default space generated by the Create() */
369:   PetscFree(b->a);
370:   PetscFree(b->ilen);
371:   PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);
372:   b->j    = ju;
373:   b->i    = iu;
374:   b->diag = 0;
375:   b->ilen = 0;
376:   b->imax = 0;
377:   b->row  = perm;
378:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
379:   PetscObjectReference((PetscObject)perm);
380:   b->icol = perm;
381:   PetscObjectReference((PetscObject)perm);
382:   PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);
383:   /* In b structure:  Free imax, ilen, old a, old j.  
384:      Allocate idnew, solve_work, new a, new j */
385:   PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
386:   b->maxnz = b->nz = iu[mbs];
387: 
388:   (*B)->factor                 = FACTOR_CHOLESKY;
389:   (*B)->info.factor_mallocs    = reallocs;
390:   (*B)->info.fill_ratio_given  = f;
391:   if (ai[mbs] != 0) {
392:     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
393:   } else {
394:     (*B)->info.fill_ratio_needed = 0.0;
395:   }

397:   if (perm_identity){
398:     switch (bs) {
399:       case 1:
400:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
401:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
402:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n");
403:         break;
404:       case 2:
405:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
406:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
407:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
408:         break;
409:       case 3:
410:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
411:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
412:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n");
413:         break;
414:       case 4:
415:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
416:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
417:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
418:         break;
419:       case 5:
420:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
421:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
422:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
423:         break;
424:       case 6:
425:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
426:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
427:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
428:         break;
429:       case 7:
430:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
431:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
432:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
433:       break;
434:       default:
435:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
436:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_N_NaturalOrdering;
437:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n");
438:       break;
439:     }
440:   }

442:   return(0);
443: }


448: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B)
449: {
450:   Mat            C = *B;
451:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
452:   IS             perm = b->row;
454:   PetscInt       *perm_ptr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
455:   PetscInt       *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
456:   PetscInt       bs=A->bs,bs2 = a->bs2;
457:   MatScalar      *ba = b->a,*aa,*ap,*dk,*uik;
458:   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
459:   MatScalar      *work;
460:   PetscInt       *pivots;

463:   /* initialization */
464:   PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);
465:   PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));
466:   PetscMalloc(2*mbs*sizeof(PetscInt),&il);
467:   jl   = il + mbs;
468:   for (i=0; i<mbs; i++) {
469:     jl[i] = mbs; il[0] = 0;
470:   }
471:   PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);
472:   uik  = dk + bs2;
473:   work = uik + bs2;
474:   PetscMalloc(bs*sizeof(PetscInt),&pivots);
475: 
476:   ISGetIndices(perm,&perm_ptr);
477: 
478:   /* check permutation */
479:   if (!a->permute){
480:     ai = a->i; aj = a->j; aa = a->a;
481:   } else {
482:     ai   = a->inew; aj = a->jnew;
483:     PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);
484:     PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));
485:     PetscMalloc(ai[mbs]*sizeof(PetscInt),&a2anew);
486:     PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(PetscInt));

488:     for (i=0; i<mbs; i++){
489:       jmin = ai[i]; jmax = ai[i+1];
490:       for (j=jmin; j<jmax; j++){
491:         while (a2anew[j] != j){
492:           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
493:           for (k1=0; k1<bs2; k1++){
494:             dk[k1]       = aa[k*bs2+k1];
495:             aa[k*bs2+k1] = aa[j*bs2+k1];
496:             aa[j*bs2+k1] = dk[k1];
497:           }
498:         }
499:         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
500:         if (i > aj[j]){
501:           /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
502:           ap = aa + j*bs2;                     /* ptr to the beginning of j-th block of aa */
503:           for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
504:           for (k=0; k<bs; k++){               /* j-th block of aa <- dk^T */
505:             for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1];
506:           }
507:         }
508:       }
509:     }
510:     PetscFree(a2anew);
511:   }
512: 
513:   /* for each row k */
514:   for (k = 0; k<mbs; k++){

516:     /*initialize k-th row with elements nonzero in row perm(k) of A */
517:     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
518: 
519:     ap = aa + jmin*bs2;
520:     for (j = jmin; j < jmax; j++){
521:       vj = perm_ptr[aj[j]];         /* block col. index */
522:       rtmp_ptr = rtmp + vj*bs2;
523:       for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
524:     }

526:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
527:     PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));
528:     i = jl[k]; /* first row to be added to k_th row  */

530:     while (i < k){
531:       nexti = jl[i]; /* next row to be added to k_th row */

533:       /* compute multiplier */
534:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

536:       /* uik = -inv(Di)*U_bar(i,k) */
537:       diag = ba + i*bs2;
538:       u    = ba + ili*bs2;
539:       PetscMemzero(uik,bs2*sizeof(MatScalar));
540:       Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
541: 
542:       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
543:       Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
544: 
545:       /* update -U(i,k) */
546:       PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));

548:       /* add multiple of row i to k-th row ... */
549:       jmin = ili + 1; jmax = bi[i+1];
550:       if (jmin < jmax){
551:         for (j=jmin; j<jmax; j++) {
552:           /* rtmp += -U(i,k)^T * U_bar(i,j) */
553:           rtmp_ptr = rtmp + bj[j]*bs2;
554:           u = ba + j*bs2;
555:           Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
556:         }
557: 
558:         /* ... add i to row list for next nonzero entry */
559:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
560:         j     = bj[jmin];
561:         jl[i] = jl[j]; jl[j] = i; /* update jl */
562:       }
563:       i = nexti;
564:     }

566:     /* save nonzero entries in k-th row of U ... */

568:     /* invert diagonal block */
569:     diag = ba+k*bs2;
570:     PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));
571:     Kernel_A_gets_inverse_A(bs,diag,pivots,work);
572: 
573:     jmin = bi[k]; jmax = bi[k+1];
574:     if (jmin < jmax) {
575:       for (j=jmin; j<jmax; j++){
576:          vj = bj[j];           /* block col. index of U */
577:          u   = ba + j*bs2;
578:          rtmp_ptr = rtmp + vj*bs2;
579:          for (k1=0; k1<bs2; k1++){
580:            *u++        = *rtmp_ptr;
581:            *rtmp_ptr++ = 0.0;
582:          }
583:       }
584: 
585:       /* ... add k to row list for first nonzero entry in k-th row */
586:       il[k] = jmin;
587:       i     = bj[jmin];
588:       jl[k] = jl[i]; jl[i] = k;
589:     }
590:   }

592:   PetscFree(rtmp);
593:   PetscFree(il);
594:   PetscFree(dk);
595:   PetscFree(pivots);
596:   if (a->permute){
597:     PetscFree(aa);
598:   }

600:   ISRestoreIndices(perm,&perm_ptr);
601:   C->factor       = FACTOR_CHOLESKY;
602:   C->assembled    = PETSC_TRUE;
603:   C->preallocated = PETSC_TRUE;
604:   PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
605:   return(0);
606: }

610: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B)
611: {
612:   Mat            C = *B;
613:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
615:   PetscInt       i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
616:   PetscInt       *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
617:   PetscInt       bs=A->bs,bs2 = a->bs2;
618:   MatScalar      *ba = b->a,*aa,*ap,*dk,*uik;
619:   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
620:   MatScalar      *work;
621:   PetscInt       *pivots;

624:   /* initialization */
625: 
626:   PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);
627:   PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));
628:   PetscMalloc(2*mbs*sizeof(PetscInt),&il);
629:   jl   = il + mbs;
630:   for (i=0; i<mbs; i++) {
631:     jl[i] = mbs; il[0] = 0;
632:   }
633:   PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);
634:   uik  = dk + bs2;
635:   work = uik + bs2;
636:   PetscMalloc(bs*sizeof(PetscInt),&pivots);
637: 
638:   ai = a->i; aj = a->j; aa = a->a;
639: 
640:   /* for each row k */
641:   for (k = 0; k<mbs; k++){

643:     /*initialize k-th row with elements nonzero in row k of A */
644:     jmin = ai[k]; jmax = ai[k+1];
645:     ap = aa + jmin*bs2;
646:     for (j = jmin; j < jmax; j++){
647:       vj = aj[j];         /* block col. index */
648:       rtmp_ptr = rtmp + vj*bs2;
649:       for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
650:     }

652:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
653:     PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));
654:     i = jl[k]; /* first row to be added to k_th row  */

656:     while (i < k){
657:       nexti = jl[i]; /* next row to be added to k_th row */

659:       /* compute multiplier */
660:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

662:       /* uik = -inv(Di)*U_bar(i,k) */
663:       diag = ba + i*bs2;
664:       u    = ba + ili*bs2;
665:       PetscMemzero(uik,bs2*sizeof(MatScalar));
666:       Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
667: 
668:       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
669:       Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
670: 
671:       /* update -U(i,k) */
672:       PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));

674:       /* add multiple of row i to k-th row ... */
675:       jmin = ili + 1; jmax = bi[i+1];
676:       if (jmin < jmax){
677:         for (j=jmin; j<jmax; j++) {
678:           /* rtmp += -U(i,k)^T * U_bar(i,j) */
679:           rtmp_ptr = rtmp + bj[j]*bs2;
680:           u = ba + j*bs2;
681:           Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
682:         }
683: 
684:         /* ... add i to row list for next nonzero entry */
685:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
686:         j     = bj[jmin];
687:         jl[i] = jl[j]; jl[j] = i; /* update jl */
688:       }
689:       i = nexti;
690:     }

692:     /* save nonzero entries in k-th row of U ... */

694:     /* invert diagonal block */
695:     diag = ba+k*bs2;
696:     PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));
697:     Kernel_A_gets_inverse_A(bs,diag,pivots,work);
698: 
699:     jmin = bi[k]; jmax = bi[k+1];
700:     if (jmin < jmax) {
701:       for (j=jmin; j<jmax; j++){
702:          vj = bj[j];           /* block col. index of U */
703:          u   = ba + j*bs2;
704:          rtmp_ptr = rtmp + vj*bs2;
705:          for (k1=0; k1<bs2; k1++){
706:            *u++        = *rtmp_ptr;
707:            *rtmp_ptr++ = 0.0;
708:          }
709:       }
710: 
711:       /* ... add k to row list for first nonzero entry in k-th row */
712:       il[k] = jmin;
713:       i     = bj[jmin];
714:       jl[k] = jl[i]; jl[i] = k;
715:     }
716:   }

718:   PetscFree(rtmp);
719:   PetscFree(il);
720:   PetscFree(dk);
721:   PetscFree(pivots);

723:   C->factor    = FACTOR_CHOLESKY;
724:   C->assembled = PETSC_TRUE;
725:   C->preallocated = PETSC_TRUE;
726:   PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
727:   return(0);
728: }

730: /*
731:     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
732:     Version for blocks 2 by 2.
733: */
736: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B)
737: {
738:   Mat            C = *B;
739:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
740:   IS             perm = b->row;
742:   PetscInt       *perm_ptr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
743:   PetscInt       *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
744:   MatScalar      *ba = b->a,*aa,*ap,*dk,*uik;
745:   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;

748:   /* initialization */
749:   /* il and jl record the first nonzero element in each row of the accessing 
750:      window U(0:k, k:mbs-1).
751:      jl:    list of rows to be added to uneliminated rows 
752:             i>= k: jl(i) is the first row to be added to row i
753:             i<  k: jl(i) is the row following row i in some list of rows
754:             jl(i) = mbs indicates the end of a list                        
755:      il(i): points to the first nonzero element in columns k,...,mbs-1 of 
756:             row i of U */
757:   PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);
758:   PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));
759:   PetscMalloc(2*mbs*sizeof(PetscInt),&il);
760:   jl   = il + mbs;
761:   for (i=0; i<mbs; i++) {
762:     jl[i] = mbs; il[0] = 0;
763:   }
764:   PetscMalloc(8*sizeof(MatScalar),&dk);
765:   uik  = dk + 4;
766:   ISGetIndices(perm,&perm_ptr);

768:   /* check permutation */
769:   if (!a->permute){
770:     ai = a->i; aj = a->j; aa = a->a;
771:   } else {
772:     ai   = a->inew; aj = a->jnew;
773:     PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);
774:     PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));
775:     PetscMalloc(ai[mbs]*sizeof(PetscInt),&a2anew);
776:     PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(PetscInt));

778:     for (i=0; i<mbs; i++){
779:       jmin = ai[i]; jmax = ai[i+1];
780:       for (j=jmin; j<jmax; j++){
781:         while (a2anew[j] != j){
782:           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
783:           for (k1=0; k1<4; k1++){
784:             dk[k1]       = aa[k*4+k1];
785:             aa[k*4+k1] = aa[j*4+k1];
786:             aa[j*4+k1] = dk[k1];
787:           }
788:         }
789:         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
790:         if (i > aj[j]){
791:           /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
792:           ap = aa + j*4;     /* ptr to the beginning of the block */
793:           dk[1] = ap[1];     /* swap ap[1] and ap[2] */
794:           ap[1] = ap[2];
795:           ap[2] = dk[1];
796:         }
797:       }
798:     }
799:     PetscFree(a2anew);
800:   }

802:   /* for each row k */
803:   for (k = 0; k<mbs; k++){

805:     /*initialize k-th row with elements nonzero in row perm(k) of A */
806:     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
807:     ap = aa + jmin*4;
808:     for (j = jmin; j < jmax; j++){
809:       vj = perm_ptr[aj[j]];         /* block col. index */
810:       rtmp_ptr = rtmp + vj*4;
811:       for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
812:     }

814:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
815:     PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));
816:     i = jl[k]; /* first row to be added to k_th row  */

818:     while (i < k){
819:       nexti = jl[i]; /* next row to be added to k_th row */

821:       /* compute multiplier */
822:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

824:       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
825:       diag = ba + i*4;
826:       u    = ba + ili*4;
827:       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
828:       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
829:       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
830:       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
831: 
832:       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
833:       dk[0] += uik[0]*u[0] + uik[1]*u[1];
834:       dk[1] += uik[2]*u[0] + uik[3]*u[1];
835:       dk[2] += uik[0]*u[2] + uik[1]*u[3];
836:       dk[3] += uik[2]*u[2] + uik[3]*u[3];

838:       /* update -U(i,k): ba[ili] = uik */
839:       PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));

841:       /* add multiple of row i to k-th row ... */
842:       jmin = ili + 1; jmax = bi[i+1];
843:       if (jmin < jmax){
844:         for (j=jmin; j<jmax; j++) {
845:           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
846:           rtmp_ptr = rtmp + bj[j]*4;
847:           u = ba + j*4;
848:           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
849:           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
850:           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
851:           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
852:         }
853: 
854:         /* ... add i to row list for next nonzero entry */
855:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
856:         j     = bj[jmin];
857:         jl[i] = jl[j]; jl[j] = i; /* update jl */
858:       }
859:       i = nexti;
860:     }

862:     /* save nonzero entries in k-th row of U ... */

864:     /* invert diagonal block */
865:     diag = ba+k*4;
866:     PetscMemcpy(diag,dk,4*sizeof(MatScalar));
867:     Kernel_A_gets_inverse_A_2(diag);
868: 
869:     jmin = bi[k]; jmax = bi[k+1];
870:     if (jmin < jmax) {
871:       for (j=jmin; j<jmax; j++){
872:          vj = bj[j];           /* block col. index of U */
873:          u   = ba + j*4;
874:          rtmp_ptr = rtmp + vj*4;
875:          for (k1=0; k1<4; k1++){
876:            *u++        = *rtmp_ptr;
877:            *rtmp_ptr++ = 0.0;
878:          }
879:       }
880: 
881:       /* ... add k to row list for first nonzero entry in k-th row */
882:       il[k] = jmin;
883:       i     = bj[jmin];
884:       jl[k] = jl[i]; jl[i] = k;
885:     }
886:   }

888:   PetscFree(rtmp);
889:   PetscFree(il);
890:   PetscFree(dk);
891:   if (a->permute) {
892:     PetscFree(aa);
893:   }
894:   ISRestoreIndices(perm,&perm_ptr);
895:   C->factor    = FACTOR_CHOLESKY;
896:   C->assembled = PETSC_TRUE;
897:   C->preallocated = PETSC_TRUE;
898:   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
899:   return(0);
900: }

902: /*
903:       Version for when blocks are 2 by 2 Using natural ordering
904: */
907: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B)
908: {
909:   Mat            C = *B;
910:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
912:   PetscInt       i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
913:   PetscInt       *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
914:   MatScalar      *ba = b->a,*aa,*ap,*dk,*uik;
915:   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;

918:   /* initialization */
919:   /* il and jl record the first nonzero element in each row of the accessing 
920:      window U(0:k, k:mbs-1).
921:      jl:    list of rows to be added to uneliminated rows 
922:             i>= k: jl(i) is the first row to be added to row i
923:             i<  k: jl(i) is the row following row i in some list of rows
924:             jl(i) = mbs indicates the end of a list                        
925:      il(i): points to the first nonzero element in columns k,...,mbs-1 of 
926:             row i of U */
927:   PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);
928:   PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));
929:   PetscMalloc(2*mbs*sizeof(PetscInt),&il);
930:   jl   = il + mbs;
931:   for (i=0; i<mbs; i++) {
932:     jl[i] = mbs; il[0] = 0;
933:   }
934:   PetscMalloc(8*sizeof(MatScalar),&dk);
935:   uik  = dk + 4;
936: 
937:   ai = a->i; aj = a->j; aa = a->a;

939:   /* for each row k */
940:   for (k = 0; k<mbs; k++){

942:     /*initialize k-th row with elements nonzero in row k of A */
943:     jmin = ai[k]; jmax = ai[k+1];
944:     ap = aa + jmin*4;
945:     for (j = jmin; j < jmax; j++){
946:       vj = aj[j];         /* block col. index */
947:       rtmp_ptr = rtmp + vj*4;
948:       for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
949:     }
950: 
951:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
952:     PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));
953:     i = jl[k]; /* first row to be added to k_th row  */

955:     while (i < k){
956:       nexti = jl[i]; /* next row to be added to k_th row */

958:       /* compute multiplier */
959:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

961:       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
962:       diag = ba + i*4;
963:       u    = ba + ili*4;
964:       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
965:       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
966:       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
967:       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
968: 
969:       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
970:       dk[0] += uik[0]*u[0] + uik[1]*u[1];
971:       dk[1] += uik[2]*u[0] + uik[3]*u[1];
972:       dk[2] += uik[0]*u[2] + uik[1]*u[3];
973:       dk[3] += uik[2]*u[2] + uik[3]*u[3];

975:       /* update -U(i,k): ba[ili] = uik */
976:       PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));

978:       /* add multiple of row i to k-th row ... */
979:       jmin = ili + 1; jmax = bi[i+1];
980:       if (jmin < jmax){
981:         for (j=jmin; j<jmax; j++) {
982:           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
983:           rtmp_ptr = rtmp + bj[j]*4;
984:           u = ba + j*4;
985:           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
986:           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
987:           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
988:           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
989:         }
990: 
991:         /* ... add i to row list for next nonzero entry */
992:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
993:         j     = bj[jmin];
994:         jl[i] = jl[j]; jl[j] = i; /* update jl */
995:       }
996:       i = nexti;
997:     }

999:     /* save nonzero entries in k-th row of U ... */

1001:     /* invert diagonal block */
1002:     diag = ba+k*4;
1003:     PetscMemcpy(diag,dk,4*sizeof(MatScalar));
1004:     Kernel_A_gets_inverse_A_2(diag);
1005: 
1006:     jmin = bi[k]; jmax = bi[k+1];
1007:     if (jmin < jmax) {
1008:       for (j=jmin; j<jmax; j++){
1009:          vj = bj[j];           /* block col. index of U */
1010:          u   = ba + j*4;
1011:          rtmp_ptr = rtmp + vj*4;
1012:          for (k1=0; k1<4; k1++){
1013:            *u++        = *rtmp_ptr;
1014:            *rtmp_ptr++ = 0.0;
1015:          }
1016:       }
1017: 
1018:       /* ... add k to row list for first nonzero entry in k-th row */
1019:       il[k] = jmin;
1020:       i     = bj[jmin];
1021:       jl[k] = jl[i]; jl[i] = k;
1022:     }
1023:   }

1025:   PetscFree(rtmp);
1026:   PetscFree(il);
1027:   PetscFree(dk);

1029:   C->factor    = FACTOR_CHOLESKY;
1030:   C->assembled = PETSC_TRUE;
1031:   C->preallocated = PETSC_TRUE;
1032:   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
1033:   return(0);
1034: }

1036: /*
1037:     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
1038:     Version for blocks are 1 by 1.
1039: */
1042: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B)
1043: {
1044:   Mat            C = *B;
1045:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
1046:   IS             ip = b->row;
1048:   PetscInt       *rip,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j;
1049:   PetscInt       *ai,*aj,*r;
1050:   PetscInt       k,jmin,jmax,*jl,*il,vj,nexti,ili;
1051:   MatScalar      *rtmp;
1052:   MatScalar      *ba = b->a,*aa,ak;
1053:   MatScalar      dk,uikdi;

1056:   ISGetIndices(ip,&rip);
1057:   if (!a->permute){
1058:     ai = a->i; aj = a->j; aa = a->a;
1059:   } else {
1060:     ai = a->inew; aj = a->jnew;
1061:     PetscMalloc(ai[mbs]*sizeof(MatScalar),&aa);
1062:     PetscMemcpy(aa,a->a,ai[mbs]*sizeof(MatScalar));
1063:     PetscMalloc(ai[mbs]*sizeof(PetscInt),&r);
1064:     ierr= PetscMemcpy(r,a->a2anew,(ai[mbs])*sizeof(PetscInt));

1066:     jmin = ai[0]; jmax = ai[mbs];
1067:     for (j=jmin; j<jmax; j++){
1068:       while (r[j] != j){
1069:         k = r[j]; r[j] = r[k]; r[k] = k;
1070:         ak = aa[k]; aa[k] = aa[j]; aa[j] = ak;
1071:       }
1072:     }
1073:     PetscFree(r);
1074:   }
1075: 
1076:   /* initialization */
1077:   /* il and jl record the first nonzero element in each row of the accessing 
1078:      window U(0:k, k:mbs-1).
1079:      jl:    list of rows to be added to uneliminated rows 
1080:             i>= k: jl(i) is the first row to be added to row i
1081:             i<  k: jl(i) is the row following row i in some list of rows
1082:             jl(i) = mbs indicates the end of a list                        
1083:      il(i): points to the first nonzero element in columns k,...,mbs-1 of 
1084:             row i of U */
1085:   PetscMalloc(mbs*sizeof(MatScalar),&rtmp);
1086:   PetscMalloc(2*mbs*sizeof(PetscInt),&il);
1087:   jl   = il + mbs;
1088:   for (i=0; i<mbs; i++) {
1089:     rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1090:   }

1092:   /* for each row k */
1093:   for (k = 0; k<mbs; k++){

1095:     /*initialize k-th row with elements nonzero in row perm(k) of A */
1096:     jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1097: 
1098:     for (j = jmin; j < jmax; j++){
1099:       vj = rip[aj[j]];
1100:       rtmp[vj] = aa[j];
1101:     }

1103:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1104:     dk = rtmp[k];
1105:     i = jl[k]; /* first row to be added to k_th row  */

1107:     while (i < k){
1108:       nexti = jl[i]; /* next row to be added to k_th row */

1110:       /* compute multiplier, update D(k) and U(i,k) */
1111:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1112:       uikdi = - ba[ili]*ba[i];
1113:       dk += uikdi*ba[ili];
1114:       ba[ili] = uikdi; /* -U(i,k) */

1116:       /* add multiple of row i to k-th row ... */
1117:       jmin = ili + 1; jmax = bi[i+1];
1118:       if (jmin < jmax){
1119:         for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1120:         /* ... add i to row list for next nonzero entry */
1121:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
1122:         j     = bj[jmin];
1123:         jl[i] = jl[j]; jl[j] = i; /* update jl */
1124:       }
1125:       i = nexti;
1126:     }

1128:     /* check for zero pivot and save diagoanl element */
1129:     if (dk == 0.0){
1130:       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
1131:       /*
1132:     } else if (PetscRealPart(dk) < 0.0){
1133:       SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %g\n",k,dk);  
1134:       */
1135:     }

1137:     /* save nonzero entries in k-th row of U ... */
1138:     ba[k] = 1.0/dk;
1139:     jmin = bi[k]; jmax = bi[k+1];
1140:     if (jmin < jmax) {
1141:       for (j=jmin; j<jmax; j++){
1142:          vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0;
1143:       }
1144:       /* ... add k to row list for first nonzero entry in k-th row */
1145:       il[k] = jmin;
1146:       i     = bj[jmin];
1147:       jl[k] = jl[i]; jl[i] = k;
1148:     }
1149:   }
1150: 
1151:   PetscFree(rtmp);
1152:   PetscFree(il);
1153:   if (a->permute){
1154:     PetscFree(aa);
1155:   }

1157:   ISRestoreIndices(ip,&rip);
1158:   C->factor    = FACTOR_CHOLESKY;
1159:   C->assembled = PETSC_TRUE;
1160:   C->preallocated = PETSC_TRUE;
1161:   PetscLogFlops(b->mbs);
1162:   return(0);
1163: }

1165: /*
1166:   Version for when blocks are 1 by 1 Using natural ordering
1167: */
1170: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B)
1171: {
1172:   Mat            C = *B;
1173:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data;
1175:   PetscInt       i,j,mbs = a->mbs;
1176:   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
1177:   PetscInt       k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz,ndamp = 0;
1178:   MatScalar      *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
1179:   PetscReal      damping=b->factor_damping, zeropivot=b->factor_zeropivot,shift_amount;
1180:   PetscTruth     damp,chshift;
1181:   PetscInt       nshift=0;

1184:   /* initialization */
1185:   /* il and jl record the first nonzero element in each row of the accessing 
1186:      window U(0:k, k:mbs-1).
1187:      jl:    list of rows to be added to uneliminated rows 
1188:             i>= k: jl(i) is the first row to be added to row i
1189:             i<  k: jl(i) is the row following row i in some list of rows
1190:             jl(i) = mbs indicates the end of a list                        
1191:      il(i): points to the first nonzero element in U(i,k:mbs-1) 
1192:   */
1193:   PetscMalloc(mbs*sizeof(MatScalar),&rtmp);
1194:   PetscMalloc(2*mbs*sizeof(PetscInt),&il);
1195:   jl   = il + mbs;

1197:   shift_amount = 0;
1198:   do {
1199:     damp = PETSC_FALSE;
1200:     chshift = PETSC_FALSE;
1201:     for (i=0; i<mbs; i++) {
1202:       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1203:     }

1205:     for (k = 0; k<mbs; k++){ /* row k */
1206:     /*initialize k-th row with elements nonzero in row perm(k) of A */
1207:       nz   = ai[k+1] - ai[k];
1208:       acol = aj + ai[k];
1209:       aval = aa + ai[k];
1210:       bval = ba + bi[k];
1211:       while (nz -- ){
1212:         rtmp[*acol++] = *aval++;
1213:         *bval++       = 0.0; /* for in-place factorization */
1214:       }
1215:       /* damp the diagonal of the matrix */
1216:       if (ndamp||nshift) rtmp[k] += damping+shift_amount;
1217: 
1218:       /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1219:       dk = rtmp[k];
1220:       i  = jl[k]; /* first row to be added to k_th row  */

1222:       while (i < k){
1223:         nexti = jl[i]; /* next row to be added to k_th row */
1224: 
1225:         /* compute multiplier, update D(k) and U(i,k) */
1226:         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1227:         uikdi = - ba[ili]*ba[bi[i]];
1228:         dk   += uikdi*ba[ili];
1229:         ba[ili] = uikdi; /* -U(i,k) */

1231:         /* add multiple of row i to k-th row ... */
1232:         jmin = ili + 1;
1233:         nz   = bi[i+1] - jmin;
1234:         if (nz > 0){
1235:           bcol = bj + jmin;
1236:           bval = ba + jmin;
1237:           while (nz --) rtmp[*bcol++] += uikdi*(*bval++);
1238:           /* ... add i to row list for next nonzero entry */
1239:           il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
1240:           j     = bj[jmin];
1241:           jl[i] = jl[j]; jl[j] = i; /* update jl */
1242:         }
1243:         i = nexti;
1244:       }

1246:       if (PetscRealPart(dk) < zeropivot && b->factor_shift){
1247:         /* calculate a shift that would make this row diagonally dominant */
1248:         PetscReal rs = PetscAbs(PetscRealPart(dk));
1249:         jmin      = bi[k]+1;
1250:         nz        = bi[k+1] - jmin;
1251:         if (nz){
1252:           bcol = bj + jmin;
1253:           bval = ba + jmin;
1254:           while (nz--){
1255:             rs += PetscAbsScalar(rtmp[*bcol++]);
1256:           }
1257:         }
1258:         /* if this shift is less than the previous, just up the previous
1259:            one by a bit */
1260:         shift_amount = PetscMax(rs,1.1*shift_amount);
1261:         chshift  = PETSC_TRUE;
1262:         /* Unlike in the ILU case there is no exit condition on nshift:
1263:            we increase the shift until it converges. There is no guarantee that
1264:            this algorithm converges faster or slower, or is better or worse
1265:            than the ILU algorithm. */
1266:         nshift++;
1267:         break;
1268:       }
1269:       if (PetscRealPart(dk) < zeropivot){
1270:         if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1);
1271:         if (damping > 0.0) {
1272:           if (ndamp) damping *= 2.0;
1273:           damp = PETSC_TRUE;
1274:           ndamp++;
1275:           break;
1276:         } else if (PetscAbsScalar(dk) < zeropivot){
1277:           SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",k,PetscRealPart(dk),zeropivot);
1278:         } else {
1279:           PetscLogInfo((PetscObject)A,"Negative pivot %g in row %D of Cholesky factorization\n",PetscRealPart(dk),k);
1280:         }
1281:       }
1282: 
1283:       /* save nonzero entries in k-th row of U ... */
1284:       /* printf("%D, dk: %g, 1/dk: %g\n",k,dk,1/dk); */
1285:       ba[bi[k]] = 1.0/dk;
1286:       jmin      = bi[k]+1;
1287:       nz        = bi[k+1] - jmin;
1288:       if (nz){
1289:         bcol = bj + jmin;
1290:         bval = ba + jmin;
1291:         while (nz--){
1292:           *bval++       = rtmp[*bcol];
1293:           rtmp[*bcol++] = 0.0;
1294:         }
1295:         /* ... add k to row list for first nonzero entry in k-th row */
1296:         il[k] = jmin;
1297:         i     = bj[jmin];
1298:         jl[k] = jl[i]; jl[i] = k;
1299:       }
1300:     } /* end of for (k = 0; k<mbs; k++) */
1301:   } while (damp||chshift);
1302:   PetscFree(rtmp);
1303:   PetscFree(il);
1304: 
1305:   C->factor       = FACTOR_CHOLESKY;
1306:   C->assembled    = PETSC_TRUE;
1307:   C->preallocated = PETSC_TRUE;
1308:   PetscLogFlops(b->mbs);
1309:   if (ndamp) {
1310:     PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqSBAIJ_1_NaturalOrdering: number of damping tries %D damping value %g\n",ndamp,damping);
1311:   }
1312:  if (nshift) {
1313:     PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering diagonal shifted %D shifts\n",nshift);
1314:   }
1315: 
1316:   return(0);
1317: }

1321: PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info)
1322: {
1324:   Mat            C;

1327:   MatCholeskyFactorSymbolic(A,perm,info,&C);
1328:   MatCholeskyFactorNumeric(A,&C);
1329:   MatHeaderCopy(A,C);
1330:   return(0);
1331: }