Actual source code: aijfact.c

  1: #include <../src/mat/impls/aij/seq/aij.h>
  2: #include <../src/mat/impls/sbaij/seq/sbaij.h>
  3: #include <petscbt.h>
  4: #include <../src/mat/utils/freespace.h>

  6: /*
  7:       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix

  9:       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
 10: */
 11: #if 0
 12: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol)
 13: {
 14:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)mat->data;
 15:   PetscInt           i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order;
 16:   const PetscInt    *ai = a->i, *aj = a->j;
 17:   const PetscScalar *aa = a->a;
 18:   PetscBool         *done;
 19:   PetscReal          best, past = 0, future;

 21:   PetscFunctionBegin;
 22:   /* pick initial row */
 23:   best = -1;
 24:   for (i = 0; i < n; i++) {
 25:     future = 0.0;
 26:     for (j = ai[i]; j < ai[i + 1]; j++) {
 27:       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
 28:       else past = PetscAbsScalar(aa[j]);
 29:     }
 30:     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
 31:     if (past / future > best) {
 32:       best    = past / future;
 33:       current = i;
 34:     }
 35:   }

 37:   PetscCall(PetscMalloc1(n, &done));
 38:   PetscCall(PetscArrayzero(done, n));
 39:   PetscCall(PetscMalloc1(n, &order));
 40:   order[0] = current;
 41:   for (i = 0; i < n - 1; i++) {
 42:     done[current] = PETSC_TRUE;
 43:     best          = -1;
 44:     /* loop over all neighbors of current pivot */
 45:     for (j = ai[current]; j < ai[current + 1]; j++) {
 46:       jj = aj[j];
 47:       if (done[jj]) continue;
 48:       /* loop over columns of potential next row computing weights for below and above diagonal */
 49:       past = future = 0.0;
 50:       for (k = ai[jj]; k < ai[jj + 1]; k++) {
 51:         kk = aj[k];
 52:         if (done[kk]) past += PetscAbsScalar(aa[k]);
 53:         else if (kk != jj) future += PetscAbsScalar(aa[k]);
 54:       }
 55:       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
 56:       if (past / future > best) {
 57:         best       = past / future;
 58:         newcurrent = jj;
 59:       }
 60:     }
 61:     if (best == -1) { /* no neighbors to select from so select best of all that remain */
 62:       best = -1;
 63:       for (k = 0; k < n; k++) {
 64:         if (done[k]) continue;
 65:         future = 0.0;
 66:         past   = 0.0;
 67:         for (j = ai[k]; j < ai[k + 1]; j++) {
 68:           kk = aj[j];
 69:           if (done[kk]) past += PetscAbsScalar(aa[j]);
 70:           else if (kk != k) future += PetscAbsScalar(aa[j]);
 71:         }
 72:         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
 73:         if (past / future > best) {
 74:           best       = past / future;
 75:           newcurrent = k;
 76:         }
 77:       }
 78:     }
 79:     PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current");
 80:     current      = newcurrent;
 81:     order[i + 1] = current;
 82:   }
 83:   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow));
 84:   *icol = *irow;
 85:   PetscCall(PetscObjectReference((PetscObject)*irow));
 86:   PetscCall(PetscFree(done));
 87:   PetscCall(PetscFree(order));
 88:   PetscFunctionReturn(PETSC_SUCCESS);
 89: }
 90: #endif

 92: static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
 93: {
 94:   PetscFunctionBegin;
 95:   *type = MATSOLVERPETSC;
 96:   PetscFunctionReturn(PETSC_SUCCESS);
 97: }

 99: PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
100: {
101:   PetscInt n = A->rmap->n;

103:   PetscFunctionBegin;
104: #if defined(PETSC_USE_COMPLEX)
105:   if ((ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
106:     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n"));
107:     *B = NULL;
108:     PetscFunctionReturn(PETSC_SUCCESS);
109:   }
110: #endif

112:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
113:   PetscCall(MatSetSizes(*B, n, n, n, n));
114:   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
115:     PetscCall(MatSetType(*B, MATSEQAIJ));

117:     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
118:     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;

120:     PetscCall(MatSetBlockSizesFromMats(*B, A, A));
121:     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
122:     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
123:     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
124:   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
125:     PetscCall(MatSetType(*B, MATSEQSBAIJ));
126:     PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));

128:     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
129:     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
130:     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
131:     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
132:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
133:   (*B)->factortype = ftype;

135:   PetscCall(PetscFree((*B)->solvertype));
136:   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
137:   (*B)->canuseordering = PETSC_TRUE;
138:   PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
139:   PetscFunctionReturn(PETSC_SUCCESS);
140: }

142: #if 0
143: // currently unused
144: static PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
145: {
146:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
147:   IS                 isicol;
148:   const PetscInt    *r, *ic;
149:   PetscInt           i, n = A->rmap->n, *ai = a->i, *aj = a->j;
150:   PetscInt          *bi, *bj, *ajtmp;
151:   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
152:   PetscReal          f;
153:   PetscInt           nlnk, *lnk, k, **bi_ptr;
154:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
155:   PetscBT            lnkbt;
156:   PetscBool          missing;

158:   PetscFunctionBegin;
159:   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
160:   PetscCall(MatMissingDiagonal(A, &missing, &i));
161:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

163:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
164:   PetscCall(ISGetIndices(isrow, &r));
165:   PetscCall(ISGetIndices(isicol, &ic));

167:   /* get new row pointers */
168:   PetscCall(PetscMalloc1(n + 1, &bi));
169:   bi[0] = 0;

171:   /* bdiag is location of diagonal in factor */
172:   PetscCall(PetscMalloc1(n + 1, &bdiag));
173:   bdiag[0] = 0;

175:   /* linked list for storing column indices of the active row */
176:   nlnk = n + 1;
177:   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));

179:   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));

181:   /* initial FreeSpace size is f*(ai[n]+1) */
182:   f = info->fill;
183:   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
184:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
185:   current_space = free_space;

187:   for (i = 0; i < n; i++) {
188:     /* copy previous fill into linked list */
189:     nzi   = 0;
190:     nnz   = ai[r[i] + 1] - ai[r[i]];
191:     ajtmp = aj + ai[r[i]];
192:     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
193:     nzi += nlnk;

195:     /* add pivot rows into linked list */
196:     row = lnk[n];
197:     while (row < i) {
198:       nzbd  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
199:       ajtmp = bi_ptr[row] + nzbd;       /* points to the entry next to the diagonal */
200:       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
201:       nzi += nlnk;
202:       row = lnk[row];
203:     }
204:     bi[i + 1] = bi[i] + nzi;
205:     im[i]     = nzi;

207:     /* mark bdiag */
208:     nzbd = 0;
209:     nnz  = nzi;
210:     k    = lnk[n];
211:     while (nnz-- && k < i) {
212:       nzbd++;
213:       k = lnk[k];
214:     }
215:     bdiag[i] = bi[i] + nzbd;

217:     /* if free space is not available, make more free space */
218:     if (current_space->local_remaining < nzi) {
219:       nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
220:       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
221:       reallocs++;
222:     }

224:     /* copy data into free space, then initialize lnk */
225:     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));

227:     bi_ptr[i] = current_space->array;
228:     current_space->array += nzi;
229:     current_space->local_used += nzi;
230:     current_space->local_remaining -= nzi;
231:   }
232:   #if defined(PETSC_USE_INFO)
233:   if (ai[n] != 0) {
234:     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
235:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
236:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
237:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
238:     PetscCall(PetscInfo(A, "for best performance.\n"));
239:   } else {
240:     PetscCall(PetscInfo(A, "Empty matrix\n"));
241:   }
242:   #endif

244:   PetscCall(ISRestoreIndices(isrow, &r));
245:   PetscCall(ISRestoreIndices(isicol, &ic));

247:   /* destroy list of free space and other temporary array(s) */
248:   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
249:   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
250:   PetscCall(PetscLLDestroy(lnk, lnkbt));
251:   PetscCall(PetscFree2(bi_ptr, im));

253:   /* put together the new matrix */
254:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
255:   b = (Mat_SeqAIJ *)(B)->data;

257:   b->free_a       = PETSC_TRUE;
258:   b->free_ij      = PETSC_TRUE;
259:   b->singlemalloc = PETSC_FALSE;

261:   PetscCall(PetscMalloc1(bi[n] + 1, &b->a));
262:   b->j    = bj;
263:   b->i    = bi;
264:   b->diag = bdiag;
265:   b->ilen = NULL;
266:   b->imax = NULL;
267:   b->row  = isrow;
268:   b->col  = iscol;
269:   PetscCall(PetscObjectReference((PetscObject)isrow));
270:   PetscCall(PetscObjectReference((PetscObject)iscol));
271:   b->icol = isicol;
272:   PetscCall(PetscMalloc1(n + 1, &b->solve_work));

274:   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
275:   b->maxnz = b->nz = bi[n];

277:   (B)->factortype            = MAT_FACTOR_LU;
278:   (B)->info.factor_mallocs   = reallocs;
279:   (B)->info.fill_ratio_given = f;

281:   if (ai[n]) {
282:     (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
283:   } else {
284:     (B)->info.fill_ratio_needed = 0.0;
285:   }
286:   (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
287:   if (a->inode.size) (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
288:   PetscFunctionReturn(PETSC_SUCCESS);
289: }
290: #endif

292: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
293: {
294:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
295:   IS                 isicol;
296:   const PetscInt    *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
297:   PetscInt           i, n = A->rmap->n;
298:   PetscInt          *bi, *bj;
299:   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
300:   PetscReal          f;
301:   PetscInt           nlnk, *lnk, k, **bi_ptr;
302:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
303:   PetscBT            lnkbt;
304:   PetscBool          missing;

306:   PetscFunctionBegin;
307:   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
308:   PetscCall(MatMissingDiagonal(A, &missing, &i));
309:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

311:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
312:   PetscCall(ISGetIndices(isrow, &r));
313:   PetscCall(ISGetIndices(isicol, &ic));

315:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
316:   PetscCall(PetscMalloc1(n + 1, &bi));
317:   PetscCall(PetscMalloc1(n + 1, &bdiag));
318:   bi[0] = bdiag[0] = 0;

320:   /* linked list for storing column indices of the active row */
321:   nlnk = n + 1;
322:   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));

324:   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));

326:   /* initial FreeSpace size is f*(ai[n]+1) */
327:   f = info->fill;
328:   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
329:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
330:   current_space = free_space;

332:   for (i = 0; i < n; i++) {
333:     /* copy previous fill into linked list */
334:     nzi   = 0;
335:     nnz   = ai[r[i] + 1] - ai[r[i]];
336:     ajtmp = aj + ai[r[i]];
337:     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
338:     nzi += nlnk;

340:     /* add pivot rows into linked list */
341:     row = lnk[n];
342:     while (row < i) {
343:       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
344:       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
345:       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
346:       nzi += nlnk;
347:       row = lnk[row];
348:     }
349:     bi[i + 1] = bi[i] + nzi;
350:     im[i]     = nzi;

352:     /* mark bdiag */
353:     nzbd = 0;
354:     nnz  = nzi;
355:     k    = lnk[n];
356:     while (nnz-- && k < i) {
357:       nzbd++;
358:       k = lnk[k];
359:     }
360:     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */

362:     /* if free space is not available, make more free space */
363:     if (current_space->local_remaining < nzi) {
364:       /* estimated additional space needed */
365:       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
366:       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
367:       reallocs++;
368:     }

370:     /* copy data into free space, then initialize lnk */
371:     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));

373:     bi_ptr[i] = current_space->array;
374:     current_space->array += nzi;
375:     current_space->local_used += nzi;
376:     current_space->local_remaining -= nzi;
377:   }

379:   PetscCall(ISRestoreIndices(isrow, &r));
380:   PetscCall(ISRestoreIndices(isicol, &ic));

382:   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
383:   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
384:   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
385:   PetscCall(PetscLLDestroy(lnk, lnkbt));
386:   PetscCall(PetscFree2(bi_ptr, im));

388:   /* put together the new matrix */
389:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
390:   b = (Mat_SeqAIJ *)(B)->data;

392:   b->free_a       = PETSC_TRUE;
393:   b->free_ij      = PETSC_TRUE;
394:   b->singlemalloc = PETSC_FALSE;

396:   PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));

398:   b->j    = bj;
399:   b->i    = bi;
400:   b->diag = bdiag;
401:   b->ilen = NULL;
402:   b->imax = NULL;
403:   b->row  = isrow;
404:   b->col  = iscol;
405:   PetscCall(PetscObjectReference((PetscObject)isrow));
406:   PetscCall(PetscObjectReference((PetscObject)iscol));
407:   b->icol = isicol;
408:   PetscCall(PetscMalloc1(n + 1, &b->solve_work));

410:   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
411:   b->maxnz = b->nz = bdiag[0] + 1;

413:   B->factortype            = MAT_FACTOR_LU;
414:   B->info.factor_mallocs   = reallocs;
415:   B->info.fill_ratio_given = f;

417:   if (ai[n]) {
418:     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
419:   } else {
420:     B->info.fill_ratio_needed = 0.0;
421:   }
422: #if defined(PETSC_USE_INFO)
423:   if (ai[n] != 0) {
424:     PetscReal af = B->info.fill_ratio_needed;
425:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
426:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
427:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
428:     PetscCall(PetscInfo(A, "for best performance.\n"));
429:   } else {
430:     PetscCall(PetscInfo(A, "Empty matrix\n"));
431:   }
432: #endif
433:   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
434:   if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
435:   PetscCall(MatSeqAIJCheckInode_FactorLU(B));
436:   PetscFunctionReturn(PETSC_SUCCESS);
437: }

439: /*
440:     Trouble in factorization, should we dump the original matrix?
441: */
442: PetscErrorCode MatFactorDumpMatrix(Mat A)
443: {
444:   PetscBool flg = PETSC_FALSE;

446:   PetscFunctionBegin;
447:   PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
448:   if (flg) {
449:     PetscViewer viewer;
450:     char        filename[PETSC_MAX_PATH_LEN];

452:     PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
453:     PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
454:     PetscCall(MatView(A, viewer));
455:     PetscCall(PetscViewerDestroy(&viewer));
456:   }
457:   PetscFunctionReturn(PETSC_SUCCESS);
458: }

460: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
461: {
462:   Mat              C = B;
463:   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
464:   IS               isrow = b->row, isicol = b->icol;
465:   const PetscInt  *r, *ic, *ics;
466:   const PetscInt   n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
467:   PetscInt         i, j, k, nz, nzL, row, *pj;
468:   const PetscInt  *ajtmp, *bjtmp;
469:   MatScalar       *rtmp, *pc, multiplier, *pv;
470:   const MatScalar *aa = a->a, *v;
471:   PetscBool        row_identity, col_identity;
472:   FactorShiftCtx   sctx;
473:   const PetscInt  *ddiag;
474:   PetscReal        rs;
475:   MatScalar        d;

477:   PetscFunctionBegin;
478:   /* MatPivotSetUp(): initialize shift context sctx */
479:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

481:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
482:     ddiag          = a->diag;
483:     sctx.shift_top = info->zeropivot;
484:     for (i = 0; i < n; i++) {
485:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
486:       d  = (aa)[ddiag[i]];
487:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
488:       v  = aa + ai[i];
489:       nz = ai[i + 1] - ai[i];
490:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
491:       if (rs > sctx.shift_top) sctx.shift_top = rs;
492:     }
493:     sctx.shift_top *= 1.1;
494:     sctx.nshift_max = 5;
495:     sctx.shift_lo   = 0.;
496:     sctx.shift_hi   = 1.;
497:   }

499:   PetscCall(ISGetIndices(isrow, &r));
500:   PetscCall(ISGetIndices(isicol, &ic));
501:   PetscCall(PetscMalloc1(n + 1, &rtmp));
502:   ics = ic;

504:   do {
505:     sctx.newshift = PETSC_FALSE;
506:     for (i = 0; i < n; i++) {
507:       /* zero rtmp */
508:       /* L part */
509:       nz    = bi[i + 1] - bi[i];
510:       bjtmp = bj + bi[i];
511:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

513:       /* U part */
514:       nz    = bdiag[i] - bdiag[i + 1];
515:       bjtmp = bj + bdiag[i + 1] + 1;
516:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

518:       /* load in initial (unfactored row) */
519:       nz    = ai[r[i] + 1] - ai[r[i]];
520:       ajtmp = aj + ai[r[i]];
521:       v     = aa + ai[r[i]];
522:       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
523:       /* ZeropivotApply() */
524:       rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */

526:       /* elimination */
527:       bjtmp = bj + bi[i];
528:       row   = *bjtmp++;
529:       nzL   = bi[i + 1] - bi[i];
530:       for (k = 0; k < nzL; k++) {
531:         pc = rtmp + row;
532:         if (*pc != 0.0) {
533:           pv         = b->a + bdiag[row];
534:           multiplier = *pc * (*pv);
535:           *pc        = multiplier;

537:           pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
538:           pv = b->a + bdiag[row + 1] + 1;
539:           nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */

541:           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
542:           PetscCall(PetscLogFlops(1 + 2.0 * nz));
543:         }
544:         row = *bjtmp++;
545:       }

547:       /* finished row so stick it into b->a */
548:       rs = 0.0;
549:       /* L part */
550:       pv = b->a + bi[i];
551:       pj = b->j + bi[i];
552:       nz = bi[i + 1] - bi[i];
553:       for (j = 0; j < nz; j++) {
554:         pv[j] = rtmp[pj[j]];
555:         rs += PetscAbsScalar(pv[j]);
556:       }

558:       /* U part */
559:       pv = b->a + bdiag[i + 1] + 1;
560:       pj = b->j + bdiag[i + 1] + 1;
561:       nz = bdiag[i] - bdiag[i + 1] - 1;
562:       for (j = 0; j < nz; j++) {
563:         pv[j] = rtmp[pj[j]];
564:         rs += PetscAbsScalar(pv[j]);
565:       }

567:       sctx.rs = rs;
568:       sctx.pv = rtmp[i];
569:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
570:       if (sctx.newshift) break; /* break for-loop */
571:       rtmp[i] = sctx.pv;        /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */

573:       /* Mark diagonal and invert diagonal for simpler triangular solves */
574:       pv  = b->a + bdiag[i];
575:       *pv = 1.0 / rtmp[i];

577:     } /* endof for (i=0; i<n; i++) { */

579:     /* MatPivotRefine() */
580:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
581:       /*
582:        * if no shift in this attempt & shifting & started shifting & can refine,
583:        * then try lower shift
584:        */
585:       sctx.shift_hi       = sctx.shift_fraction;
586:       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
587:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
588:       sctx.newshift       = PETSC_TRUE;
589:       sctx.nshift++;
590:     }
591:   } while (sctx.newshift);

593:   PetscCall(PetscFree(rtmp));
594:   PetscCall(ISRestoreIndices(isicol, &ic));
595:   PetscCall(ISRestoreIndices(isrow, &r));

597:   PetscCall(ISIdentity(isrow, &row_identity));
598:   PetscCall(ISIdentity(isicol, &col_identity));
599:   if (b->inode.size) {
600:     C->ops->solve = MatSolve_SeqAIJ_Inode;
601:   } else if (row_identity && col_identity) {
602:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
603:   } else {
604:     C->ops->solve = MatSolve_SeqAIJ;
605:   }
606:   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
607:   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
608:   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
609:   C->ops->matsolve          = MatMatSolve_SeqAIJ;
610:   C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ;
611:   C->assembled              = PETSC_TRUE;
612:   C->preallocated           = PETSC_TRUE;

614:   PetscCall(PetscLogFlops(C->cmap->n));

616:   /* MatShiftView(A,info,&sctx) */
617:   if (sctx.nshift) {
618:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
619:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
620:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
621:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
622:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
623:       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
624:     }
625:   }
626:   PetscFunctionReturn(PETSC_SUCCESS);
627: }

629: static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat);
630: static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec);
631: static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec);

633: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
634: {
635:   Mat              C = B;
636:   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
637:   IS               isrow = b->row, isicol = b->icol;
638:   const PetscInt  *r, *ic, *ics;
639:   PetscInt         nz, row, i, j, n = A->rmap->n, diag;
640:   const PetscInt  *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
641:   const PetscInt  *ajtmp, *bjtmp, *diag_offset = b->diag, *pj;
642:   MatScalar       *pv, *rtmp, *pc, multiplier, d;
643:   const MatScalar *v, *aa = a->a;
644:   PetscReal        rs = 0.0;
645:   FactorShiftCtx   sctx;
646:   const PetscInt  *ddiag;
647:   PetscBool        row_identity, col_identity;

649:   PetscFunctionBegin;
650:   /* MatPivotSetUp(): initialize shift context sctx */
651:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

653:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
654:     ddiag          = a->diag;
655:     sctx.shift_top = info->zeropivot;
656:     for (i = 0; i < n; i++) {
657:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
658:       d  = (aa)[ddiag[i]];
659:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
660:       v  = aa + ai[i];
661:       nz = ai[i + 1] - ai[i];
662:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
663:       if (rs > sctx.shift_top) sctx.shift_top = rs;
664:     }
665:     sctx.shift_top *= 1.1;
666:     sctx.nshift_max = 5;
667:     sctx.shift_lo   = 0.;
668:     sctx.shift_hi   = 1.;
669:   }

671:   PetscCall(ISGetIndices(isrow, &r));
672:   PetscCall(ISGetIndices(isicol, &ic));
673:   PetscCall(PetscMalloc1(n + 1, &rtmp));
674:   ics = ic;

676:   do {
677:     sctx.newshift = PETSC_FALSE;
678:     for (i = 0; i < n; i++) {
679:       nz    = bi[i + 1] - bi[i];
680:       bjtmp = bj + bi[i];
681:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

683:       /* load in initial (unfactored row) */
684:       nz    = ai[r[i] + 1] - ai[r[i]];
685:       ajtmp = aj + ai[r[i]];
686:       v     = aa + ai[r[i]];
687:       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
688:       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */

690:       row = *bjtmp++;
691:       while (row < i) {
692:         pc = rtmp + row;
693:         if (*pc != 0.0) {
694:           pv         = b->a + diag_offset[row];
695:           pj         = b->j + diag_offset[row] + 1;
696:           multiplier = *pc / *pv++;
697:           *pc        = multiplier;
698:           nz         = bi[row + 1] - diag_offset[row] - 1;
699:           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
700:           PetscCall(PetscLogFlops(1 + 2.0 * nz));
701:         }
702:         row = *bjtmp++;
703:       }
704:       /* finished row so stick it into b->a */
705:       pv   = b->a + bi[i];
706:       pj   = b->j + bi[i];
707:       nz   = bi[i + 1] - bi[i];
708:       diag = diag_offset[i] - bi[i];
709:       rs   = 0.0;
710:       for (j = 0; j < nz; j++) {
711:         pv[j] = rtmp[pj[j]];
712:         rs += PetscAbsScalar(pv[j]);
713:       }
714:       rs -= PetscAbsScalar(pv[diag]);

716:       sctx.rs = rs;
717:       sctx.pv = pv[diag];
718:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
719:       if (sctx.newshift) break;
720:       pv[diag] = sctx.pv;
721:     }

723:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
724:       /*
725:        * if no shift in this attempt & shifting & started shifting & can refine,
726:        * then try lower shift
727:        */
728:       sctx.shift_hi       = sctx.shift_fraction;
729:       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
730:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
731:       sctx.newshift       = PETSC_TRUE;
732:       sctx.nshift++;
733:     }
734:   } while (sctx.newshift);

736:   /* invert diagonal entries for simpler triangular solves */
737:   for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]];
738:   PetscCall(PetscFree(rtmp));
739:   PetscCall(ISRestoreIndices(isicol, &ic));
740:   PetscCall(ISRestoreIndices(isrow, &r));

742:   PetscCall(ISIdentity(isrow, &row_identity));
743:   PetscCall(ISIdentity(isicol, &col_identity));
744:   if (row_identity && col_identity) {
745:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
746:   } else {
747:     C->ops->solve = MatSolve_SeqAIJ_inplace;
748:   }
749:   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
750:   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
751:   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
752:   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
753:   C->ops->matsolvetranspose = NULL;

755:   C->assembled    = PETSC_TRUE;
756:   C->preallocated = PETSC_TRUE;

758:   PetscCall(PetscLogFlops(C->cmap->n));
759:   if (sctx.nshift) {
760:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
761:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
762:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
763:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
764:     }
765:   }
766:   (C)->ops->solve          = MatSolve_SeqAIJ_inplace;
767:   (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;

769:   PetscCall(MatSeqAIJCheckInode(C));
770:   PetscFunctionReturn(PETSC_SUCCESS);
771: }

773: static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec);

775: /*
776:    This routine implements inplace ILU(0) with row or/and column permutations.
777:    Input:
778:      A - original matrix
779:    Output;
780:      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
781:          a->j (col index) is permuted by the inverse of colperm, then sorted
782:          a->a reordered accordingly with a->j
783:          a->diag (ptr to diagonal elements) is updated.
784: */
785: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
786: {
787:   Mat_SeqAIJ      *a     = (Mat_SeqAIJ *)A->data;
788:   IS               isrow = a->row, isicol = a->icol;
789:   const PetscInt  *r, *ic, *ics;
790:   PetscInt         i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
791:   PetscInt        *ajtmp, nz, row;
792:   PetscInt        *diag = a->diag, nbdiag, *pj;
793:   PetscScalar     *rtmp, *pc, multiplier, d;
794:   MatScalar       *pv, *v;
795:   PetscReal        rs;
796:   FactorShiftCtx   sctx;
797:   const MatScalar *aa = a->a, *vtmp;

799:   PetscFunctionBegin;
800:   PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");

802:   /* MatPivotSetUp(): initialize shift context sctx */
803:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

805:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
806:     const PetscInt *ddiag = a->diag;
807:     sctx.shift_top        = info->zeropivot;
808:     for (i = 0; i < n; i++) {
809:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
810:       d    = (aa)[ddiag[i]];
811:       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
812:       vtmp = aa + ai[i];
813:       nz   = ai[i + 1] - ai[i];
814:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
815:       if (rs > sctx.shift_top) sctx.shift_top = rs;
816:     }
817:     sctx.shift_top *= 1.1;
818:     sctx.nshift_max = 5;
819:     sctx.shift_lo   = 0.;
820:     sctx.shift_hi   = 1.;
821:   }

823:   PetscCall(ISGetIndices(isrow, &r));
824:   PetscCall(ISGetIndices(isicol, &ic));
825:   PetscCall(PetscMalloc1(n + 1, &rtmp));
826:   PetscCall(PetscArrayzero(rtmp, n + 1));
827:   ics = ic;

829: #if defined(MV)
830:   sctx.shift_top      = 0.;
831:   sctx.nshift_max     = 0;
832:   sctx.shift_lo       = 0.;
833:   sctx.shift_hi       = 0.;
834:   sctx.shift_fraction = 0.;

836:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
837:     sctx.shift_top = 0.;
838:     for (i = 0; i < n; i++) {
839:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
840:       d  = (a->a)[diag[i]];
841:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
842:       v  = a->a + ai[i];
843:       nz = ai[i + 1] - ai[i];
844:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
845:       if (rs > sctx.shift_top) sctx.shift_top = rs;
846:     }
847:     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
848:     sctx.shift_top *= 1.1;
849:     sctx.nshift_max = 5;
850:     sctx.shift_lo   = 0.;
851:     sctx.shift_hi   = 1.;
852:   }

854:   sctx.shift_amount = 0.;
855:   sctx.nshift       = 0;
856: #endif

858:   do {
859:     sctx.newshift = PETSC_FALSE;
860:     for (i = 0; i < n; i++) {
861:       /* load in initial unfactored row */
862:       nz    = ai[r[i] + 1] - ai[r[i]];
863:       ajtmp = aj + ai[r[i]];
864:       v     = a->a + ai[r[i]];
865:       /* sort permuted ajtmp and values v accordingly */
866:       for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
867:       PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));

869:       diag[r[i]] = ai[r[i]];
870:       for (j = 0; j < nz; j++) {
871:         rtmp[ajtmp[j]] = v[j];
872:         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
873:       }
874:       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */

876:       row = *ajtmp++;
877:       while (row < i) {
878:         pc = rtmp + row;
879:         if (*pc != 0.0) {
880:           pv = a->a + diag[r[row]];
881:           pj = aj + diag[r[row]] + 1;

883:           multiplier = *pc / *pv++;
884:           *pc        = multiplier;
885:           nz         = ai[r[row] + 1] - diag[r[row]] - 1;
886:           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
887:           PetscCall(PetscLogFlops(1 + 2.0 * nz));
888:         }
889:         row = *ajtmp++;
890:       }
891:       /* finished row so overwrite it onto a->a */
892:       pv     = a->a + ai[r[i]];
893:       pj     = aj + ai[r[i]];
894:       nz     = ai[r[i] + 1] - ai[r[i]];
895:       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */

897:       rs = 0.0;
898:       for (j = 0; j < nz; j++) {
899:         pv[j] = rtmp[pj[j]];
900:         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
901:       }

903:       sctx.rs = rs;
904:       sctx.pv = pv[nbdiag];
905:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
906:       if (sctx.newshift) break;
907:       pv[nbdiag] = sctx.pv;
908:     }

910:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
911:       /*
912:        * if no shift in this attempt & shifting & started shifting & can refine,
913:        * then try lower shift
914:        */
915:       sctx.shift_hi       = sctx.shift_fraction;
916:       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
917:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
918:       sctx.newshift       = PETSC_TRUE;
919:       sctx.nshift++;
920:     }
921:   } while (sctx.newshift);

923:   /* invert diagonal entries for simpler triangular solves */
924:   for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]];

926:   PetscCall(PetscFree(rtmp));
927:   PetscCall(ISRestoreIndices(isicol, &ic));
928:   PetscCall(ISRestoreIndices(isrow, &r));

930:   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
931:   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
932:   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
933:   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;

935:   A->assembled    = PETSC_TRUE;
936:   A->preallocated = PETSC_TRUE;

938:   PetscCall(PetscLogFlops(A->cmap->n));
939:   if (sctx.nshift) {
940:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
941:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
942:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
943:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
944:     }
945:   }
946:   PetscFunctionReturn(PETSC_SUCCESS);
947: }

949: PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
950: {
951:   Mat C;

953:   PetscFunctionBegin;
954:   PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
955:   PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
956:   PetscCall(MatLUFactorNumeric(C, A, info));

958:   A->ops->solve          = C->ops->solve;
959:   A->ops->solvetranspose = C->ops->solvetranspose;

961:   PetscCall(MatHeaderMerge(A, &C));
962:   PetscFunctionReturn(PETSC_SUCCESS);
963: }

965: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
966: {
967:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
968:   IS                 iscol = a->col, isrow = a->row;
969:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
970:   PetscInt           nz;
971:   const PetscInt    *rout, *cout, *r, *c;
972:   PetscScalar       *x, *tmp, *tmps, sum;
973:   const PetscScalar *b;
974:   const MatScalar   *aa = a->a, *v;

976:   PetscFunctionBegin;
977:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

979:   PetscCall(VecGetArrayRead(bb, &b));
980:   PetscCall(VecGetArrayWrite(xx, &x));
981:   tmp = a->solve_work;

983:   PetscCall(ISGetIndices(isrow, &rout));
984:   r = rout;
985:   PetscCall(ISGetIndices(iscol, &cout));
986:   c = cout + (n - 1);

988:   /* forward solve the lower triangular */
989:   tmp[0] = b[*r++];
990:   tmps   = tmp;
991:   for (i = 1; i < n; i++) {
992:     v   = aa + ai[i];
993:     vi  = aj + ai[i];
994:     nz  = a->diag[i] - ai[i];
995:     sum = b[*r++];
996:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
997:     tmp[i] = sum;
998:   }

1000:   /* backward solve the upper triangular */
1001:   for (i = n - 1; i >= 0; i--) {
1002:     v   = aa + a->diag[i] + 1;
1003:     vi  = aj + a->diag[i] + 1;
1004:     nz  = ai[i + 1] - a->diag[i] - 1;
1005:     sum = tmp[i];
1006:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1007:     x[*c--] = tmp[i] = sum * aa[a->diag[i]];
1008:   }

1010:   PetscCall(ISRestoreIndices(isrow, &rout));
1011:   PetscCall(ISRestoreIndices(iscol, &cout));
1012:   PetscCall(VecRestoreArrayRead(bb, &b));
1013:   PetscCall(VecRestoreArrayWrite(xx, &x));
1014:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1015:   PetscFunctionReturn(PETSC_SUCCESS);
1016: }

1018: static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
1019: {
1020:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1021:   IS                 iscol = a->col, isrow = a->row;
1022:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1023:   PetscInt           nz, neq, ldb, ldx;
1024:   const PetscInt    *rout, *cout, *r, *c;
1025:   PetscScalar       *x, *tmp = a->solve_work, *tmps, sum;
1026:   const PetscScalar *b, *aa  = a->a, *v;
1027:   PetscBool          isdense;

1029:   PetscFunctionBegin;
1030:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1031:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1032:   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1033:   if (X != B) {
1034:     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1035:     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1036:   }
1037:   PetscCall(MatDenseGetArrayRead(B, &b));
1038:   PetscCall(MatDenseGetLDA(B, &ldb));
1039:   PetscCall(MatDenseGetArray(X, &x));
1040:   PetscCall(MatDenseGetLDA(X, &ldx));
1041:   PetscCall(ISGetIndices(isrow, &rout));
1042:   r = rout;
1043:   PetscCall(ISGetIndices(iscol, &cout));
1044:   c = cout;
1045:   for (neq = 0; neq < B->cmap->n; neq++) {
1046:     /* forward solve the lower triangular */
1047:     tmp[0] = b[r[0]];
1048:     tmps   = tmp;
1049:     for (i = 1; i < n; i++) {
1050:       v   = aa + ai[i];
1051:       vi  = aj + ai[i];
1052:       nz  = a->diag[i] - ai[i];
1053:       sum = b[r[i]];
1054:       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1055:       tmp[i] = sum;
1056:     }
1057:     /* backward solve the upper triangular */
1058:     for (i = n - 1; i >= 0; i--) {
1059:       v   = aa + a->diag[i] + 1;
1060:       vi  = aj + a->diag[i] + 1;
1061:       nz  = ai[i + 1] - a->diag[i] - 1;
1062:       sum = tmp[i];
1063:       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1064:       x[c[i]] = tmp[i] = sum * aa[a->diag[i]];
1065:     }
1066:     b += ldb;
1067:     x += ldx;
1068:   }
1069:   PetscCall(ISRestoreIndices(isrow, &rout));
1070:   PetscCall(ISRestoreIndices(iscol, &cout));
1071:   PetscCall(MatDenseRestoreArrayRead(B, &b));
1072:   PetscCall(MatDenseRestoreArray(X, &x));
1073:   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1074:   PetscFunctionReturn(PETSC_SUCCESS);
1075: }

1077: PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
1078: {
1079:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1080:   IS                 iscol = a->col, isrow = a->row;
1081:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1082:   PetscInt           nz, neq, ldb, ldx;
1083:   const PetscInt    *rout, *cout, *r, *c;
1084:   PetscScalar       *x, *tmp = a->solve_work, sum;
1085:   const PetscScalar *b, *aa  = a->a, *v;
1086:   PetscBool          isdense;

1088:   PetscFunctionBegin;
1089:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1090:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1091:   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1092:   if (X != B) {
1093:     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1094:     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1095:   }
1096:   PetscCall(MatDenseGetArrayRead(B, &b));
1097:   PetscCall(MatDenseGetLDA(B, &ldb));
1098:   PetscCall(MatDenseGetArray(X, &x));
1099:   PetscCall(MatDenseGetLDA(X, &ldx));
1100:   PetscCall(ISGetIndices(isrow, &rout));
1101:   r = rout;
1102:   PetscCall(ISGetIndices(iscol, &cout));
1103:   c = cout;
1104:   for (neq = 0; neq < B->cmap->n; neq++) {
1105:     /* forward solve the lower triangular */
1106:     tmp[0] = b[r[0]];
1107:     v      = aa;
1108:     vi     = aj;
1109:     for (i = 1; i < n; i++) {
1110:       nz  = ai[i + 1] - ai[i];
1111:       sum = b[r[i]];
1112:       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1113:       tmp[i] = sum;
1114:       v += nz;
1115:       vi += nz;
1116:     }
1117:     /* backward solve the upper triangular */
1118:     for (i = n - 1; i >= 0; i--) {
1119:       v   = aa + adiag[i + 1] + 1;
1120:       vi  = aj + adiag[i + 1] + 1;
1121:       nz  = adiag[i] - adiag[i + 1] - 1;
1122:       sum = tmp[i];
1123:       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1124:       x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
1125:     }
1126:     b += ldb;
1127:     x += ldx;
1128:   }
1129:   PetscCall(ISRestoreIndices(isrow, &rout));
1130:   PetscCall(ISRestoreIndices(iscol, &cout));
1131:   PetscCall(MatDenseRestoreArrayRead(B, &b));
1132:   PetscCall(MatDenseRestoreArray(X, &x));
1133:   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1134:   PetscFunctionReturn(PETSC_SUCCESS);
1135: }

1137: PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X)
1138: {
1139:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1140:   IS                 iscol = a->col, isrow = a->row;
1141:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, j;
1142:   PetscInt           nz, neq, ldb, ldx;
1143:   const PetscInt    *rout, *cout, *r, *c;
1144:   PetscScalar       *x, *tmp = a->solve_work, s1;
1145:   const PetscScalar *b, *aa  = a->a, *v;
1146:   PetscBool          isdense;

1148:   PetscFunctionBegin;
1149:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1150:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1151:   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1152:   if (X != B) {
1153:     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1154:     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1155:   }
1156:   PetscCall(MatDenseGetArrayRead(B, &b));
1157:   PetscCall(MatDenseGetLDA(B, &ldb));
1158:   PetscCall(MatDenseGetArray(X, &x));
1159:   PetscCall(MatDenseGetLDA(X, &ldx));
1160:   tmp = a->solve_work;
1161:   PetscCall(ISGetIndices(isrow, &rout));
1162:   r = rout;
1163:   PetscCall(ISGetIndices(iscol, &cout));
1164:   c = cout;
1165:   for (neq = 0; neq < B->cmap->n; neq++) {
1166:     /* copy the b into temp work space according to permutation */
1167:     for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1169:     /* forward solve the U^T */
1170:     for (i = 0; i < n; i++) {
1171:       v  = aa + adiag[i + 1] + 1;
1172:       vi = aj + adiag[i + 1] + 1;
1173:       nz = adiag[i] - adiag[i + 1] - 1;
1174:       s1 = tmp[i];
1175:       s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1176:       for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1177:       tmp[i] = s1;
1178:     }

1180:     /* backward solve the L^T */
1181:     for (i = n - 1; i >= 0; i--) {
1182:       v  = aa + ai[i];
1183:       vi = aj + ai[i];
1184:       nz = ai[i + 1] - ai[i];
1185:       s1 = tmp[i];
1186:       for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1187:     }

1189:     /* copy tmp into x according to permutation */
1190:     for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1191:     b += ldb;
1192:     x += ldx;
1193:   }
1194:   PetscCall(ISRestoreIndices(isrow, &rout));
1195:   PetscCall(ISRestoreIndices(iscol, &cout));
1196:   PetscCall(MatDenseRestoreArrayRead(B, &b));
1197:   PetscCall(MatDenseRestoreArray(X, &x));
1198:   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1199:   PetscFunctionReturn(PETSC_SUCCESS);
1200: }

1202: static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
1203: {
1204:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1205:   IS                 iscol = a->col, isrow = a->row;
1206:   const PetscInt    *r, *c, *rout, *cout;
1207:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1208:   PetscInt           nz, row;
1209:   PetscScalar       *x, *tmp, *tmps, sum;
1210:   const PetscScalar *b;
1211:   const MatScalar   *aa = a->a, *v;

1213:   PetscFunctionBegin;
1214:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

1216:   PetscCall(VecGetArrayRead(bb, &b));
1217:   PetscCall(VecGetArrayWrite(xx, &x));
1218:   tmp = a->solve_work;

1220:   PetscCall(ISGetIndices(isrow, &rout));
1221:   r = rout;
1222:   PetscCall(ISGetIndices(iscol, &cout));
1223:   c = cout + (n - 1);

1225:   /* forward solve the lower triangular */
1226:   tmp[0] = b[*r++];
1227:   tmps   = tmp;
1228:   for (row = 1; row < n; row++) {
1229:     i   = rout[row]; /* permuted row */
1230:     v   = aa + ai[i];
1231:     vi  = aj + ai[i];
1232:     nz  = a->diag[i] - ai[i];
1233:     sum = b[*r++];
1234:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1235:     tmp[row] = sum;
1236:   }

1238:   /* backward solve the upper triangular */
1239:   for (row = n - 1; row >= 0; row--) {
1240:     i   = rout[row]; /* permuted row */
1241:     v   = aa + a->diag[i] + 1;
1242:     vi  = aj + a->diag[i] + 1;
1243:     nz  = ai[i + 1] - a->diag[i] - 1;
1244:     sum = tmp[row];
1245:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1246:     x[*c--] = tmp[row] = sum * aa[a->diag[i]];
1247:   }

1249:   PetscCall(ISRestoreIndices(isrow, &rout));
1250:   PetscCall(ISRestoreIndices(iscol, &cout));
1251:   PetscCall(VecRestoreArrayRead(bb, &b));
1252:   PetscCall(VecRestoreArrayWrite(xx, &x));
1253:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1254:   PetscFunctionReturn(PETSC_SUCCESS);
1255: }

1257: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1258: static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1259: {
1260:   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
1261:   PetscInt           n  = A->rmap->n;
1262:   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag;
1263:   PetscScalar       *x;
1264:   const PetscScalar *b;
1265:   const MatScalar   *aa = a->a;
1266: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1267:   PetscInt         adiag_i, i, nz, ai_i;
1268:   const PetscInt  *vi;
1269:   const MatScalar *v;
1270:   PetscScalar      sum;
1271: #endif

1273:   PetscFunctionBegin;
1274:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

1276:   PetscCall(VecGetArrayRead(bb, &b));
1277:   PetscCall(VecGetArrayWrite(xx, &x));

1279: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1280:   fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1281: #else
1282:   /* forward solve the lower triangular */
1283:   x[0] = b[0];
1284:   for (i = 1; i < n; i++) {
1285:     ai_i = ai[i];
1286:     v    = aa + ai_i;
1287:     vi   = aj + ai_i;
1288:     nz   = adiag[i] - ai_i;
1289:     sum  = b[i];
1290:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1291:     x[i] = sum;
1292:   }

1294:   /* backward solve the upper triangular */
1295:   for (i = n - 1; i >= 0; i--) {
1296:     adiag_i = adiag[i];
1297:     v       = aa + adiag_i + 1;
1298:     vi      = aj + adiag_i + 1;
1299:     nz      = ai[i + 1] - adiag_i - 1;
1300:     sum     = x[i];
1301:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1302:     x[i] = sum * aa[adiag_i];
1303:   }
1304: #endif
1305:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1306:   PetscCall(VecRestoreArrayRead(bb, &b));
1307:   PetscCall(VecRestoreArrayWrite(xx, &x));
1308:   PetscFunctionReturn(PETSC_SUCCESS);
1309: }

1311: static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1312: {
1313:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1314:   IS                 iscol = a->col, isrow = a->row;
1315:   PetscInt           i, n                  = A->rmap->n, j;
1316:   PetscInt           nz;
1317:   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j;
1318:   PetscScalar       *x, *tmp, sum;
1319:   const PetscScalar *b;
1320:   const MatScalar   *aa = a->a, *v;

1322:   PetscFunctionBegin;
1323:   if (yy != xx) PetscCall(VecCopy(yy, xx));

1325:   PetscCall(VecGetArrayRead(bb, &b));
1326:   PetscCall(VecGetArray(xx, &x));
1327:   tmp = a->solve_work;

1329:   PetscCall(ISGetIndices(isrow, &rout));
1330:   r = rout;
1331:   PetscCall(ISGetIndices(iscol, &cout));
1332:   c = cout + (n - 1);

1334:   /* forward solve the lower triangular */
1335:   tmp[0] = b[*r++];
1336:   for (i = 1; i < n; i++) {
1337:     v   = aa + ai[i];
1338:     vi  = aj + ai[i];
1339:     nz  = a->diag[i] - ai[i];
1340:     sum = b[*r++];
1341:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1342:     tmp[i] = sum;
1343:   }

1345:   /* backward solve the upper triangular */
1346:   for (i = n - 1; i >= 0; i--) {
1347:     v   = aa + a->diag[i] + 1;
1348:     vi  = aj + a->diag[i] + 1;
1349:     nz  = ai[i + 1] - a->diag[i] - 1;
1350:     sum = tmp[i];
1351:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1352:     tmp[i] = sum * aa[a->diag[i]];
1353:     x[*c--] += tmp[i];
1354:   }

1356:   PetscCall(ISRestoreIndices(isrow, &rout));
1357:   PetscCall(ISRestoreIndices(iscol, &cout));
1358:   PetscCall(VecRestoreArrayRead(bb, &b));
1359:   PetscCall(VecRestoreArray(xx, &x));
1360:   PetscCall(PetscLogFlops(2.0 * a->nz));
1361:   PetscFunctionReturn(PETSC_SUCCESS);
1362: }

1364: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1365: {
1366:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1367:   IS                 iscol = a->col, isrow = a->row;
1368:   PetscInt           i, n                  = A->rmap->n, j;
1369:   PetscInt           nz;
1370:   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1371:   PetscScalar       *x, *tmp, sum;
1372:   const PetscScalar *b;
1373:   const MatScalar   *aa = a->a, *v;

1375:   PetscFunctionBegin;
1376:   if (yy != xx) PetscCall(VecCopy(yy, xx));

1378:   PetscCall(VecGetArrayRead(bb, &b));
1379:   PetscCall(VecGetArray(xx, &x));
1380:   tmp = a->solve_work;

1382:   PetscCall(ISGetIndices(isrow, &rout));
1383:   r = rout;
1384:   PetscCall(ISGetIndices(iscol, &cout));
1385:   c = cout;

1387:   /* forward solve the lower triangular */
1388:   tmp[0] = b[r[0]];
1389:   v      = aa;
1390:   vi     = aj;
1391:   for (i = 1; i < n; i++) {
1392:     nz  = ai[i + 1] - ai[i];
1393:     sum = b[r[i]];
1394:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1395:     tmp[i] = sum;
1396:     v += nz;
1397:     vi += nz;
1398:   }

1400:   /* backward solve the upper triangular */
1401:   v  = aa + adiag[n - 1];
1402:   vi = aj + adiag[n - 1];
1403:   for (i = n - 1; i >= 0; i--) {
1404:     nz  = adiag[i] - adiag[i + 1] - 1;
1405:     sum = tmp[i];
1406:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1407:     tmp[i] = sum * v[nz];
1408:     x[c[i]] += tmp[i];
1409:     v += nz + 1;
1410:     vi += nz + 1;
1411:   }

1413:   PetscCall(ISRestoreIndices(isrow, &rout));
1414:   PetscCall(ISRestoreIndices(iscol, &cout));
1415:   PetscCall(VecRestoreArrayRead(bb, &b));
1416:   PetscCall(VecRestoreArray(xx, &x));
1417:   PetscCall(PetscLogFlops(2.0 * a->nz));
1418:   PetscFunctionReturn(PETSC_SUCCESS);
1419: }

1421: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1422: {
1423:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1424:   IS                 iscol = a->col, isrow = a->row;
1425:   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1426:   PetscInt           i, n = A->rmap->n, j;
1427:   PetscInt           nz;
1428:   PetscScalar       *x, *tmp, s1;
1429:   const MatScalar   *aa = a->a, *v;
1430:   const PetscScalar *b;

1432:   PetscFunctionBegin;
1433:   PetscCall(VecGetArrayRead(bb, &b));
1434:   PetscCall(VecGetArrayWrite(xx, &x));
1435:   tmp = a->solve_work;

1437:   PetscCall(ISGetIndices(isrow, &rout));
1438:   r = rout;
1439:   PetscCall(ISGetIndices(iscol, &cout));
1440:   c = cout;

1442:   /* copy the b into temp work space according to permutation */
1443:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1445:   /* forward solve the U^T */
1446:   for (i = 0; i < n; i++) {
1447:     v  = aa + diag[i];
1448:     vi = aj + diag[i] + 1;
1449:     nz = ai[i + 1] - diag[i] - 1;
1450:     s1 = tmp[i];
1451:     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1452:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1453:     tmp[i] = s1;
1454:   }

1456:   /* backward solve the L^T */
1457:   for (i = n - 1; i >= 0; i--) {
1458:     v  = aa + diag[i] - 1;
1459:     vi = aj + diag[i] - 1;
1460:     nz = diag[i] - ai[i];
1461:     s1 = tmp[i];
1462:     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1463:   }

1465:   /* copy tmp into x according to permutation */
1466:   for (i = 0; i < n; i++) x[r[i]] = tmp[i];

1468:   PetscCall(ISRestoreIndices(isrow, &rout));
1469:   PetscCall(ISRestoreIndices(iscol, &cout));
1470:   PetscCall(VecRestoreArrayRead(bb, &b));
1471:   PetscCall(VecRestoreArrayWrite(xx, &x));

1473:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1474:   PetscFunctionReturn(PETSC_SUCCESS);
1475: }

1477: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1478: {
1479:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1480:   IS                 iscol = a->col, isrow = a->row;
1481:   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1482:   PetscInt           i, n = A->rmap->n, j;
1483:   PetscInt           nz;
1484:   PetscScalar       *x, *tmp, s1;
1485:   const MatScalar   *aa = a->a, *v;
1486:   const PetscScalar *b;

1488:   PetscFunctionBegin;
1489:   PetscCall(VecGetArrayRead(bb, &b));
1490:   PetscCall(VecGetArrayWrite(xx, &x));
1491:   tmp = a->solve_work;

1493:   PetscCall(ISGetIndices(isrow, &rout));
1494:   r = rout;
1495:   PetscCall(ISGetIndices(iscol, &cout));
1496:   c = cout;

1498:   /* copy the b into temp work space according to permutation */
1499:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1501:   /* forward solve the U^T */
1502:   for (i = 0; i < n; i++) {
1503:     v  = aa + adiag[i + 1] + 1;
1504:     vi = aj + adiag[i + 1] + 1;
1505:     nz = adiag[i] - adiag[i + 1] - 1;
1506:     s1 = tmp[i];
1507:     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1508:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1509:     tmp[i] = s1;
1510:   }

1512:   /* backward solve the L^T */
1513:   for (i = n - 1; i >= 0; i--) {
1514:     v  = aa + ai[i];
1515:     vi = aj + ai[i];
1516:     nz = ai[i + 1] - ai[i];
1517:     s1 = tmp[i];
1518:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1519:   }

1521:   /* copy tmp into x according to permutation */
1522:   for (i = 0; i < n; i++) x[r[i]] = tmp[i];

1524:   PetscCall(ISRestoreIndices(isrow, &rout));
1525:   PetscCall(ISRestoreIndices(iscol, &cout));
1526:   PetscCall(VecRestoreArrayRead(bb, &b));
1527:   PetscCall(VecRestoreArrayWrite(xx, &x));

1529:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1530:   PetscFunctionReturn(PETSC_SUCCESS);
1531: }

1533: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1534: {
1535:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1536:   IS                 iscol = a->col, isrow = a->row;
1537:   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1538:   PetscInt           i, n = A->rmap->n, j;
1539:   PetscInt           nz;
1540:   PetscScalar       *x, *tmp, s1;
1541:   const MatScalar   *aa = a->a, *v;
1542:   const PetscScalar *b;

1544:   PetscFunctionBegin;
1545:   if (zz != xx) PetscCall(VecCopy(zz, xx));
1546:   PetscCall(VecGetArrayRead(bb, &b));
1547:   PetscCall(VecGetArray(xx, &x));
1548:   tmp = a->solve_work;

1550:   PetscCall(ISGetIndices(isrow, &rout));
1551:   r = rout;
1552:   PetscCall(ISGetIndices(iscol, &cout));
1553:   c = cout;

1555:   /* copy the b into temp work space according to permutation */
1556:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1558:   /* forward solve the U^T */
1559:   for (i = 0; i < n; i++) {
1560:     v  = aa + diag[i];
1561:     vi = aj + diag[i] + 1;
1562:     nz = ai[i + 1] - diag[i] - 1;
1563:     s1 = tmp[i];
1564:     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1565:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1566:     tmp[i] = s1;
1567:   }

1569:   /* backward solve the L^T */
1570:   for (i = n - 1; i >= 0; i--) {
1571:     v  = aa + diag[i] - 1;
1572:     vi = aj + diag[i] - 1;
1573:     nz = diag[i] - ai[i];
1574:     s1 = tmp[i];
1575:     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1576:   }

1578:   /* copy tmp into x according to permutation */
1579:   for (i = 0; i < n; i++) x[r[i]] += tmp[i];

1581:   PetscCall(ISRestoreIndices(isrow, &rout));
1582:   PetscCall(ISRestoreIndices(iscol, &cout));
1583:   PetscCall(VecRestoreArrayRead(bb, &b));
1584:   PetscCall(VecRestoreArray(xx, &x));

1586:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1587:   PetscFunctionReturn(PETSC_SUCCESS);
1588: }

1590: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1591: {
1592:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1593:   IS                 iscol = a->col, isrow = a->row;
1594:   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1595:   PetscInt           i, n = A->rmap->n, j;
1596:   PetscInt           nz;
1597:   PetscScalar       *x, *tmp, s1;
1598:   const MatScalar   *aa = a->a, *v;
1599:   const PetscScalar *b;

1601:   PetscFunctionBegin;
1602:   if (zz != xx) PetscCall(VecCopy(zz, xx));
1603:   PetscCall(VecGetArrayRead(bb, &b));
1604:   PetscCall(VecGetArray(xx, &x));
1605:   tmp = a->solve_work;

1607:   PetscCall(ISGetIndices(isrow, &rout));
1608:   r = rout;
1609:   PetscCall(ISGetIndices(iscol, &cout));
1610:   c = cout;

1612:   /* copy the b into temp work space according to permutation */
1613:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1615:   /* forward solve the U^T */
1616:   for (i = 0; i < n; i++) {
1617:     v  = aa + adiag[i + 1] + 1;
1618:     vi = aj + adiag[i + 1] + 1;
1619:     nz = adiag[i] - adiag[i + 1] - 1;
1620:     s1 = tmp[i];
1621:     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1622:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1623:     tmp[i] = s1;
1624:   }

1626:   /* backward solve the L^T */
1627:   for (i = n - 1; i >= 0; i--) {
1628:     v  = aa + ai[i];
1629:     vi = aj + ai[i];
1630:     nz = ai[i + 1] - ai[i];
1631:     s1 = tmp[i];
1632:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1633:   }

1635:   /* copy tmp into x according to permutation */
1636:   for (i = 0; i < n; i++) x[r[i]] += tmp[i];

1638:   PetscCall(ISRestoreIndices(isrow, &rout));
1639:   PetscCall(ISRestoreIndices(iscol, &cout));
1640:   PetscCall(VecRestoreArrayRead(bb, &b));
1641:   PetscCall(VecRestoreArray(xx, &x));

1643:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1644:   PetscFunctionReturn(PETSC_SUCCESS);
1645: }

1647: /*
1648:    ilu() under revised new data structure.
1649:    Factored arrays bj and ba are stored as
1650:      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)

1652:    bi=fact->i is an array of size n+1, in which
1653:      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1654:      bi[n]:  points to L(n-1,n-1)+1

1656:   bdiag=fact->diag is an array of size n+1,in which
1657:      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1658:      bdiag[n]: points to entry of U(n-1,0)-1

1660:    U(i,:) contains bdiag[i] as its last entry, i.e.,
1661:     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1662: */
1663: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1664: {
1665:   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data, *b;
1666:   const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag;
1667:   PetscInt       i, j, k = 0, nz, *bi, *bj, *bdiag;
1668:   IS             isicol;

1670:   PetscFunctionBegin;
1671:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1672:   PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1673:   b = (Mat_SeqAIJ *)(fact)->data;

1675:   /* allocate matrix arrays for new data structure */
1676:   PetscCall(PetscMalloc3(ai[n] + 1, &b->a, ai[n] + 1, &b->j, n + 1, &b->i));

1678:   b->singlemalloc = PETSC_TRUE;
1679:   if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag));
1680:   bdiag = b->diag;

1682:   if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));

1684:   /* set bi and bj with new data structure */
1685:   bi = b->i;
1686:   bj = b->j;

1688:   /* L part */
1689:   bi[0] = 0;
1690:   for (i = 0; i < n; i++) {
1691:     nz        = adiag[i] - ai[i];
1692:     bi[i + 1] = bi[i] + nz;
1693:     aj        = a->j + ai[i];
1694:     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1695:   }

1697:   /* U part */
1698:   bdiag[n] = bi[n] - 1;
1699:   for (i = n - 1; i >= 0; i--) {
1700:     nz = ai[i + 1] - adiag[i] - 1;
1701:     aj = a->j + adiag[i] + 1;
1702:     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1703:     /* diag[i] */
1704:     bj[k++]  = i;
1705:     bdiag[i] = bdiag[i + 1] + nz + 1;
1706:   }

1708:   fact->factortype             = MAT_FACTOR_ILU;
1709:   fact->info.factor_mallocs    = 0;
1710:   fact->info.fill_ratio_given  = info->fill;
1711:   fact->info.fill_ratio_needed = 1.0;
1712:   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1713:   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));

1715:   b       = (Mat_SeqAIJ *)(fact)->data;
1716:   b->row  = isrow;
1717:   b->col  = iscol;
1718:   b->icol = isicol;
1719:   PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work));
1720:   PetscCall(PetscObjectReference((PetscObject)isrow));
1721:   PetscCall(PetscObjectReference((PetscObject)iscol));
1722:   PetscFunctionReturn(PETSC_SUCCESS);
1723: }

1725: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1726: {
1727:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1728:   IS                 isicol;
1729:   const PetscInt    *r, *ic;
1730:   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1731:   PetscInt          *bi, *cols, nnz, *cols_lvl;
1732:   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1733:   PetscInt           i, levels, diagonal_fill;
1734:   PetscBool          col_identity, row_identity, missing;
1735:   PetscReal          f;
1736:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1737:   PetscBT            lnkbt;
1738:   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1739:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1740:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;

1742:   PetscFunctionBegin;
1743:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1744:   PetscCall(MatMissingDiagonal(A, &missing, &i));
1745:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

1747:   levels = (PetscInt)info->levels;
1748:   PetscCall(ISIdentity(isrow, &row_identity));
1749:   PetscCall(ISIdentity(iscol, &col_identity));
1750:   if (!levels && row_identity && col_identity) {
1751:     /* special case: ilu(0) with natural ordering */
1752:     PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1753:     if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1754:     PetscFunctionReturn(PETSC_SUCCESS);
1755:   }

1757:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1758:   PetscCall(ISGetIndices(isrow, &r));
1759:   PetscCall(ISGetIndices(isicol, &ic));

1761:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1762:   PetscCall(PetscMalloc1(n + 1, &bi));
1763:   PetscCall(PetscMalloc1(n + 1, &bdiag));
1764:   bi[0] = bdiag[0] = 0;
1765:   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));

1767:   /* create a linked list for storing column indices of the active row */
1768:   nlnk = n + 1;
1769:   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));

1771:   /* initial FreeSpace size is f*(ai[n]+1) */
1772:   f             = info->fill;
1773:   diagonal_fill = (PetscInt)info->diagonal_fill;
1774:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1775:   current_space = free_space;
1776:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1777:   current_space_lvl = free_space_lvl;
1778:   for (i = 0; i < n; i++) {
1779:     nzi = 0;
1780:     /* copy current row into linked list */
1781:     nnz = ai[r[i] + 1] - ai[r[i]];
1782:     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1783:     cols   = aj + ai[r[i]];
1784:     lnk[i] = -1; /* marker to indicate if diagonal exists */
1785:     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1786:     nzi += nlnk;

1788:     /* make sure diagonal entry is included */
1789:     if (diagonal_fill && lnk[i] == -1) {
1790:       fm = n;
1791:       while (lnk[fm] < i) fm = lnk[fm];
1792:       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1793:       lnk[fm]    = i;
1794:       lnk_lvl[i] = 0;
1795:       nzi++;
1796:       dcount++;
1797:     }

1799:     /* add pivot rows into the active row */
1800:     nzbd = 0;
1801:     prow = lnk[n];
1802:     while (prow < i) {
1803:       nnz      = bdiag[prow];
1804:       cols     = bj_ptr[prow] + nnz + 1;
1805:       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1806:       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1807:       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1808:       nzi += nlnk;
1809:       prow = lnk[prow];
1810:       nzbd++;
1811:     }
1812:     bdiag[i]  = nzbd;
1813:     bi[i + 1] = bi[i] + nzi;
1814:     /* if free space is not available, make more free space */
1815:     if (current_space->local_remaining < nzi) {
1816:       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1817:       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1818:       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1819:       reallocs++;
1820:     }

1822:     /* copy data into free_space and free_space_lvl, then initialize lnk */
1823:     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1824:     bj_ptr[i]    = current_space->array;
1825:     bjlvl_ptr[i] = current_space_lvl->array;

1827:     /* make sure the active row i has diagonal entry */
1828:     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);

1830:     current_space->array += nzi;
1831:     current_space->local_used += nzi;
1832:     current_space->local_remaining -= nzi;
1833:     current_space_lvl->array += nzi;
1834:     current_space_lvl->local_used += nzi;
1835:     current_space_lvl->local_remaining -= nzi;
1836:   }

1838:   PetscCall(ISRestoreIndices(isrow, &r));
1839:   PetscCall(ISRestoreIndices(isicol, &ic));
1840:   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1841:   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
1842:   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));

1844:   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1845:   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1846:   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));

1848: #if defined(PETSC_USE_INFO)
1849:   {
1850:     PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1851:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1852:     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1853:     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1854:     PetscCall(PetscInfo(A, "for best performance.\n"));
1855:     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1856:   }
1857: #endif
1858:   /* put together the new matrix */
1859:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1860:   b = (Mat_SeqAIJ *)(fact)->data;

1862:   b->free_a       = PETSC_TRUE;
1863:   b->free_ij      = PETSC_TRUE;
1864:   b->singlemalloc = PETSC_FALSE;

1866:   PetscCall(PetscMalloc1(bdiag[0] + 1, &b->a));

1868:   b->j    = bj;
1869:   b->i    = bi;
1870:   b->diag = bdiag;
1871:   b->ilen = NULL;
1872:   b->imax = NULL;
1873:   b->row  = isrow;
1874:   b->col  = iscol;
1875:   PetscCall(PetscObjectReference((PetscObject)isrow));
1876:   PetscCall(PetscObjectReference((PetscObject)iscol));
1877:   b->icol = isicol;

1879:   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1880:   /* In b structure:  Free imax, ilen, old a, old j.
1881:      Allocate bdiag, solve_work, new a, new j */
1882:   b->maxnz = b->nz = bdiag[0] + 1;

1884:   (fact)->info.factor_mallocs    = reallocs;
1885:   (fact)->info.fill_ratio_given  = f;
1886:   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1887:   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1888:   if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1889:   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1890:   PetscFunctionReturn(PETSC_SUCCESS);
1891: }

1893: #if 0
1894: // unused
1895: static PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1896: {
1897:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1898:   IS                 isicol;
1899:   const PetscInt    *r, *ic;
1900:   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1901:   PetscInt          *bi, *cols, nnz, *cols_lvl;
1902:   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1903:   PetscInt           i, levels, diagonal_fill;
1904:   PetscBool          col_identity, row_identity;
1905:   PetscReal          f;
1906:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1907:   PetscBT            lnkbt;
1908:   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1909:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1910:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1911:   PetscBool          missing;

1913:   PetscFunctionBegin;
1914:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1915:   PetscCall(MatMissingDiagonal(A, &missing, &i));
1916:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

1918:   f             = info->fill;
1919:   levels        = (PetscInt)info->levels;
1920:   diagonal_fill = (PetscInt)info->diagonal_fill;

1922:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));

1924:   PetscCall(ISIdentity(isrow, &row_identity));
1925:   PetscCall(ISIdentity(iscol, &col_identity));
1926:   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1927:     PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE));

1929:     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1930:     if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1931:     fact->factortype               = MAT_FACTOR_ILU;
1932:     (fact)->info.factor_mallocs    = 0;
1933:     (fact)->info.fill_ratio_given  = info->fill;
1934:     (fact)->info.fill_ratio_needed = 1.0;

1936:     b       = (Mat_SeqAIJ *)(fact)->data;
1937:     b->row  = isrow;
1938:     b->col  = iscol;
1939:     b->icol = isicol;
1940:     PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work));
1941:     PetscCall(PetscObjectReference((PetscObject)isrow));
1942:     PetscCall(PetscObjectReference((PetscObject)iscol));
1943:     PetscFunctionReturn(PETSC_SUCCESS);
1944:   }

1946:   PetscCall(ISGetIndices(isrow, &r));
1947:   PetscCall(ISGetIndices(isicol, &ic));

1949:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1950:   PetscCall(PetscMalloc1(n + 1, &bi));
1951:   PetscCall(PetscMalloc1(n + 1, &bdiag));
1952:   bi[0] = bdiag[0] = 0;

1954:   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));

1956:   /* create a linked list for storing column indices of the active row */
1957:   nlnk = n + 1;
1958:   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));

1960:   /* initial FreeSpace size is f*(ai[n]+1) */
1961:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1962:   current_space = free_space;
1963:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1964:   current_space_lvl = free_space_lvl;

1966:   for (i = 0; i < n; i++) {
1967:     nzi = 0;
1968:     /* copy current row into linked list */
1969:     nnz = ai[r[i] + 1] - ai[r[i]];
1970:     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1971:     cols   = aj + ai[r[i]];
1972:     lnk[i] = -1; /* marker to indicate if diagonal exists */
1973:     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1974:     nzi += nlnk;

1976:     /* make sure diagonal entry is included */
1977:     if (diagonal_fill && lnk[i] == -1) {
1978:       fm = n;
1979:       while (lnk[fm] < i) fm = lnk[fm];
1980:       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1981:       lnk[fm]    = i;
1982:       lnk_lvl[i] = 0;
1983:       nzi++;
1984:       dcount++;
1985:     }

1987:     /* add pivot rows into the active row */
1988:     nzbd = 0;
1989:     prow = lnk[n];
1990:     while (prow < i) {
1991:       nnz      = bdiag[prow];
1992:       cols     = bj_ptr[prow] + nnz + 1;
1993:       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1994:       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1995:       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1996:       nzi += nlnk;
1997:       prow = lnk[prow];
1998:       nzbd++;
1999:     }
2000:     bdiag[i]  = nzbd;
2001:     bi[i + 1] = bi[i] + nzi;

2003:     /* if free space is not available, make more free space */
2004:     if (current_space->local_remaining < nzi) {
2005:       nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */
2006:       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
2007:       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
2008:       reallocs++;
2009:     }

2011:     /* copy data into free_space and free_space_lvl, then initialize lnk */
2012:     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2013:     bj_ptr[i]    = current_space->array;
2014:     bjlvl_ptr[i] = current_space_lvl->array;

2016:     /* make sure the active row i has diagonal entry */
2017:     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);

2019:     current_space->array += nzi;
2020:     current_space->local_used += nzi;
2021:     current_space->local_remaining -= nzi;
2022:     current_space_lvl->array += nzi;
2023:     current_space_lvl->local_used += nzi;
2024:     current_space_lvl->local_remaining -= nzi;
2025:   }

2027:   PetscCall(ISRestoreIndices(isrow, &r));
2028:   PetscCall(ISRestoreIndices(isicol, &ic));

2030:   /* destroy list of free space and other temporary arrays */
2031:   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
2032:   PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */
2033:   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2034:   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2035:   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));

2037:   #if defined(PETSC_USE_INFO)
2038:   {
2039:     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2040:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
2041:     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
2042:     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
2043:     PetscCall(PetscInfo(A, "for best performance.\n"));
2044:     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
2045:   }
2046:   #endif

2048:   /* put together the new matrix */
2049:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
2050:   b = (Mat_SeqAIJ *)(fact)->data;

2052:   b->free_a       = PETSC_TRUE;
2053:   b->free_ij      = PETSC_TRUE;
2054:   b->singlemalloc = PETSC_FALSE;

2056:   PetscCall(PetscMalloc1(bi[n], &b->a));
2057:   b->j = bj;
2058:   b->i = bi;
2059:   for (i = 0; i < n; i++) bdiag[i] += bi[i];
2060:   b->diag = bdiag;
2061:   b->ilen = NULL;
2062:   b->imax = NULL;
2063:   b->row  = isrow;
2064:   b->col  = iscol;
2065:   PetscCall(PetscObjectReference((PetscObject)isrow));
2066:   PetscCall(PetscObjectReference((PetscObject)iscol));
2067:   b->icol = isicol;
2068:   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
2069:   /* In b structure:  Free imax, ilen, old a, old j.
2070:      Allocate bdiag, solve_work, new a, new j */
2071:   b->maxnz = b->nz = bi[n];

2073:   (fact)->info.factor_mallocs    = reallocs;
2074:   (fact)->info.fill_ratio_given  = f;
2075:   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2076:   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ_inplace;
2077:   if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2078:   PetscFunctionReturn(PETSC_SUCCESS);
2079: }
2080: #endif

2082: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
2083: {
2084:   Mat             C  = B;
2085:   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
2086:   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
2087:   IS              ip = b->row, iip = b->icol;
2088:   const PetscInt *rip, *riip;
2089:   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
2090:   PetscInt       *ai = a->i, *aj = a->j;
2091:   PetscInt        k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
2092:   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2093:   PetscBool       perm_identity;
2094:   FactorShiftCtx  sctx;
2095:   PetscReal       rs;
2096:   MatScalar       d, *v;

2098:   PetscFunctionBegin;
2099:   /* MatPivotSetUp(): initialize shift context sctx */
2100:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

2102:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2103:     sctx.shift_top = info->zeropivot;
2104:     for (i = 0; i < mbs; i++) {
2105:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2106:       d  = (aa)[a->diag[i]];
2107:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2108:       v  = aa + ai[i];
2109:       nz = ai[i + 1] - ai[i];
2110:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2111:       if (rs > sctx.shift_top) sctx.shift_top = rs;
2112:     }
2113:     sctx.shift_top *= 1.1;
2114:     sctx.nshift_max = 5;
2115:     sctx.shift_lo   = 0.;
2116:     sctx.shift_hi   = 1.;
2117:   }

2119:   PetscCall(ISGetIndices(ip, &rip));
2120:   PetscCall(ISGetIndices(iip, &riip));

2122:   /* allocate working arrays
2123:      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2124:      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2125:   */
2126:   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));

2128:   do {
2129:     sctx.newshift = PETSC_FALSE;

2131:     for (i = 0; i < mbs; i++) c2r[i] = mbs;
2132:     if (mbs) il[0] = 0;

2134:     for (k = 0; k < mbs; k++) {
2135:       /* zero rtmp */
2136:       nz    = bi[k + 1] - bi[k];
2137:       bjtmp = bj + bi[k];
2138:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

2140:       /* load in initial unfactored row */
2141:       bval = ba + bi[k];
2142:       jmin = ai[rip[k]];
2143:       jmax = ai[rip[k] + 1];
2144:       for (j = jmin; j < jmax; j++) {
2145:         col = riip[aj[j]];
2146:         if (col >= k) { /* only take upper triangular entry */
2147:           rtmp[col] = aa[j];
2148:           *bval++   = 0.0; /* for in-place factorization */
2149:         }
2150:       }
2151:       /* shift the diagonal of the matrix: ZeropivotApply() */
2152:       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */

2154:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2155:       dk = rtmp[k];
2156:       i  = c2r[k]; /* first row to be added to k_th row  */

2158:       while (i < k) {
2159:         nexti = c2r[i]; /* next row to be added to k_th row */

2161:         /* compute multiplier, update diag(k) and U(i,k) */
2162:         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
2163:         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
2164:         dk += uikdi * ba[ili];           /* update diag[k] */
2165:         ba[ili] = uikdi;                 /* -U(i,k) */

2167:         /* add multiple of row i to k-th row */
2168:         jmin = ili + 1;
2169:         jmax = bi[i + 1];
2170:         if (jmin < jmax) {
2171:           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2172:           /* update il and c2r for row i */
2173:           il[i]  = jmin;
2174:           j      = bj[jmin];
2175:           c2r[i] = c2r[j];
2176:           c2r[j] = i;
2177:         }
2178:         i = nexti;
2179:       }

2181:       /* copy data into U(k,:) */
2182:       rs   = 0.0;
2183:       jmin = bi[k];
2184:       jmax = bi[k + 1] - 1;
2185:       if (jmin < jmax) {
2186:         for (j = jmin; j < jmax; j++) {
2187:           col   = bj[j];
2188:           ba[j] = rtmp[col];
2189:           rs += PetscAbsScalar(ba[j]);
2190:         }
2191:         /* add the k-th row into il and c2r */
2192:         il[k]  = jmin;
2193:         i      = bj[jmin];
2194:         c2r[k] = c2r[i];
2195:         c2r[i] = k;
2196:       }

2198:       /* MatPivotCheck() */
2199:       sctx.rs = rs;
2200:       sctx.pv = dk;
2201:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
2202:       if (sctx.newshift) break;
2203:       dk = sctx.pv;

2205:       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
2206:     }
2207:   } while (sctx.newshift);

2209:   PetscCall(PetscFree3(rtmp, il, c2r));
2210:   PetscCall(ISRestoreIndices(ip, &rip));
2211:   PetscCall(ISRestoreIndices(iip, &riip));

2213:   PetscCall(ISIdentity(ip, &perm_identity));
2214:   if (perm_identity) {
2215:     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2216:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2217:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2218:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2219:   } else {
2220:     B->ops->solve          = MatSolve_SeqSBAIJ_1;
2221:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2222:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
2223:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
2224:   }

2226:   C->assembled    = PETSC_TRUE;
2227:   C->preallocated = PETSC_TRUE;

2229:   PetscCall(PetscLogFlops(C->rmap->n));

2231:   /* MatPivotView() */
2232:   if (sctx.nshift) {
2233:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2234:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
2235:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2236:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2237:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2238:       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
2239:     }
2240:   }
2241:   PetscFunctionReturn(PETSC_SUCCESS);
2242: }

2244: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
2245: {
2246:   Mat             C  = B;
2247:   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
2248:   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
2249:   IS              ip = b->row, iip = b->icol;
2250:   const PetscInt *rip, *riip;
2251:   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
2252:   PetscInt       *ai = a->i, *aj = a->j;
2253:   PetscInt        k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
2254:   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2255:   PetscBool       perm_identity;
2256:   FactorShiftCtx  sctx;
2257:   PetscReal       rs;
2258:   MatScalar       d, *v;

2260:   PetscFunctionBegin;
2261:   /* MatPivotSetUp(): initialize shift context sctx */
2262:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

2264:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2265:     sctx.shift_top = info->zeropivot;
2266:     for (i = 0; i < mbs; i++) {
2267:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2268:       d  = (aa)[a->diag[i]];
2269:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2270:       v  = aa + ai[i];
2271:       nz = ai[i + 1] - ai[i];
2272:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2273:       if (rs > sctx.shift_top) sctx.shift_top = rs;
2274:     }
2275:     sctx.shift_top *= 1.1;
2276:     sctx.nshift_max = 5;
2277:     sctx.shift_lo   = 0.;
2278:     sctx.shift_hi   = 1.;
2279:   }

2281:   PetscCall(ISGetIndices(ip, &rip));
2282:   PetscCall(ISGetIndices(iip, &riip));

2284:   /* initialization */
2285:   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));

2287:   do {
2288:     sctx.newshift = PETSC_FALSE;

2290:     for (i = 0; i < mbs; i++) jl[i] = mbs;
2291:     il[0] = 0;

2293:     for (k = 0; k < mbs; k++) {
2294:       /* zero rtmp */
2295:       nz    = bi[k + 1] - bi[k];
2296:       bjtmp = bj + bi[k];
2297:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

2299:       bval = ba + bi[k];
2300:       /* initialize k-th row by the perm[k]-th row of A */
2301:       jmin = ai[rip[k]];
2302:       jmax = ai[rip[k] + 1];
2303:       for (j = jmin; j < jmax; j++) {
2304:         col = riip[aj[j]];
2305:         if (col >= k) { /* only take upper triangular entry */
2306:           rtmp[col] = aa[j];
2307:           *bval++   = 0.0; /* for in-place factorization */
2308:         }
2309:       }
2310:       /* shift the diagonal of the matrix */
2311:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

2313:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2314:       dk = rtmp[k];
2315:       i  = jl[k]; /* first row to be added to k_th row  */

2317:       while (i < k) {
2318:         nexti = jl[i]; /* next row to be added to k_th row */

2320:         /* compute multiplier, update diag(k) and U(i,k) */
2321:         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
2322:         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
2323:         dk += uikdi * ba[ili];
2324:         ba[ili] = uikdi; /* -U(i,k) */

2326:         /* add multiple of row i to k-th row */
2327:         jmin = ili + 1;
2328:         jmax = bi[i + 1];
2329:         if (jmin < jmax) {
2330:           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2331:           /* update il and jl for row i */
2332:           il[i] = jmin;
2333:           j     = bj[jmin];
2334:           jl[i] = jl[j];
2335:           jl[j] = i;
2336:         }
2337:         i = nexti;
2338:       }

2340:       /* shift the diagonals when zero pivot is detected */
2341:       /* compute rs=sum of abs(off-diagonal) */
2342:       rs   = 0.0;
2343:       jmin = bi[k] + 1;
2344:       nz   = bi[k + 1] - jmin;
2345:       bcol = bj + jmin;
2346:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);

2348:       sctx.rs = rs;
2349:       sctx.pv = dk;
2350:       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
2351:       if (sctx.newshift) break;
2352:       dk = sctx.pv;

2354:       /* copy data into U(k,:) */
2355:       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
2356:       jmin      = bi[k] + 1;
2357:       jmax      = bi[k + 1];
2358:       if (jmin < jmax) {
2359:         for (j = jmin; j < jmax; j++) {
2360:           col   = bj[j];
2361:           ba[j] = rtmp[col];
2362:         }
2363:         /* add the k-th row into il and jl */
2364:         il[k] = jmin;
2365:         i     = bj[jmin];
2366:         jl[k] = jl[i];
2367:         jl[i] = k;
2368:       }
2369:     }
2370:   } while (sctx.newshift);

2372:   PetscCall(PetscFree3(rtmp, il, jl));
2373:   PetscCall(ISRestoreIndices(ip, &rip));
2374:   PetscCall(ISRestoreIndices(iip, &riip));

2376:   PetscCall(ISIdentity(ip, &perm_identity));
2377:   if (perm_identity) {
2378:     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2379:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2380:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2381:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2382:   } else {
2383:     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2384:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2385:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2386:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2387:   }

2389:   C->assembled    = PETSC_TRUE;
2390:   C->preallocated = PETSC_TRUE;

2392:   PetscCall(PetscLogFlops(C->rmap->n));
2393:   if (sctx.nshift) {
2394:     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2395:       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2396:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2397:       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2398:     }
2399:   }
2400:   PetscFunctionReturn(PETSC_SUCCESS);
2401: }

2403: /*
2404:    icc() under revised new data structure.
2405:    Factored arrays bj and ba are stored as
2406:      U(0,:),...,U(i,:),U(n-1,:)

2408:    ui=fact->i is an array of size n+1, in which
2409:    ui+
2410:      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2411:      ui[n]:  points to U(n-1,n-1)+1

2413:   udiag=fact->diag is an array of size n,in which
2414:      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1

2416:    U(i,:) contains udiag[i] as its last entry, i.e.,
2417:     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2418: */

2420: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2421: {
2422:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2423:   Mat_SeqSBAIJ      *b;
2424:   PetscBool          perm_identity, missing;
2425:   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2426:   const PetscInt    *rip, *riip;
2427:   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2428:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2429:   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2430:   PetscReal          fill = info->fill, levels = info->levels;
2431:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2432:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2433:   PetscBT            lnkbt;
2434:   IS                 iperm;

2436:   PetscFunctionBegin;
2437:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2438:   PetscCall(MatMissingDiagonal(A, &missing, &d));
2439:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2440:   PetscCall(ISIdentity(perm, &perm_identity));
2441:   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));

2443:   PetscCall(PetscMalloc1(am + 1, &ui));
2444:   PetscCall(PetscMalloc1(am + 1, &udiag));
2445:   ui[0] = 0;

2447:   /* ICC(0) without matrix ordering: simply rearrange column indices */
2448:   if (!levels && perm_identity) {
2449:     for (i = 0; i < am; i++) {
2450:       ncols     = ai[i + 1] - a->diag[i];
2451:       ui[i + 1] = ui[i] + ncols;
2452:       udiag[i]  = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2453:     }
2454:     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2455:     cols = uj;
2456:     for (i = 0; i < am; i++) {
2457:       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2458:       ncols = ai[i + 1] - a->diag[i] - 1;
2459:       for (j = 0; j < ncols; j++) *cols++ = aj[j];
2460:       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2461:     }
2462:   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2463:     PetscCall(ISGetIndices(iperm, &riip));
2464:     PetscCall(ISGetIndices(perm, &rip));

2466:     /* initialization */
2467:     PetscCall(PetscMalloc1(am + 1, &ajtmp));

2469:     /* jl: linked list for storing indices of the pivot rows
2470:        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2471:     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2472:     for (i = 0; i < am; i++) {
2473:       jl[i] = am;
2474:       il[i] = 0;
2475:     }

2477:     /* create and initialize a linked list for storing column indices of the active row k */
2478:     nlnk = am + 1;
2479:     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));

2481:     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2482:     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2483:     current_space = free_space;
2484:     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
2485:     current_space_lvl = free_space_lvl;

2487:     for (k = 0; k < am; k++) { /* for each active row k */
2488:       /* initialize lnk by the column indices of row rip[k] of A */
2489:       nzk   = 0;
2490:       ncols = ai[rip[k] + 1] - ai[rip[k]];
2491:       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2492:       ncols_upper = 0;
2493:       for (j = 0; j < ncols; j++) {
2494:         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2495:         if (riip[i] >= k) {         /* only take upper triangular entry */
2496:           ajtmp[ncols_upper] = i;
2497:           ncols_upper++;
2498:         }
2499:       }
2500:       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2501:       nzk += nlnk;

2503:       /* update lnk by computing fill-in for each pivot row to be merged in */
2504:       prow = jl[k]; /* 1st pivot row */

2506:       while (prow < k) {
2507:         nextprow = jl[prow];

2509:         /* merge prow into k-th row */
2510:         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2511:         jmax  = ui[prow + 1];
2512:         ncols = jmax - jmin;
2513:         i     = jmin - ui[prow];
2514:         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2515:         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2516:         j     = *(uj - 1);
2517:         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2518:         nzk += nlnk;

2520:         /* update il and jl for prow */
2521:         if (jmin < jmax) {
2522:           il[prow] = jmin;
2523:           j        = *cols;
2524:           jl[prow] = jl[j];
2525:           jl[j]    = prow;
2526:         }
2527:         prow = nextprow;
2528:       }

2530:       /* if free space is not available, make more free space */
2531:       if (current_space->local_remaining < nzk) {
2532:         i = am - k + 1;                                    /* num of unfactored rows */
2533:         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2534:         PetscCall(PetscFreeSpaceGet(i, &current_space));
2535:         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2536:         reallocs++;
2537:       }

2539:       /* copy data into free_space and free_space_lvl, then initialize lnk */
2540:       PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2541:       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));

2543:       /* add the k-th row into il and jl */
2544:       if (nzk > 1) {
2545:         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2546:         jl[k] = jl[i];
2547:         jl[i] = k;
2548:         il[k] = ui[k] + 1;
2549:       }
2550:       uj_ptr[k]     = current_space->array;
2551:       uj_lvl_ptr[k] = current_space_lvl->array;

2553:       current_space->array += nzk;
2554:       current_space->local_used += nzk;
2555:       current_space->local_remaining -= nzk;

2557:       current_space_lvl->array += nzk;
2558:       current_space_lvl->local_used += nzk;
2559:       current_space_lvl->local_remaining -= nzk;

2561:       ui[k + 1] = ui[k] + nzk;
2562:     }

2564:     PetscCall(ISRestoreIndices(perm, &rip));
2565:     PetscCall(ISRestoreIndices(iperm, &riip));
2566:     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2567:     PetscCall(PetscFree(ajtmp));

2569:     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2570:     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2571:     PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor  */
2572:     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2573:     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));

2575:   } /* end of case: levels>0 || (levels=0 && !perm_identity) */

2577:   /* put together the new matrix in MATSEQSBAIJ format */
2578:   b               = (Mat_SeqSBAIJ *)(fact)->data;
2579:   b->singlemalloc = PETSC_FALSE;

2581:   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));

2583:   b->j         = uj;
2584:   b->i         = ui;
2585:   b->diag      = udiag;
2586:   b->free_diag = PETSC_TRUE;
2587:   b->ilen      = NULL;
2588:   b->imax      = NULL;
2589:   b->row       = perm;
2590:   b->col       = perm;
2591:   PetscCall(PetscObjectReference((PetscObject)perm));
2592:   PetscCall(PetscObjectReference((PetscObject)perm));
2593:   b->icol          = iperm;
2594:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

2596:   PetscCall(PetscMalloc1(am + 1, &b->solve_work));

2598:   b->maxnz = b->nz = ui[am];
2599:   b->free_a        = PETSC_TRUE;
2600:   b->free_ij       = PETSC_TRUE;

2602:   fact->info.factor_mallocs   = reallocs;
2603:   fact->info.fill_ratio_given = fill;
2604:   if (ai[am] != 0) {
2605:     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2606:     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2607:   } else {
2608:     fact->info.fill_ratio_needed = 0.0;
2609:   }
2610: #if defined(PETSC_USE_INFO)
2611:   if (ai[am] != 0) {
2612:     PetscReal af = fact->info.fill_ratio_needed;
2613:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2614:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2615:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2616:   } else {
2617:     PetscCall(PetscInfo(A, "Empty matrix\n"));
2618:   }
2619: #endif
2620:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2621:   PetscFunctionReturn(PETSC_SUCCESS);
2622: }

2624: #if 0
2625: // unused
2626: static PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2627: {
2628:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2629:   Mat_SeqSBAIJ      *b;
2630:   PetscBool          perm_identity, missing;
2631:   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2632:   const PetscInt    *rip, *riip;
2633:   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2634:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2635:   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2636:   PetscReal          fill = info->fill, levels = info->levels;
2637:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2638:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2639:   PetscBT            lnkbt;
2640:   IS                 iperm;

2642:   PetscFunctionBegin;
2643:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2644:   PetscCall(MatMissingDiagonal(A, &missing, &d));
2645:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2646:   PetscCall(ISIdentity(perm, &perm_identity));
2647:   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));

2649:   PetscCall(PetscMalloc1(am + 1, &ui));
2650:   PetscCall(PetscMalloc1(am + 1, &udiag));
2651:   ui[0] = 0;

2653:   /* ICC(0) without matrix ordering: simply copies fill pattern */
2654:   if (!levels && perm_identity) {
2655:     for (i = 0; i < am; i++) {
2656:       ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i];
2657:       udiag[i]  = ui[i];
2658:     }
2659:     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2660:     cols = uj;
2661:     for (i = 0; i < am; i++) {
2662:       aj    = a->j + a->diag[i];
2663:       ncols = ui[i + 1] - ui[i];
2664:       for (j = 0; j < ncols; j++) *cols++ = *aj++;
2665:     }
2666:   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2667:     PetscCall(ISGetIndices(iperm, &riip));
2668:     PetscCall(ISGetIndices(perm, &rip));

2670:     /* initialization */
2671:     PetscCall(PetscMalloc1(am + 1, &ajtmp));

2673:     /* jl: linked list for storing indices of the pivot rows
2674:        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2675:     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2676:     for (i = 0; i < am; i++) {
2677:       jl[i] = am;
2678:       il[i] = 0;
2679:     }

2681:     /* create and initialize a linked list for storing column indices of the active row k */
2682:     nlnk = am + 1;
2683:     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));

2685:     /* initial FreeSpace size is fill*(ai[am]+1) */
2686:     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2687:     current_space = free_space;
2688:     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl));
2689:     current_space_lvl = free_space_lvl;

2691:     for (k = 0; k < am; k++) { /* for each active row k */
2692:       /* initialize lnk by the column indices of row rip[k] of A */
2693:       nzk   = 0;
2694:       ncols = ai[rip[k] + 1] - ai[rip[k]];
2695:       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2696:       ncols_upper = 0;
2697:       for (j = 0; j < ncols; j++) {
2698:         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2699:         if (riip[i] >= k) {         /* only take upper triangular entry */
2700:           ajtmp[ncols_upper] = i;
2701:           ncols_upper++;
2702:         }
2703:       }
2704:       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2705:       nzk += nlnk;

2707:       /* update lnk by computing fill-in for each pivot row to be merged in */
2708:       prow = jl[k]; /* 1st pivot row */

2710:       while (prow < k) {
2711:         nextprow = jl[prow];

2713:         /* merge prow into k-th row */
2714:         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2715:         jmax  = ui[prow + 1];
2716:         ncols = jmax - jmin;
2717:         i     = jmin - ui[prow];
2718:         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2719:         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2720:         j     = *(uj - 1);
2721:         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2722:         nzk += nlnk;

2724:         /* update il and jl for prow */
2725:         if (jmin < jmax) {
2726:           il[prow] = jmin;
2727:           j        = *cols;
2728:           jl[prow] = jl[j];
2729:           jl[j]    = prow;
2730:         }
2731:         prow = nextprow;
2732:       }

2734:       /* if free space is not available, make more free space */
2735:       if (current_space->local_remaining < nzk) {
2736:         i = am - k + 1;                                    /* num of unfactored rows */
2737:         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2738:         PetscCall(PetscFreeSpaceGet(i, &current_space));
2739:         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2740:         reallocs++;
2741:       }

2743:       /* copy data into free_space and free_space_lvl, then initialize lnk */
2744:       PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2745:       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));

2747:       /* add the k-th row into il and jl */
2748:       if (nzk > 1) {
2749:         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2750:         jl[k] = jl[i];
2751:         jl[i] = k;
2752:         il[k] = ui[k] + 1;
2753:       }
2754:       uj_ptr[k]     = current_space->array;
2755:       uj_lvl_ptr[k] = current_space_lvl->array;

2757:       current_space->array += nzk;
2758:       current_space->local_used += nzk;
2759:       current_space->local_remaining -= nzk;

2761:       current_space_lvl->array += nzk;
2762:       current_space_lvl->local_used += nzk;
2763:       current_space_lvl->local_remaining -= nzk;

2765:       ui[k + 1] = ui[k] + nzk;
2766:     }

2768:   #if defined(PETSC_USE_INFO)
2769:     if (ai[am] != 0) {
2770:       PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]);
2771:       PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2772:       PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2773:       PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2774:     } else {
2775:       PetscCall(PetscInfo(A, "Empty matrix\n"));
2776:     }
2777:   #endif

2779:     PetscCall(ISRestoreIndices(perm, &rip));
2780:     PetscCall(ISRestoreIndices(iperm, &riip));
2781:     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2782:     PetscCall(PetscFree(ajtmp));

2784:     /* destroy list of free space and other temporary array(s) */
2785:     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2786:     PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
2787:     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2788:     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));

2790:   } /* end of case: levels>0 || (levels=0 && !perm_identity) */

2792:   /* put together the new matrix in MATSEQSBAIJ format */

2794:   b               = (Mat_SeqSBAIJ *)fact->data;
2795:   b->singlemalloc = PETSC_FALSE;

2797:   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));

2799:   b->j         = uj;
2800:   b->i         = ui;
2801:   b->diag      = udiag;
2802:   b->free_diag = PETSC_TRUE;
2803:   b->ilen      = NULL;
2804:   b->imax      = NULL;
2805:   b->row       = perm;
2806:   b->col       = perm;

2808:   PetscCall(PetscObjectReference((PetscObject)perm));
2809:   PetscCall(PetscObjectReference((PetscObject)perm));

2811:   b->icol          = iperm;
2812:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2813:   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2814:   b->maxnz = b->nz = ui[am];
2815:   b->free_a        = PETSC_TRUE;
2816:   b->free_ij       = PETSC_TRUE;

2818:   fact->info.factor_mallocs   = reallocs;
2819:   fact->info.fill_ratio_given = fill;
2820:   if (ai[am] != 0) {
2821:     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
2822:   } else {
2823:     fact->info.fill_ratio_needed = 0.0;
2824:   }
2825:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2826:   PetscFunctionReturn(PETSC_SUCCESS);
2827: }
2828: #endif

2830: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2831: {
2832:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2833:   Mat_SeqSBAIJ      *b;
2834:   PetscBool          perm_identity, missing;
2835:   PetscReal          fill = info->fill;
2836:   const PetscInt    *rip, *riip;
2837:   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2838:   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2839:   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2840:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2841:   PetscBT            lnkbt;
2842:   IS                 iperm;

2844:   PetscFunctionBegin;
2845:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2846:   PetscCall(MatMissingDiagonal(A, &missing, &i));
2847:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

2849:   /* check whether perm is the identity mapping */
2850:   PetscCall(ISIdentity(perm, &perm_identity));
2851:   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2852:   PetscCall(ISGetIndices(iperm, &riip));
2853:   PetscCall(ISGetIndices(perm, &rip));

2855:   /* initialization */
2856:   PetscCall(PetscMalloc1(am + 1, &ui));
2857:   PetscCall(PetscMalloc1(am + 1, &udiag));
2858:   ui[0] = 0;

2860:   /* jl: linked list for storing indices of the pivot rows
2861:      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2862:   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2863:   for (i = 0; i < am; i++) {
2864:     jl[i] = am;
2865:     il[i] = 0;
2866:   }

2868:   /* create and initialize a linked list for storing column indices of the active row k */
2869:   nlnk = am + 1;
2870:   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));

2872:   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2873:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2874:   current_space = free_space;

2876:   for (k = 0; k < am; k++) { /* for each active row k */
2877:     /* initialize lnk by the column indices of row rip[k] of A */
2878:     nzk   = 0;
2879:     ncols = ai[rip[k] + 1] - ai[rip[k]];
2880:     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2881:     ncols_upper = 0;
2882:     for (j = 0; j < ncols; j++) {
2883:       i = riip[*(aj + ai[rip[k]] + j)];
2884:       if (i >= k) { /* only take upper triangular entry */
2885:         cols[ncols_upper] = i;
2886:         ncols_upper++;
2887:       }
2888:     }
2889:     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2890:     nzk += nlnk;

2892:     /* update lnk by computing fill-in for each pivot row to be merged in */
2893:     prow = jl[k]; /* 1st pivot row */

2895:     while (prow < k) {
2896:       nextprow = jl[prow];
2897:       /* merge prow into k-th row */
2898:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2899:       jmax   = ui[prow + 1];
2900:       ncols  = jmax - jmin;
2901:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2902:       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2903:       nzk += nlnk;

2905:       /* update il and jl for prow */
2906:       if (jmin < jmax) {
2907:         il[prow] = jmin;
2908:         j        = *uj_ptr;
2909:         jl[prow] = jl[j];
2910:         jl[j]    = prow;
2911:       }
2912:       prow = nextprow;
2913:     }

2915:     /* if free space is not available, make more free space */
2916:     if (current_space->local_remaining < nzk) {
2917:       i = am - k + 1;                                    /* num of unfactored rows */
2918:       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2919:       PetscCall(PetscFreeSpaceGet(i, &current_space));
2920:       reallocs++;
2921:     }

2923:     /* copy data into free space, then initialize lnk */
2924:     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));

2926:     /* add the k-th row into il and jl */
2927:     if (nzk > 1) {
2928:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2929:       jl[k] = jl[i];
2930:       jl[i] = k;
2931:       il[k] = ui[k] + 1;
2932:     }
2933:     ui_ptr[k] = current_space->array;

2935:     current_space->array += nzk;
2936:     current_space->local_used += nzk;
2937:     current_space->local_remaining -= nzk;

2939:     ui[k + 1] = ui[k] + nzk;
2940:   }

2942:   PetscCall(ISRestoreIndices(perm, &rip));
2943:   PetscCall(ISRestoreIndices(iperm, &riip));
2944:   PetscCall(PetscFree4(ui_ptr, jl, il, cols));

2946:   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2947:   PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2948:   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2949:   PetscCall(PetscLLDestroy(lnk, lnkbt));

2951:   /* put together the new matrix in MATSEQSBAIJ format */

2953:   b               = (Mat_SeqSBAIJ *)fact->data;
2954:   b->singlemalloc = PETSC_FALSE;
2955:   b->free_a       = PETSC_TRUE;
2956:   b->free_ij      = PETSC_TRUE;

2958:   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));

2960:   b->j         = uj;
2961:   b->i         = ui;
2962:   b->diag      = udiag;
2963:   b->free_diag = PETSC_TRUE;
2964:   b->ilen      = NULL;
2965:   b->imax      = NULL;
2966:   b->row       = perm;
2967:   b->col       = perm;

2969:   PetscCall(PetscObjectReference((PetscObject)perm));
2970:   PetscCall(PetscObjectReference((PetscObject)perm));

2972:   b->icol          = iperm;
2973:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

2975:   PetscCall(PetscMalloc1(am + 1, &b->solve_work));

2977:   b->maxnz = b->nz = ui[am];

2979:   fact->info.factor_mallocs   = reallocs;
2980:   fact->info.fill_ratio_given = fill;
2981:   if (ai[am] != 0) {
2982:     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2983:     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2984:   } else {
2985:     fact->info.fill_ratio_needed = 0.0;
2986:   }
2987: #if defined(PETSC_USE_INFO)
2988:   if (ai[am] != 0) {
2989:     PetscReal af = fact->info.fill_ratio_needed;
2990:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2991:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2992:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2993:   } else {
2994:     PetscCall(PetscInfo(A, "Empty matrix\n"));
2995:   }
2996: #endif
2997:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2998:   PetscFunctionReturn(PETSC_SUCCESS);
2999: }

3001: #if 0
3002: // unused
3003: static PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
3004: {
3005:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
3006:   Mat_SeqSBAIJ      *b;
3007:   PetscBool          perm_identity, missing;
3008:   PetscReal          fill = info->fill;
3009:   const PetscInt    *rip, *riip;
3010:   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
3011:   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
3012:   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
3013:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
3014:   PetscBT            lnkbt;
3015:   IS                 iperm;

3017:   PetscFunctionBegin;
3018:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
3019:   PetscCall(MatMissingDiagonal(A, &missing, &i));
3020:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

3022:   /* check whether perm is the identity mapping */
3023:   PetscCall(ISIdentity(perm, &perm_identity));
3024:   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
3025:   PetscCall(ISGetIndices(iperm, &riip));
3026:   PetscCall(ISGetIndices(perm, &rip));

3028:   /* initialization */
3029:   PetscCall(PetscMalloc1(am + 1, &ui));
3030:   ui[0] = 0;

3032:   /* jl: linked list for storing indices of the pivot rows
3033:      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
3034:   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
3035:   for (i = 0; i < am; i++) {
3036:     jl[i] = am;
3037:     il[i] = 0;
3038:   }

3040:   /* create and initialize a linked list for storing column indices of the active row k */
3041:   nlnk = am + 1;
3042:   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));

3044:   /* initial FreeSpace size is fill*(ai[am]+1) */
3045:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
3046:   current_space = free_space;

3048:   for (k = 0; k < am; k++) { /* for each active row k */
3049:     /* initialize lnk by the column indices of row rip[k] of A */
3050:     nzk   = 0;
3051:     ncols = ai[rip[k] + 1] - ai[rip[k]];
3052:     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
3053:     ncols_upper = 0;
3054:     for (j = 0; j < ncols; j++) {
3055:       i = riip[*(aj + ai[rip[k]] + j)];
3056:       if (i >= k) { /* only take upper triangular entry */
3057:         cols[ncols_upper] = i;
3058:         ncols_upper++;
3059:       }
3060:     }
3061:     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
3062:     nzk += nlnk;

3064:     /* update lnk by computing fill-in for each pivot row to be merged in */
3065:     prow = jl[k]; /* 1st pivot row */

3067:     while (prow < k) {
3068:       nextprow = jl[prow];
3069:       /* merge prow into k-th row */
3070:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3071:       jmax   = ui[prow + 1];
3072:       ncols  = jmax - jmin;
3073:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3074:       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
3075:       nzk += nlnk;

3077:       /* update il and jl for prow */
3078:       if (jmin < jmax) {
3079:         il[prow] = jmin;
3080:         j        = *uj_ptr;
3081:         jl[prow] = jl[j];
3082:         jl[j]    = prow;
3083:       }
3084:       prow = nextprow;
3085:     }

3087:     /* if free space is not available, make more free space */
3088:     if (current_space->local_remaining < nzk) {
3089:       i = am - k + 1;                     /* num of unfactored rows */
3090:       i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3091:       PetscCall(PetscFreeSpaceGet(i, &current_space));
3092:       reallocs++;
3093:     }

3095:     /* copy data into free space, then initialize lnk */
3096:     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));

3098:     /* add the k-th row into il and jl */
3099:     if (nzk - 1 > 0) {
3100:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3101:       jl[k] = jl[i];
3102:       jl[i] = k;
3103:       il[k] = ui[k] + 1;
3104:     }
3105:     ui_ptr[k] = current_space->array;

3107:     current_space->array += nzk;
3108:     current_space->local_used += nzk;
3109:     current_space->local_remaining -= nzk;

3111:     ui[k + 1] = ui[k] + nzk;
3112:   }

3114:   #if defined(PETSC_USE_INFO)
3115:   if (ai[am] != 0) {
3116:     PetscReal af = (PetscReal)ui[am] / (PetscReal)ai[am];
3117:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
3118:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
3119:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
3120:   } else {
3121:     PetscCall(PetscInfo(A, "Empty matrix\n"));
3122:   }
3123:   #endif

3125:   PetscCall(ISRestoreIndices(perm, &rip));
3126:   PetscCall(ISRestoreIndices(iperm, &riip));
3127:   PetscCall(PetscFree4(ui_ptr, jl, il, cols));

3129:   /* destroy list of free space and other temporary array(s) */
3130:   PetscCall(PetscMalloc1(ui[am] + 1, &uj));
3131:   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
3132:   PetscCall(PetscLLDestroy(lnk, lnkbt));

3134:   /* put together the new matrix in MATSEQSBAIJ format */

3136:   b               = (Mat_SeqSBAIJ *)fact->data;
3137:   b->singlemalloc = PETSC_FALSE;
3138:   b->free_a       = PETSC_TRUE;
3139:   b->free_ij      = PETSC_TRUE;

3141:   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));

3143:   b->j    = uj;
3144:   b->i    = ui;
3145:   b->diag = NULL;
3146:   b->ilen = NULL;
3147:   b->imax = NULL;
3148:   b->row  = perm;
3149:   b->col  = perm;

3151:   PetscCall(PetscObjectReference((PetscObject)perm));
3152:   PetscCall(PetscObjectReference((PetscObject)perm));

3154:   b->icol          = iperm;
3155:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

3157:   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
3158:   b->maxnz = b->nz = ui[am];

3160:   fact->info.factor_mallocs   = reallocs;
3161:   fact->info.fill_ratio_given = fill;
3162:   if (ai[am] != 0) {
3163:     fact->info.fill_ratio_needed = (PetscReal)ui[am] / (PetscReal)ai[am];
3164:   } else {
3165:     fact->info.fill_ratio_needed = 0.0;
3166:   }
3167:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3168:   PetscFunctionReturn(PETSC_SUCCESS);
3169: }
3170: #endif

3172: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
3173: {
3174:   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
3175:   PetscInt           n  = A->rmap->n;
3176:   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
3177:   PetscScalar       *x, sum;
3178:   const PetscScalar *b;
3179:   const MatScalar   *aa = a->a, *v;
3180:   PetscInt           i, nz;

3182:   PetscFunctionBegin;
3183:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

3185:   PetscCall(VecGetArrayRead(bb, &b));
3186:   PetscCall(VecGetArrayWrite(xx, &x));

3188:   /* forward solve the lower triangular */
3189:   x[0] = b[0];
3190:   v    = aa;
3191:   vi   = aj;
3192:   for (i = 1; i < n; i++) {
3193:     nz  = ai[i + 1] - ai[i];
3194:     sum = b[i];
3195:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3196:     v += nz;
3197:     vi += nz;
3198:     x[i] = sum;
3199:   }

3201:   /* backward solve the upper triangular */
3202:   for (i = n - 1; i >= 0; i--) {
3203:     v   = aa + adiag[i + 1] + 1;
3204:     vi  = aj + adiag[i + 1] + 1;
3205:     nz  = adiag[i] - adiag[i + 1] - 1;
3206:     sum = x[i];
3207:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3208:     x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3209:   }

3211:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3212:   PetscCall(VecRestoreArrayRead(bb, &b));
3213:   PetscCall(VecRestoreArrayWrite(xx, &x));
3214:   PetscFunctionReturn(PETSC_SUCCESS);
3215: }

3217: PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
3218: {
3219:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
3220:   IS                 iscol = a->col, isrow = a->row;
3221:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
3222:   const PetscInt    *rout, *cout, *r, *c;
3223:   PetscScalar       *x, *tmp, sum;
3224:   const PetscScalar *b;
3225:   const MatScalar   *aa = a->a, *v;

3227:   PetscFunctionBegin;
3228:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

3230:   PetscCall(VecGetArrayRead(bb, &b));
3231:   PetscCall(VecGetArrayWrite(xx, &x));
3232:   tmp = a->solve_work;

3234:   PetscCall(ISGetIndices(isrow, &rout));
3235:   r = rout;
3236:   PetscCall(ISGetIndices(iscol, &cout));
3237:   c = cout;

3239:   /* forward solve the lower triangular */
3240:   tmp[0] = b[r[0]];
3241:   v      = aa;
3242:   vi     = aj;
3243:   for (i = 1; i < n; i++) {
3244:     nz  = ai[i + 1] - ai[i];
3245:     sum = b[r[i]];
3246:     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3247:     tmp[i] = sum;
3248:     v += nz;
3249:     vi += nz;
3250:   }

3252:   /* backward solve the upper triangular */
3253:   for (i = n - 1; i >= 0; i--) {
3254:     v   = aa + adiag[i + 1] + 1;
3255:     vi  = aj + adiag[i + 1] + 1;
3256:     nz  = adiag[i] - adiag[i + 1] - 1;
3257:     sum = tmp[i];
3258:     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3259:     x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
3260:   }

3262:   PetscCall(ISRestoreIndices(isrow, &rout));
3263:   PetscCall(ISRestoreIndices(iscol, &cout));
3264:   PetscCall(VecRestoreArrayRead(bb, &b));
3265:   PetscCall(VecRestoreArrayWrite(xx, &x));
3266:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3267:   PetscFunctionReturn(PETSC_SUCCESS);
3268: }

3270: #if 0
3271: // unused
3272: /*
3273:     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3274: */
3275: static PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact)
3276: {
3277:   Mat             B = *fact;
3278:   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b;
3279:   IS              isicol;
3280:   const PetscInt *r, *ic;
3281:   PetscInt        i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag;
3282:   PetscInt       *bi, *bj, *bdiag, *bdiag_rev;
3283:   PetscInt        row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au;
3284:   PetscInt        nlnk, *lnk;
3285:   PetscBT         lnkbt;
3286:   PetscBool       row_identity, icol_identity;
3287:   MatScalar      *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp;
3288:   const PetscInt *ics;
3289:   PetscInt        j, nz, *pj, *bjtmp, k, ncut, *jtmp;
3290:   PetscReal       dt = info->dt, shift = info->shiftamount;
3291:   PetscInt        dtcount = (PetscInt)info->dtcount, nnz_max;
3292:   PetscBool       missing;

3294:   PetscFunctionBegin;
3295:   if (dt == (PetscReal)PETSC_DEFAULT) dt = 0.005;
3296:   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax);

3298:   /* symbolic factorization, can be reused */
3299:   PetscCall(MatMissingDiagonal(A, &missing, &i));
3300:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
3301:   adiag = a->diag;

3303:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));

3305:   /* bdiag is location of diagonal in factor */
3306:   PetscCall(PetscMalloc1(n + 1, &bdiag));     /* becomes b->diag */
3307:   PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */

3309:   /* allocate row pointers bi */
3310:   PetscCall(PetscMalloc1(2 * n + 2, &bi));

3312:   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3313:   if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */
3314:   nnz_max = ai[n] + 2 * n * dtcount + 2;

3316:   PetscCall(PetscMalloc1(nnz_max + 1, &bj));
3317:   PetscCall(PetscMalloc1(nnz_max + 1, &ba));

3319:   /* put together the new matrix */
3320:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
3321:   b = (Mat_SeqAIJ *)B->data;

3323:   b->free_a       = PETSC_TRUE;
3324:   b->free_ij      = PETSC_TRUE;
3325:   b->singlemalloc = PETSC_FALSE;

3327:   b->a    = ba;
3328:   b->j    = bj;
3329:   b->i    = bi;
3330:   b->diag = bdiag;
3331:   b->ilen = NULL;
3332:   b->imax = NULL;
3333:   b->row  = isrow;
3334:   b->col  = iscol;
3335:   PetscCall(PetscObjectReference((PetscObject)isrow));
3336:   PetscCall(PetscObjectReference((PetscObject)iscol));
3337:   b->icol = isicol;

3339:   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
3340:   b->maxnz = nnz_max;

3342:   B->factortype            = MAT_FACTOR_ILUDT;
3343:   B->info.factor_mallocs   = 0;
3344:   B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]);
3345:   /*  end of symbolic factorization */

3347:   PetscCall(ISGetIndices(isrow, &r));
3348:   PetscCall(ISGetIndices(isicol, &ic));
3349:   ics = ic;

3351:   /* linked list for storing column indices of the active row */
3352:   nlnk = n + 1;
3353:   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));

3355:   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3356:   PetscCall(PetscMalloc2(n, &im, n, &jtmp));
3357:   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3358:   PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp));
3359:   PetscCall(PetscArrayzero(rtmp, n));

3361:   bi[0]         = 0;
3362:   bdiag[0]      = nnz_max - 1; /* location of diag[0] in factor B */
3363:   bdiag_rev[n]  = bdiag[0];
3364:   bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */
3365:   for (i = 0; i < n; i++) {
3366:     /* copy initial fill into linked list */
3367:     nzi = ai[r[i] + 1] - ai[r[i]];
3368:     PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
3369:     nzi_al = adiag[r[i]] - ai[r[i]];
3370:     nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1;
3371:     ajtmp  = aj + ai[r[i]];
3372:     PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt));

3374:     /* load in initial (unfactored row) */
3375:     aatmp = a->a + ai[r[i]];
3376:     for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++;

3378:     /* add pivot rows into linked list */
3379:     row = lnk[n];
3380:     while (row < i) {
3381:       nzi_bl = bi[row + 1] - bi[row] + 1;
3382:       bjtmp  = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */
3383:       PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im));
3384:       nzi += nlnk;
3385:       row = lnk[row];
3386:     }

3388:     /* copy data from lnk into jtmp, then initialize lnk */
3389:     PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt));

3391:     /* numerical factorization */
3392:     bjtmp = jtmp;
3393:     row   = *bjtmp++; /* 1st pivot row */
3394:     while (row < i) {
3395:       pc         = rtmp + row;
3396:       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3397:       multiplier = (*pc) * (*pv);
3398:       *pc        = multiplier;
3399:       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3400:         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3401:         pv = ba + bdiag[row + 1] + 1;
3402:         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3403:         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3404:         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3405:       }
3406:       row = *bjtmp++;
3407:     }

3409:     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3410:     diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3411:     nzi_bl   = 0;
3412:     j        = 0;
3413:     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3414:       vtmp[j]       = rtmp[jtmp[j]];
3415:       rtmp[jtmp[j]] = 0.0;
3416:       nzi_bl++;
3417:       j++;
3418:     }
3419:     nzi_bu = nzi - nzi_bl - 1;
3420:     while (j < nzi) {
3421:       vtmp[j]       = rtmp[jtmp[j]];
3422:       rtmp[jtmp[j]] = 0.0;
3423:       j++;
3424:     }

3426:     bjtmp = bj + bi[i];
3427:     batmp = ba + bi[i];
3428:     /* apply level dropping rule to L part */
3429:     ncut = nzi_al + dtcount;
3430:     if (ncut < nzi_bl) {
3431:       PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp));
3432:       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp));
3433:     } else {
3434:       ncut = nzi_bl;
3435:     }
3436:     for (j = 0; j < ncut; j++) {
3437:       bjtmp[j] = jtmp[j];
3438:       batmp[j] = vtmp[j];
3439:     }
3440:     bi[i + 1] = bi[i] + ncut;
3441:     nzi       = ncut + 1;

3443:     /* apply level dropping rule to U part */
3444:     ncut = nzi_au + dtcount;
3445:     if (ncut < nzi_bu) {
3446:       PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1));
3447:       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1));
3448:     } else {
3449:       ncut = nzi_bu;
3450:     }
3451:     nzi += ncut;

3453:     /* mark bdiagonal */
3454:     bdiag[i + 1]         = bdiag[i] - (ncut + 1);
3455:     bdiag_rev[n - i - 1] = bdiag[i + 1];
3456:     bi[2 * n - i]        = bi[2 * n - i + 1] - (ncut + 1);
3457:     bjtmp                = bj + bdiag[i];
3458:     batmp                = ba + bdiag[i];
3459:     *bjtmp               = i;
3460:     *batmp               = diag_tmp; /* rtmp[i]; */
3461:     if (*batmp == 0.0) *batmp = dt + shift;
3462:     *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */

3464:     bjtmp = bj + bdiag[i + 1] + 1;
3465:     batmp = ba + bdiag[i + 1] + 1;
3466:     for (k = 0; k < ncut; k++) {
3467:       bjtmp[k] = jtmp[nzi_bl + 1 + k];
3468:       batmp[k] = vtmp[nzi_bl + 1 + k];
3469:     }

3471:     im[i] = nzi; /* used by PetscLLAddSortedLU() */
3472:   }              /* for (i=0; i<n; i++) */
3473:   PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]);

3475:   PetscCall(ISRestoreIndices(isrow, &r));
3476:   PetscCall(ISRestoreIndices(isicol, &ic));

3478:   PetscCall(PetscLLDestroy(lnk, lnkbt));
3479:   PetscCall(PetscFree2(im, jtmp));
3480:   PetscCall(PetscFree2(rtmp, vtmp));
3481:   PetscCall(PetscFree(bdiag_rev));

3483:   PetscCall(PetscLogFlops(B->cmap->n));
3484:   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];

3486:   PetscCall(ISIdentity(isrow, &row_identity));
3487:   PetscCall(ISIdentity(isicol, &icol_identity));
3488:   if (row_identity && icol_identity) {
3489:     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3490:   } else {
3491:     B->ops->solve = MatSolve_SeqAIJ;
3492:   }

3494:   B->ops->solveadd          = NULL;
3495:   B->ops->solvetranspose    = NULL;
3496:   B->ops->solvetransposeadd = NULL;
3497:   B->ops->matsolve          = NULL;
3498:   B->assembled              = PETSC_TRUE;
3499:   B->preallocated           = PETSC_TRUE;
3500:   PetscFunctionReturn(PETSC_SUCCESS);
3501: }

3503: /* a wrapper of MatILUDTFactor_SeqAIJ() */
3504: /*
3505:     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3506: */

3508: static PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info)
3509: {
3510:   PetscFunctionBegin;
3511:   PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact));
3512:   PetscFunctionReturn(PETSC_SUCCESS);
3513: }

3515: /*
3516:    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3517:    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3518: */
3519: /*
3520:     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3521: */

3523: static PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info)
3524: {
3525:   Mat             C = fact;
3526:   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
3527:   IS              isrow = b->row, isicol = b->icol;
3528:   const PetscInt *r, *ic, *ics;
3529:   PetscInt        i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
3530:   PetscInt       *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj;
3531:   MatScalar      *rtmp, *pc, multiplier, *v, *pv, *aa = a->a;
3532:   PetscReal       dt = info->dt, shift = info->shiftamount;
3533:   PetscBool       row_identity, col_identity;

3535:   PetscFunctionBegin;
3536:   PetscCall(ISGetIndices(isrow, &r));
3537:   PetscCall(ISGetIndices(isicol, &ic));
3538:   PetscCall(PetscMalloc1(n + 1, &rtmp));
3539:   ics = ic;

3541:   for (i = 0; i < n; i++) {
3542:     /* initialize rtmp array */
3543:     nzl   = bi[i + 1] - bi[i]; /* num of nonzeros in L(i,:) */
3544:     bjtmp = bj + bi[i];
3545:     for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0;
3546:     rtmp[i] = 0.0;
3547:     nzu     = bdiag[i] - bdiag[i + 1]; /* num of nonzeros in U(i,:) */
3548:     bjtmp   = bj + bdiag[i + 1] + 1;
3549:     for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0;

3551:     /* load in initial unfactored row of A */
3552:     nz    = ai[r[i] + 1] - ai[r[i]];
3553:     ajtmp = aj + ai[r[i]];
3554:     v     = aa + ai[r[i]];
3555:     for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j];

3557:     /* numerical factorization */
3558:     bjtmp = bj + bi[i];        /* point to 1st entry of L(i,:) */
3559:     nzl   = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */
3560:     k     = 0;
3561:     while (k < nzl) {
3562:       row        = *bjtmp++;
3563:       pc         = rtmp + row;
3564:       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3565:       multiplier = (*pc) * (*pv);
3566:       *pc        = multiplier;
3567:       if (PetscAbsScalar(multiplier) > dt) {
3568:         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3569:         pv = b->a + bdiag[row + 1] + 1;
3570:         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3571:         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3572:         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3573:       }
3574:       k++;
3575:     }

3577:     /* finished row so stick it into b->a */
3578:     /* L-part */
3579:     pv  = b->a + bi[i];
3580:     pj  = bj + bi[i];
3581:     nzl = bi[i + 1] - bi[i];
3582:     for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]];

3584:     /* diagonal: invert diagonal entries for simpler triangular solves */
3585:     if (rtmp[i] == 0.0) rtmp[i] = dt + shift;
3586:     b->a[bdiag[i]] = 1.0 / rtmp[i];

3588:     /* U-part */
3589:     pv  = b->a + bdiag[i + 1] + 1;
3590:     pj  = bj + bdiag[i + 1] + 1;
3591:     nzu = bdiag[i] - bdiag[i + 1] - 1;
3592:     for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]];
3593:   }

3595:   PetscCall(PetscFree(rtmp));
3596:   PetscCall(ISRestoreIndices(isicol, &ic));
3597:   PetscCall(ISRestoreIndices(isrow, &r));

3599:   PetscCall(ISIdentity(isrow, &row_identity));
3600:   PetscCall(ISIdentity(isicol, &col_identity));
3601:   if (row_identity && col_identity) {
3602:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3603:   } else {
3604:     C->ops->solve = MatSolve_SeqAIJ;
3605:   }
3606:   C->ops->solveadd          = NULL;
3607:   C->ops->solvetranspose    = NULL;
3608:   C->ops->solvetransposeadd = NULL;
3609:   C->ops->matsolve          = NULL;
3610:   C->assembled              = PETSC_TRUE;
3611:   C->preallocated           = PETSC_TRUE;

3613:   PetscCall(PetscLogFlops(C->cmap->n));
3614:   PetscFunctionReturn(PETSC_SUCCESS);
3615: }
3616: #endif