Actual source code: spbas.c

  1: #include <../src/mat/impls/aij/seq/aij.h>
  2: #include <../src/mat/impls/aij/seq/bas/spbas.h>

  4: /*MC
  5:   MATSOLVERBAS -  Provides ICC(k) with drop tolerance

  7:   Works with `MATAIJ`  matrices

  9:   Options Database Keys:
 10: + -pc_factor_levels <l> - number of levels of fill
 11: - -pc_factor_drop_tolerance - is not currently hooked up to do anything

 13:   Level: intermediate

 15:    Contributed by: Bas van 't Hof

 17:    Note:
 18:     Since this currently hooked up to use drop tolerance it should produce the same factors and hence convergence as the PETSc ICC, for higher
 19:      levels of fill it does not. This needs to be investigated. Unless you are interested in drop tolerance ICC and willing to work through the code
 20:      we recommend not using this functionality.

 22: .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `PCFactorSetLevels()`, `PCFactorSetDropTolerance()`
 23: M*/

 25: /*
 26:   spbas_memory_requirement:
 27:     Calculate the number of bytes needed to store the matrix
 28: */
 29: size_t spbas_memory_requirement(spbas_matrix matrix)
 30: {
 31:   size_t memreq = 6 * sizeof(PetscInt) +             /* nrows, ncols, nnz, n_alloc_icol, n_alloc_val, col_idx_type */
 32:                   sizeof(PetscBool) +                /* block_data */
 33:                   sizeof(PetscScalar **) +           /* values */
 34:                   sizeof(PetscScalar *) +            /* alloc_val */
 35:                   2 * sizeof(PetscInt **) +          /* icols, icols0 */
 36:                   2 * sizeof(PetscInt *) +           /* row_nnz, alloc_icol */
 37:                   matrix.nrows * sizeof(PetscInt) +  /* row_nnz[*] */
 38:                   matrix.nrows * sizeof(PetscInt *); /* icols[*] */

 40:   /* icol0[*] */
 41:   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) memreq += matrix.nrows * sizeof(PetscInt);

 43:   /* icols[*][*] */
 44:   if (matrix.block_data) memreq += matrix.n_alloc_icol * sizeof(PetscInt);
 45:   else memreq += matrix.nnz * sizeof(PetscInt);

 47:   if (matrix.values) {
 48:     memreq += matrix.nrows * sizeof(PetscScalar *); /* values[*] */
 49:     /* values[*][*] */
 50:     if (matrix.block_data) memreq += matrix.n_alloc_val * sizeof(PetscScalar);
 51:     else memreq += matrix.nnz * sizeof(PetscScalar);
 52:   }
 53:   return memreq;
 54: }

 56: /*
 57:   spbas_allocate_pattern:
 58:     allocate the pattern arrays row_nnz, icols and optionally values
 59: */
 60: static PetscErrorCode spbas_allocate_pattern(spbas_matrix *result, PetscBool do_values)
 61: {
 62:   PetscInt nrows        = result->nrows;
 63:   PetscInt col_idx_type = result->col_idx_type;

 65:   PetscFunctionBegin;
 66:   /* Allocate sparseness pattern */
 67:   PetscCall(PetscMalloc1(nrows, &result->row_nnz));
 68:   PetscCall(PetscMalloc1(nrows, &result->icols));

 70:   /* If offsets are given wrt an array, create array */
 71:   if (col_idx_type == SPBAS_OFFSET_ARRAY) {
 72:     PetscCall(PetscMalloc1(nrows, &result->icol0));
 73:   } else {
 74:     result->icol0 = NULL;
 75:   }

 77:   /* If values are given, allocate values array */
 78:   if (do_values) {
 79:     PetscCall(PetscMalloc1(nrows, &result->values));
 80:   } else {
 81:     result->values = NULL;
 82:   }
 83:   PetscFunctionReturn(PETSC_SUCCESS);
 84: }

 86: /*
 87: spbas_allocate_data:
 88:    in case of block_data:
 89:        Allocate the data arrays alloc_icol and optionally alloc_val,
 90:        set appropriate pointers from icols and values;
 91:    in case of !block_data:
 92:        Allocate the arrays icols[i] and optionally values[i]
 93: */
 94: static PetscErrorCode spbas_allocate_data(spbas_matrix *result)
 95: {
 96:   PetscInt  i;
 97:   PetscInt  nnz   = result->nnz;
 98:   PetscInt  nrows = result->nrows;
 99:   PetscInt  r_nnz;
100:   PetscBool do_values  = (result->values) ? PETSC_TRUE : PETSC_FALSE;
101:   PetscBool block_data = result->block_data;

103:   PetscFunctionBegin;
104:   if (block_data) {
105:     /* Allocate the column number array and point to it */
106:     result->n_alloc_icol = nnz;

108:     PetscCall(PetscMalloc1(nnz, &result->alloc_icol));

110:     result->icols[0] = result->alloc_icol;
111:     for (i = 1; i < nrows; i++) result->icols[i] = result->icols[i - 1] + result->row_nnz[i - 1];

113:     /* Allocate the value array and point to it */
114:     if (do_values) {
115:       result->n_alloc_val = nnz;

117:       PetscCall(PetscMalloc1(nnz, &result->alloc_val));

119:       result->values[0] = result->alloc_val;
120:       for (i = 1; i < nrows; i++) result->values[i] = result->values[i - 1] + result->row_nnz[i - 1];
121:     }
122:   } else {
123:     for (i = 0; i < nrows; i++) {
124:       r_nnz = result->row_nnz[i];
125:       PetscCall(PetscMalloc1(r_nnz, &result->icols[i]));
126:     }
127:     if (do_values) {
128:       for (i = 0; i < nrows; i++) {
129:         r_nnz = result->row_nnz[i];
130:         PetscCall(PetscMalloc1(r_nnz, &result->values[i]));
131:       }
132:     }
133:   }
134:   PetscFunctionReturn(PETSC_SUCCESS);
135: }

137: /*
138:   spbas_row_order_icol
139:      determine if row i1 should come
140:        + before row i2 in the sorted rows (return -1),
141:        + after (return 1)
142:        + is identical (return 0).
143: */
144: static int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in, PetscInt col_idx_type)
145: {
146:   PetscInt  j;
147:   PetscInt  nnz1  = irow_in[i1 + 1] - irow_in[i1];
148:   PetscInt  nnz2  = irow_in[i2 + 1] - irow_in[i2];
149:   PetscInt *icol1 = &icol_in[irow_in[i1]];
150:   PetscInt *icol2 = &icol_in[irow_in[i2]];

152:   if (nnz1 < nnz2) return -1;
153:   if (nnz1 > nnz2) return 1;

155:   if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
156:     for (j = 0; j < nnz1; j++) {
157:       if (icol1[j] < icol2[j]) return -1;
158:       if (icol1[j] > icol2[j]) return 1;
159:     }
160:   } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
161:     for (j = 0; j < nnz1; j++) {
162:       if (icol1[j] - i1 < icol2[j] - i2) return -1;
163:       if (icol1[j] - i1 > icol2[j] - i2) return 1;
164:     }
165:   } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
166:     for (j = 1; j < nnz1; j++) {
167:       if (icol1[j] - icol1[0] < icol2[j] - icol2[0]) return -1;
168:       if (icol1[j] - icol1[0] > icol2[j] - icol2[0]) return 1;
169:     }
170:   }
171:   return 0;
172: }

174: /*
175:   spbas_mergesort_icols:
176:     return a sorting of the rows in which identical sparseness patterns are
177:     next to each other
178: */
179: static PetscErrorCode spbas_mergesort_icols(PetscInt nrows, PetscInt *irow_in, PetscInt *icol_in, PetscInt col_idx_type, PetscInt *isort)
180: {
181:   PetscInt  istep;                /* Chunk-sizes of already sorted parts of arrays */
182:   PetscInt  i, i1, i2;            /* Loop counters for (partly) sorted arrays */
183:   PetscInt  istart, i1end, i2end; /* start of newly sorted array part, end of both  parts */
184:   PetscInt *ialloc;               /* Allocated arrays */
185:   PetscInt *iswap;                /* auxiliary pointers for swapping */
186:   PetscInt *ihlp1;                /* Pointers to new version of arrays, */
187:   PetscInt *ihlp2;                /* Pointers to previous version of arrays, */

189:   PetscFunctionBegin;
190:   PetscCall(PetscMalloc1(nrows, &ialloc));

192:   ihlp1 = ialloc;
193:   ihlp2 = isort;

195:   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
196:   for (istep = 1; istep < nrows; istep *= 2) {
197:     /*
198:       Combine sorted parts
199:           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
200:       of ihlp2 and vhlp2

202:       into one sorted part
203:           istart:istart+2*istep-1
204:       of ihlp1 and vhlp1
205:     */
206:     for (istart = 0; istart < nrows; istart += 2 * istep) {
207:       /* Set counters and bound array part endings */
208:       i1    = istart;
209:       i1end = i1 + istep;
210:       if (i1end > nrows) i1end = nrows;
211:       i2    = istart + istep;
212:       i2end = i2 + istep;
213:       if (i2end > nrows) i2end = nrows;

215:       /* Merge the two array parts */
216:       for (i = istart; i < i2end; i++) {
217:         if (i1 < i1end && i2 < i2end && spbas_row_order_icol(ihlp2[i1], ihlp2[i2], irow_in, icol_in, col_idx_type) < 0) {
218:           ihlp1[i] = ihlp2[i1];
219:           i1++;
220:         } else if (i2 < i2end) {
221:           ihlp1[i] = ihlp2[i2];
222:           i2++;
223:         } else {
224:           ihlp1[i] = ihlp2[i1];
225:           i1++;
226:         }
227:       }
228:     }

230:     /* Swap the two array sets */
231:     iswap = ihlp2;
232:     ihlp2 = ihlp1;
233:     ihlp1 = iswap;
234:   }

236:   /* Copy one more time in case the sorted arrays are the temporary ones */
237:   if (ihlp2 != isort) {
238:     for (i = 0; i < nrows; i++) isort[i] = ihlp2[i];
239:   }
240:   PetscCall(PetscFree(ialloc));
241:   PetscFunctionReturn(PETSC_SUCCESS);
242: }

244: /*
245:   spbas_compress_pattern:
246:      calculate a compressed sparseness pattern for a sparseness pattern
247:      given in compressed row storage. The compressed sparseness pattern may
248:      require (much) less memory.
249: */
250: PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols, PetscInt col_idx_type, spbas_matrix *B, PetscReal *mem_reduction)
251: {
252:   PetscInt        nnz      = irow_in[nrows];
253:   size_t          mem_orig = (nrows + nnz) * sizeof(PetscInt);
254:   size_t          mem_compressed;
255:   PetscInt       *isort;
256:   PetscInt       *icols;
257:   PetscInt        row_nnz;
258:   PetscInt       *ipoint;
259:   PetscBool      *used;
260:   PetscInt        ptr;
261:   PetscInt        i, j;
262:   const PetscBool no_values = PETSC_FALSE;

264:   PetscFunctionBegin;
265:   /* Allocate the structure of the new matrix */
266:   B->nrows        = nrows;
267:   B->ncols        = ncols;
268:   B->nnz          = nnz;
269:   B->col_idx_type = col_idx_type;
270:   B->block_data   = PETSC_TRUE;

272:   PetscCall(spbas_allocate_pattern(B, no_values));

274:   /* When using an offset array, set it */
275:   if (col_idx_type == SPBAS_OFFSET_ARRAY) {
276:     for (i = 0; i < nrows; i++) B->icol0[i] = icol_in[irow_in[i]];
277:   }

279:   /* Allocate the ordering for the rows */
280:   PetscCall(PetscMalloc1(nrows, &isort));
281:   PetscCall(PetscMalloc1(nrows, &ipoint));
282:   PetscCall(PetscCalloc1(nrows, &used));

284:   for (i = 0; i < nrows; i++) {
285:     B->row_nnz[i] = irow_in[i + 1] - irow_in[i];
286:     isort[i]      = i;
287:     ipoint[i]     = i;
288:   }

290:   /* Sort the rows so that identical columns will be next to each other */
291:   PetscCall(spbas_mergesort_icols(nrows, irow_in, icol_in, col_idx_type, isort));
292:   PetscCall(PetscInfo(NULL, "Rows have been sorted for patterns\n"));

294:   /* Replace identical rows with the first one in the list */
295:   for (i = 1; i < nrows; i++) {
296:     if (spbas_row_order_icol(isort[i - 1], isort[i], irow_in, icol_in, col_idx_type) == 0) ipoint[isort[i]] = ipoint[isort[i - 1]];
297:   }

299:   /* Collect the rows which are used*/
300:   for (i = 0; i < nrows; i++) used[ipoint[i]] = PETSC_TRUE;

302:   /* Calculate needed memory */
303:   B->n_alloc_icol = 0;
304:   for (i = 0; i < nrows; i++) {
305:     if (used[i]) B->n_alloc_icol += B->row_nnz[i];
306:   }
307:   PetscCall(PetscMalloc1(B->n_alloc_icol, &B->alloc_icol));

309:   /* Fill in the diagonal offsets for the rows which store their own data */
310:   ptr = 0;
311:   for (i = 0; i < B->nrows; i++) {
312:     if (used[i]) {
313:       B->icols[i] = &B->alloc_icol[ptr];
314:       icols       = &icol_in[irow_in[i]];
315:       row_nnz     = B->row_nnz[i];
316:       if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
317:         for (j = 0; j < row_nnz; j++) B->icols[i][j] = icols[j];
318:       } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
319:         for (j = 0; j < row_nnz; j++) B->icols[i][j] = icols[j] - i;
320:       } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
321:         for (j = 0; j < row_nnz; j++) B->icols[i][j] = icols[j] - icols[0];
322:       }
323:       ptr += B->row_nnz[i];
324:     }
325:   }

327:   /* Point to the right places for all data */
328:   for (i = 0; i < nrows; i++) B->icols[i] = B->icols[ipoint[i]];
329:   PetscCall(PetscInfo(NULL, "Row patterns have been compressed\n"));
330:   PetscCall(PetscInfo(NULL, "         (%g nonzeros per row)\n", (double)((PetscReal)nnz / (PetscReal)nrows)));

332:   PetscCall(PetscFree(isort));
333:   PetscCall(PetscFree(used));
334:   PetscCall(PetscFree(ipoint));

336:   mem_compressed = spbas_memory_requirement(*B);
337:   *mem_reduction = 100.0 * (PetscReal)(mem_orig - mem_compressed) / (PetscReal)mem_orig;
338:   PetscFunctionReturn(PETSC_SUCCESS);
339: }

341: /*
342:    spbas_incomplete_cholesky
343:        Incomplete Cholesky decomposition
344: */
345: #include <../src/mat/impls/aij/seq/bas/spbas_cholesky.h>

347: /*
348:   spbas_delete : de-allocate the arrays owned by this matrix
349: */
350: PetscErrorCode spbas_delete(spbas_matrix matrix)
351: {
352:   PetscInt i;

354:   PetscFunctionBegin;
355:   if (matrix.block_data) {
356:     PetscCall(PetscFree(matrix.alloc_icol));
357:     if (matrix.values) PetscCall(PetscFree(matrix.alloc_val));
358:   } else {
359:     for (i = 0; i < matrix.nrows; i++) PetscCall(PetscFree(matrix.icols[i]));
360:     PetscCall(PetscFree(matrix.icols));
361:     if (matrix.values) {
362:       for (i = 0; i < matrix.nrows; i++) PetscCall(PetscFree(matrix.values[i]));
363:     }
364:   }

366:   PetscCall(PetscFree(matrix.row_nnz));
367:   PetscCall(PetscFree(matrix.icols));
368:   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) PetscCall(PetscFree(matrix.icol0));
369:   PetscCall(PetscFree(matrix.values));
370:   PetscFunctionReturn(PETSC_SUCCESS);
371: }

373: /*
374: spbas_matrix_to_crs:
375:    Convert an spbas_matrix to compessed row storage
376: */
377: PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A, MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out)
378: {
379:   PetscInt     nrows = matrix_A.nrows;
380:   PetscInt     nnz   = matrix_A.nnz;
381:   PetscInt     i, j, r_nnz, i0;
382:   PetscInt    *irow;
383:   PetscInt    *icol;
384:   PetscInt    *icol_A;
385:   MatScalar   *val;
386:   PetscScalar *val_A;
387:   PetscInt     col_idx_type = matrix_A.col_idx_type;
388:   PetscBool    do_values    = matrix_A.values ? PETSC_TRUE : PETSC_FALSE;

390:   PetscFunctionBegin;
391:   PetscCall(PetscMalloc1(nrows + 1, &irow));
392:   PetscCall(PetscMalloc1(nnz, &icol));
393:   *icol_out = icol;
394:   *irow_out = irow;
395:   if (do_values) {
396:     PetscCall(PetscMalloc1(nnz, &val));
397:     *val_out  = val;
398:     *icol_out = icol;
399:     *irow_out = irow;
400:   }

402:   irow[0] = 0;
403:   for (i = 0; i < nrows; i++) {
404:     r_nnz       = matrix_A.row_nnz[i];
405:     i0          = irow[i];
406:     irow[i + 1] = i0 + r_nnz;
407:     icol_A      = matrix_A.icols[i];

409:     if (do_values) {
410:       val_A = matrix_A.values[i];
411:       for (j = 0; j < r_nnz; j++) {
412:         icol[i0 + j] = icol_A[j];
413:         val[i0 + j]  = val_A[j];
414:       }
415:     } else {
416:       for (j = 0; j < r_nnz; j++) icol[i0 + j] = icol_A[j];
417:     }

419:     if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
420:       for (j = 0; j < r_nnz; j++) icol[i0 + j] += i;
421:     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
422:       i0 = matrix_A.icol0[i];
423:       for (j = 0; j < r_nnz; j++) icol[i0 + j] += i0;
424:     }
425:   }
426:   PetscFunctionReturn(PETSC_SUCCESS);
427: }

429: /*
430:     spbas_transpose
431:        return the transpose of a matrix
432: */
433: PetscErrorCode spbas_transpose(spbas_matrix in_matrix, spbas_matrix *result)
434: {
435:   PetscInt     col_idx_type = in_matrix.col_idx_type;
436:   PetscInt     nnz          = in_matrix.nnz;
437:   PetscInt     ncols        = in_matrix.nrows;
438:   PetscInt     nrows        = in_matrix.ncols;
439:   PetscInt     i, j, k;
440:   PetscInt     r_nnz;
441:   PetscInt    *irow;
442:   PetscInt     icol0 = 0;
443:   PetscScalar *val;

445:   PetscFunctionBegin;
446:   /* Copy input values */
447:   result->nrows        = nrows;
448:   result->ncols        = ncols;
449:   result->nnz          = nnz;
450:   result->col_idx_type = SPBAS_COLUMN_NUMBERS;
451:   result->block_data   = PETSC_TRUE;

453:   /* Allocate sparseness pattern */
454:   PetscCall(spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE));

456:   /*  Count the number of nonzeros in each row */
457:   for (i = 0; i < nrows; i++) result->row_nnz[i] = 0;

459:   for (i = 0; i < ncols; i++) {
460:     r_nnz = in_matrix.row_nnz[i];
461:     irow  = in_matrix.icols[i];
462:     if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
463:       for (j = 0; j < r_nnz; j++) result->row_nnz[irow[j]]++;
464:     } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
465:       for (j = 0; j < r_nnz; j++) result->row_nnz[i + irow[j]]++;
466:     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
467:       icol0 = in_matrix.icol0[i];
468:       for (j = 0; j < r_nnz; j++) result->row_nnz[icol0 + irow[j]]++;
469:     }
470:   }

472:   /* Set the pointers to the data */
473:   PetscCall(spbas_allocate_data(result));

475:   /* Reset the number of nonzeros in each row */
476:   for (i = 0; i < nrows; i++) result->row_nnz[i] = 0;

478:   /* Fill the data arrays */
479:   if (in_matrix.values) {
480:     for (i = 0; i < ncols; i++) {
481:       r_nnz = in_matrix.row_nnz[i];
482:       irow  = in_matrix.icols[i];
483:       val   = in_matrix.values[i];

485:       if (col_idx_type == SPBAS_COLUMN_NUMBERS) icol0 = 0;
486:       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0 = i;
487:       else if (col_idx_type == SPBAS_OFFSET_ARRAY) icol0 = in_matrix.icol0[i];
488:       for (j = 0; j < r_nnz; j++) {
489:         k                                     = icol0 + irow[j];
490:         result->icols[k][result->row_nnz[k]]  = i;
491:         result->values[k][result->row_nnz[k]] = val[j];
492:         result->row_nnz[k]++;
493:       }
494:     }
495:   } else {
496:     for (i = 0; i < ncols; i++) {
497:       r_nnz = in_matrix.row_nnz[i];
498:       irow  = in_matrix.icols[i];

500:       if (col_idx_type == SPBAS_COLUMN_NUMBERS) icol0 = 0;
501:       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0 = i;
502:       else if (col_idx_type == SPBAS_OFFSET_ARRAY) icol0 = in_matrix.icol0[i];

504:       for (j = 0; j < r_nnz; j++) {
505:         k                                    = icol0 + irow[j];
506:         result->icols[k][result->row_nnz[k]] = i;
507:         result->row_nnz[k]++;
508:       }
509:     }
510:   }
511:   PetscFunctionReturn(PETSC_SUCCESS);
512: }

514: /*
515:    spbas_mergesort

517:       mergesort for an array of integers and an array of associated
518:       reals

520:       on output, icol[0..nnz-1] is increasing;
521:                   val[0..nnz-1] has undergone the same permutation as icol

523:       NB: val may be NULL: in that case, only the integers are sorted

525: */
526: static PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val)
527: {
528:   PetscInt     istep;                /* Chunk-sizes of already sorted parts of arrays */
529:   PetscInt     i, i1, i2;            /* Loop counters for (partly) sorted arrays */
530:   PetscInt     istart, i1end, i2end; /* start of newly sorted array part, end of both parts */
531:   PetscInt    *ialloc;               /* Allocated arrays */
532:   PetscScalar *valloc = NULL;
533:   PetscInt    *iswap; /* auxiliary pointers for swapping */
534:   PetscScalar *vswap;
535:   PetscInt    *ihlp1;        /* Pointers to new version of arrays, */
536:   PetscScalar *vhlp1 = NULL; /* (arrays under construction) */
537:   PetscInt    *ihlp2;        /* Pointers to previous version of arrays, */
538:   PetscScalar *vhlp2 = NULL;

540:   PetscFunctionBegin;
541:   PetscCall(PetscMalloc1(nnz, &ialloc));
542:   ihlp1 = ialloc;
543:   ihlp2 = icol;

545:   if (val) {
546:     PetscCall(PetscMalloc1(nnz, &valloc));
547:     vhlp1 = valloc;
548:     vhlp2 = val;
549:   }

551:   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
552:   for (istep = 1; istep < nnz; istep *= 2) {
553:     /*
554:       Combine sorted parts
555:           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
556:       of ihlp2 and vhlp2

558:       into one sorted part
559:           istart:istart+2*istep-1
560:       of ihlp1 and vhlp1
561:     */
562:     for (istart = 0; istart < nnz; istart += 2 * istep) {
563:       /* Set counters and bound array part endings */
564:       i1    = istart;
565:       i1end = i1 + istep;
566:       if (i1end > nnz) i1end = nnz;
567:       i2    = istart + istep;
568:       i2end = i2 + istep;
569:       if (i2end > nnz) i2end = nnz;

571:       /* Merge the two array parts */
572:       if (val) {
573:         for (i = istart; i < i2end; i++) {
574:           if (i1 < i1end && i2 < i2end && ihlp2[i1] < ihlp2[i2]) {
575:             ihlp1[i] = ihlp2[i1];
576:             vhlp1[i] = vhlp2[i1];
577:             i1++;
578:           } else if (i2 < i2end) {
579:             ihlp1[i] = ihlp2[i2];
580:             vhlp1[i] = vhlp2[i2];
581:             i2++;
582:           } else {
583:             ihlp1[i] = ihlp2[i1];
584:             vhlp1[i] = vhlp2[i1];
585:             i1++;
586:           }
587:         }
588:       } else {
589:         for (i = istart; i < i2end; i++) {
590:           if (i1 < i1end && i2 < i2end && ihlp2[i1] < ihlp2[i2]) {
591:             ihlp1[i] = ihlp2[i1];
592:             i1++;
593:           } else if (i2 < i2end) {
594:             ihlp1[i] = ihlp2[i2];
595:             i2++;
596:           } else {
597:             ihlp1[i] = ihlp2[i1];
598:             i1++;
599:           }
600:         }
601:       }
602:     }

604:     /* Swap the two array sets */
605:     iswap = ihlp2;
606:     ihlp2 = ihlp1;
607:     ihlp1 = iswap;
608:     vswap = vhlp2;
609:     vhlp2 = vhlp1;
610:     vhlp1 = vswap;
611:   }

613:   /* Copy one more time in case the sorted arrays are the temporary ones */
614:   if (ihlp2 != icol) {
615:     for (i = 0; i < nnz; i++) icol[i] = ihlp2[i];
616:     if (val) {
617:       for (i = 0; i < nnz; i++) val[i] = vhlp2[i];
618:     }
619:   }

621:   PetscCall(PetscFree(ialloc));
622:   if (val) PetscCall(PetscFree(valloc));
623:   PetscFunctionReturn(PETSC_SUCCESS);
624: }

626: /*
627:   spbas_apply_reordering_rows:
628:     apply the given reordering to the rows:  matrix_A = matrix_A(perm,:);
629: */
630: static PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation)
631: {
632:   PetscInt      i, j, ip;
633:   PetscInt      nrows = matrix_A->nrows;
634:   PetscInt     *row_nnz;
635:   PetscInt    **icols;
636:   PetscBool     do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
637:   PetscScalar **vals      = NULL;

639:   PetscFunctionBegin;
640:   PetscCheck(matrix_A->col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern");

642:   if (do_values) PetscCall(PetscMalloc1(nrows, &vals));
643:   PetscCall(PetscMalloc1(nrows, &row_nnz));
644:   PetscCall(PetscMalloc1(nrows, &icols));

646:   for (i = 0; i < nrows; i++) {
647:     ip = permutation[i];
648:     if (do_values) vals[i] = matrix_A->values[ip];
649:     icols[i]   = matrix_A->icols[ip];
650:     row_nnz[i] = matrix_A->row_nnz[ip];
651:     for (j = 0; j < row_nnz[i]; j++) icols[i][j] += ip - i;
652:   }

654:   if (do_values) PetscCall(PetscFree(matrix_A->values));
655:   PetscCall(PetscFree(matrix_A->icols));
656:   PetscCall(PetscFree(matrix_A->row_nnz));

658:   if (do_values) matrix_A->values = vals;
659:   matrix_A->icols   = icols;
660:   matrix_A->row_nnz = row_nnz;
661:   PetscFunctionReturn(PETSC_SUCCESS);
662: }

664: /*
665:   spbas_apply_reordering_cols:
666:     apply the given reordering to the columns:  matrix_A(:,perm) = matrix_A;
667: */
668: static PetscErrorCode spbas_apply_reordering_cols(spbas_matrix *matrix_A, const PetscInt *permutation)
669: {
670:   PetscInt     i, j;
671:   PetscInt     nrows = matrix_A->nrows;
672:   PetscInt     row_nnz;
673:   PetscInt    *icols;
674:   PetscBool    do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
675:   PetscScalar *vals      = NULL;

677:   PetscFunctionBegin;
678:   PetscCheck(matrix_A->col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern");

680:   for (i = 0; i < nrows; i++) {
681:     icols   = matrix_A->icols[i];
682:     row_nnz = matrix_A->row_nnz[i];
683:     if (do_values) vals = matrix_A->values[i];

685:     for (j = 0; j < row_nnz; j++) icols[j] = permutation[i + icols[j]] - i;
686:     PetscCall(spbas_mergesort(row_nnz, icols, vals));
687:   }
688:   PetscFunctionReturn(PETSC_SUCCESS);
689: }

691: /*
692:   spbas_apply_reordering:
693:     apply the given reordering:  matrix_A(perm,perm) = matrix_A;
694: */
695: PetscErrorCode spbas_apply_reordering(spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt *inv_perm)
696: {
697:   PetscFunctionBegin;
698:   PetscCall(spbas_apply_reordering_rows(matrix_A, inv_perm));
699:   PetscCall(spbas_apply_reordering_cols(matrix_A, permutation));
700:   PetscFunctionReturn(PETSC_SUCCESS);
701: }

703: PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix *result)
704: {
705:   spbas_matrix retval;
706:   PetscInt     i, j, i0, r_nnz;

708:   PetscFunctionBegin;
709:   /* Copy input values */
710:   retval.nrows = nrows;
711:   retval.ncols = ncols;
712:   retval.nnz   = ai[nrows];

714:   retval.block_data   = PETSC_TRUE;
715:   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;

717:   /* Allocate output matrix */
718:   PetscCall(spbas_allocate_pattern(&retval, PETSC_FALSE));
719:   for (i = 0; i < nrows; i++) retval.row_nnz[i] = ai[i + 1] - ai[i];
720:   PetscCall(spbas_allocate_data(&retval));
721:   /* Copy the structure */
722:   for (i = 0; i < retval.nrows; i++) {
723:     i0    = ai[i];
724:     r_nnz = ai[i + 1] - i0;

726:     for (j = 0; j < r_nnz; j++) retval.icols[i][j] = aj[i0 + j] - i;
727:   }
728:   *result = retval;
729:   PetscFunctionReturn(PETSC_SUCCESS);
730: }

732: /*
733:    spbas_mark_row_power:
734:       Mark the columns in row 'row' which are nonzero in
735:           matrix^2log(marker).
736: */
737: static PetscErrorCode spbas_mark_row_power(PetscInt     *iwork,     /* marker-vector */
738:                                            PetscInt      row,       /* row for which the columns are marked */
739:                                            spbas_matrix *in_matrix, /* matrix for which the power is being  calculated */
740:                                            PetscInt      marker,    /* marker-value: 2^power */
741:                                            PetscInt      minmrk,    /* lower bound for marked points */
742:                                            PetscInt      maxmrk)         /* upper bound for marked points */
743: {
744:   PetscInt i, j, nnz;

746:   PetscFunctionBegin;
747:   nnz = in_matrix->row_nnz[row];

749:   /* For higher powers, call this function recursively */
750:   if (marker > 1) {
751:     for (i = 0; i < nnz; i++) {
752:       j = row + in_matrix->icols[row][i];
753:       if (minmrk <= j && j < maxmrk && iwork[j] < marker) {
754:         PetscCall(spbas_mark_row_power(iwork, row + in_matrix->icols[row][i], in_matrix, marker / 2, minmrk, maxmrk));
755:         iwork[j] |= marker;
756:       }
757:     }
758:   } else {
759:     /*  Mark the columns reached */
760:     for (i = 0; i < nnz; i++) {
761:       j = row + in_matrix->icols[row][i];
762:       if (minmrk <= j && j < maxmrk) iwork[j] |= 1;
763:     }
764:   }
765:   PetscFunctionReturn(PETSC_SUCCESS);
766: }

768: /*
769:    spbas_power
770:       Calculate sparseness patterns for incomplete Cholesky decompositions
771:       of a given order: (almost) all nonzeros of the matrix^(order+1) which
772:       are inside the band width are found and stored in the output sparseness
773:       pattern.
774: */
775: PetscErrorCode spbas_power(spbas_matrix in_matrix, PetscInt power, spbas_matrix *result)
776: {
777:   spbas_matrix retval;
778:   PetscInt     nrows = in_matrix.nrows;
779:   PetscInt     ncols = in_matrix.ncols;
780:   PetscInt     i, j, kend;
781:   PetscInt     nnz, inz;
782:   PetscInt    *iwork;
783:   PetscInt     marker;
784:   PetscInt     maxmrk = 0;

786:   PetscFunctionBegin;
787:   PetscCheck(in_matrix.col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern");
788:   PetscCheck(ncols == nrows, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Dimension error");
789:   PetscCheck(!in_matrix.values, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Input array must be sparseness pattern (no values)");
790:   PetscCheck(power > 0, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Power must be 1 or up");

792:   /* Copy input values*/
793:   retval.nrows        = ncols;
794:   retval.ncols        = nrows;
795:   retval.nnz          = 0;
796:   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
797:   retval.block_data   = PETSC_FALSE;

799:   /* Allocate sparseness pattern */
800:   PetscCall(spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE));

802:   /* Allocate marker array: note sure the max needed so use the max of the two */
803:   PetscCall(PetscCalloc1(PetscMax(ncols, nrows), &iwork));

805:   /* Calculate marker values */
806:   marker = 1;
807:   for (i = 1; i < power; i++) marker *= 2;

809:   for (i = 0; i < nrows; i++) {
810:     /* Calculate the pattern for each row */

812:     nnz  = in_matrix.row_nnz[i];
813:     kend = i + in_matrix.icols[i][nnz - 1];
814:     if (maxmrk <= kend) maxmrk = kend + 1;
815:     PetscCall(spbas_mark_row_power(iwork, i, &in_matrix, marker, i, maxmrk));

817:     /* Count the columns*/
818:     nnz = 0;
819:     for (j = i; j < maxmrk; j++) nnz += (iwork[j] != 0);

821:     /* Allocate the column indices */
822:     retval.row_nnz[i] = nnz;
823:     PetscCall(PetscMalloc1(nnz, &retval.icols[i]));

825:     /* Administrate the column indices */
826:     inz = 0;
827:     for (j = i; j < maxmrk; j++) {
828:       if (iwork[j]) {
829:         retval.icols[i][inz] = j - i;
830:         inz++;
831:         iwork[j] = 0;
832:       }
833:     }
834:     retval.nnz += nnz;
835:   }
836:   PetscCall(PetscFree(iwork));
837:   *result = retval;
838:   PetscFunctionReturn(PETSC_SUCCESS);
839: }

841: /*
842:    spbas_keep_upper:
843:       remove the lower part of the matrix: keep the upper part
844: */
845: PetscErrorCode spbas_keep_upper(spbas_matrix *inout_matrix)
846: {
847:   PetscInt i, j;
848:   PetscInt jstart;

850:   PetscFunctionBegin;
851:   PetscCheck(!inout_matrix->block_data, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Not yet for block data matrices");
852:   for (i = 0; i < inout_matrix->nrows; i++) {
853:     for (jstart = 0; (jstart < inout_matrix->row_nnz[i]) && (inout_matrix->icols[i][jstart] < 0); jstart++) { }
854:     if (jstart > 0) {
855:       for (j = 0; j < inout_matrix->row_nnz[i] - jstart; j++) inout_matrix->icols[i][j] = inout_matrix->icols[i][j + jstart];

857:       if (inout_matrix->values) {
858:         for (j = 0; j < inout_matrix->row_nnz[i] - jstart; j++) inout_matrix->values[i][j] = inout_matrix->values[i][j + jstart];
859:       }

861:       inout_matrix->row_nnz[i] -= jstart;

863:       inout_matrix->icols[i] = (PetscInt *)realloc((void *)inout_matrix->icols[i], inout_matrix->row_nnz[i] * sizeof(PetscInt));

865:       if (inout_matrix->values) inout_matrix->values[i] = (PetscScalar *)realloc((void *)inout_matrix->values[i], inout_matrix->row_nnz[i] * sizeof(PetscScalar));
866:       inout_matrix->nnz -= jstart;
867:     }
868:   }
869:   PetscFunctionReturn(PETSC_SUCCESS);
870: }