Actual source code: baijsolvnat11.c

  1: #include <../src/mat/impls/baij/seq/baij.h>
  2: #include <petsc/private/kernels/blockinvert.h>

  4: /* Block operations are done by accessing one column at a time */
  5: /* Default MatSolve for block size 11 */

  7: PetscErrorCode MatSolve_SeqBAIJ_11_NaturalOrdering(Mat A, Vec bb, Vec xx)
  8: {
  9:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
 10:   const PetscInt     n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2;
 11:   PetscInt           i, k, nz, idx, idt, m;
 12:   const MatScalar   *aa = a->a, *v;
 13:   PetscScalar        s[11];
 14:   PetscScalar       *x, xv;
 15:   const PetscScalar *b;

 17:   PetscFunctionBegin;
 18:   PetscCall(VecGetArrayRead(bb, &b));
 19:   PetscCall(VecGetArray(xx, &x));

 21:   /* forward solve the lower triangular */
 22:   for (i = 0; i < n; i++) {
 23:     v           = aa + bs2 * ai[i];
 24:     vi          = aj + ai[i];
 25:     nz          = ai[i + 1] - ai[i];
 26:     idt         = bs * i;
 27:     x[idt]      = b[idt];
 28:     x[1 + idt]  = b[1 + idt];
 29:     x[2 + idt]  = b[2 + idt];
 30:     x[3 + idt]  = b[3 + idt];
 31:     x[4 + idt]  = b[4 + idt];
 32:     x[5 + idt]  = b[5 + idt];
 33:     x[6 + idt]  = b[6 + idt];
 34:     x[7 + idt]  = b[7 + idt];
 35:     x[8 + idt]  = b[8 + idt];
 36:     x[9 + idt]  = b[9 + idt];
 37:     x[10 + idt] = b[10 + idt];
 38:     for (m = 0; m < nz; m++) {
 39:       idx = bs * vi[m];
 40:       for (k = 0; k < 11; k++) {
 41:         xv = x[k + idx];
 42:         x[idt] -= v[0] * xv;
 43:         x[1 + idt] -= v[1] * xv;
 44:         x[2 + idt] -= v[2] * xv;
 45:         x[3 + idt] -= v[3] * xv;
 46:         x[4 + idt] -= v[4] * xv;
 47:         x[5 + idt] -= v[5] * xv;
 48:         x[6 + idt] -= v[6] * xv;
 49:         x[7 + idt] -= v[7] * xv;
 50:         x[8 + idt] -= v[8] * xv;
 51:         x[9 + idt] -= v[9] * xv;
 52:         x[10 + idt] -= v[10] * xv;
 53:         v += 11;
 54:       }
 55:     }
 56:   }
 57:   /* backward solve the upper triangular */
 58:   for (i = n - 1; i >= 0; i--) {
 59:     v     = aa + bs2 * (adiag[i + 1] + 1);
 60:     vi    = aj + adiag[i + 1] + 1;
 61:     nz    = adiag[i] - adiag[i + 1] - 1;
 62:     idt   = bs * i;
 63:     s[0]  = x[idt];
 64:     s[1]  = x[1 + idt];
 65:     s[2]  = x[2 + idt];
 66:     s[3]  = x[3 + idt];
 67:     s[4]  = x[4 + idt];
 68:     s[5]  = x[5 + idt];
 69:     s[6]  = x[6 + idt];
 70:     s[7]  = x[7 + idt];
 71:     s[8]  = x[8 + idt];
 72:     s[9]  = x[9 + idt];
 73:     s[10] = x[10 + idt];

 75:     for (m = 0; m < nz; m++) {
 76:       idx = bs * vi[m];
 77:       for (k = 0; k < 11; k++) {
 78:         xv = x[k + idx];
 79:         s[0] -= v[0] * xv;
 80:         s[1] -= v[1] * xv;
 81:         s[2] -= v[2] * xv;
 82:         s[3] -= v[3] * xv;
 83:         s[4] -= v[4] * xv;
 84:         s[5] -= v[5] * xv;
 85:         s[6] -= v[6] * xv;
 86:         s[7] -= v[7] * xv;
 87:         s[8] -= v[8] * xv;
 88:         s[9] -= v[9] * xv;
 89:         s[10] -= v[10] * xv;
 90:         v += 11;
 91:       }
 92:     }
 93:     PetscCall(PetscArrayzero(x + idt, bs));
 94:     for (k = 0; k < 11; k++) {
 95:       x[idt] += v[0] * s[k];
 96:       x[1 + idt] += v[1] * s[k];
 97:       x[2 + idt] += v[2] * s[k];
 98:       x[3 + idt] += v[3] * s[k];
 99:       x[4 + idt] += v[4] * s[k];
100:       x[5 + idt] += v[5] * s[k];
101:       x[6 + idt] += v[6] * s[k];
102:       x[7 + idt] += v[7] * s[k];
103:       x[8 + idt] += v[8] * s[k];
104:       x[9 + idt] += v[9] * s[k];
105:       x[10 + idt] += v[10] * s[k];
106:       v += 11;
107:     }
108:   }
109:   PetscCall(VecRestoreArrayRead(bb, &b));
110:   PetscCall(VecRestoreArray(xx, &x));
111:   PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n));
112:   PetscFunctionReturn(PETSC_SUCCESS);
113: }