Actual source code: baijsolvnat3.c

  1: #include <../src/mat/impls/baij/seq/baij.h>
  2: #include <petsc/private/kernels/blockinvert.h>

  4: /*
  5:       Special case where the matrix was ILU(0) factored in the natural
  6:    ordering. This eliminates the need for the column and row permutation.
  7: */
  8: PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
  9: {
 10:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
 11:   const PetscInt     n = a->mbs, *ai = a->i, *aj = a->j;
 12:   const PetscInt    *diag = a->diag, *vi;
 13:   const MatScalar   *aa   = a->a, *v;
 14:   PetscScalar       *x, s1, s2, s3, x1, x2, x3;
 15:   const PetscScalar *b;
 16:   PetscInt           jdx, idt, idx, nz, i;

 18:   PetscFunctionBegin;
 19:   PetscCall(VecGetArrayRead(bb, &b));
 20:   PetscCall(VecGetArray(xx, &x));

 22:   /* forward solve the lower triangular */
 23:   idx  = 0;
 24:   x[0] = b[0];
 25:   x[1] = b[1];
 26:   x[2] = b[2];
 27:   for (i = 1; i < n; i++) {
 28:     v  = aa + 9 * ai[i];
 29:     vi = aj + ai[i];
 30:     nz = diag[i] - ai[i];
 31:     idx += 3;
 32:     s1 = b[idx];
 33:     s2 = b[1 + idx];
 34:     s3 = b[2 + idx];
 35:     while (nz--) {
 36:       jdx = 3 * (*vi++);
 37:       x1  = x[jdx];
 38:       x2  = x[1 + jdx];
 39:       x3  = x[2 + jdx];
 40:       s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
 41:       s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
 42:       s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
 43:       v += 9;
 44:     }
 45:     x[idx]     = s1;
 46:     x[1 + idx] = s2;
 47:     x[2 + idx] = s3;
 48:   }
 49:   /* backward solve the upper triangular */
 50:   for (i = n - 1; i >= 0; i--) {
 51:     v   = aa + 9 * diag[i] + 9;
 52:     vi  = aj + diag[i] + 1;
 53:     nz  = ai[i + 1] - diag[i] - 1;
 54:     idt = 3 * i;
 55:     s1  = x[idt];
 56:     s2  = x[1 + idt];
 57:     s3  = x[2 + idt];
 58:     while (nz--) {
 59:       idx = 3 * (*vi++);
 60:       x1  = x[idx];
 61:       x2  = x[1 + idx];
 62:       x3  = x[2 + idx];
 63:       s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
 64:       s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
 65:       s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
 66:       v += 9;
 67:     }
 68:     v          = aa + 9 * diag[i];
 69:     x[idt]     = v[0] * s1 + v[3] * s2 + v[6] * s3;
 70:     x[1 + idt] = v[1] * s1 + v[4] * s2 + v[7] * s3;
 71:     x[2 + idt] = v[2] * s1 + v[5] * s2 + v[8] * s3;
 72:   }

 74:   PetscCall(VecRestoreArrayRead(bb, &b));
 75:   PetscCall(VecRestoreArray(xx, &x));
 76:   PetscCall(PetscLogFlops(2.0 * 9 * (a->nz) - 3.0 * A->cmap->n));
 77:   PetscFunctionReturn(PETSC_SUCCESS);
 78: }

 80: PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering(Mat A, Vec bb, Vec xx)
 81: {
 82:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
 83:   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
 84:   PetscInt           i, k, nz, idx, jdx, idt;
 85:   const PetscInt     bs = A->rmap->bs, bs2 = a->bs2;
 86:   const MatScalar   *aa = a->a, *v;
 87:   PetscScalar       *x;
 88:   const PetscScalar *b;
 89:   PetscScalar        s1, s2, s3, x1, x2, x3;

 91:   PetscFunctionBegin;
 92:   PetscCall(VecGetArrayRead(bb, &b));
 93:   PetscCall(VecGetArray(xx, &x));
 94:   /* forward solve the lower triangular */
 95:   idx  = 0;
 96:   x[0] = b[idx];
 97:   x[1] = b[1 + idx];
 98:   x[2] = b[2 + idx];
 99:   for (i = 1; i < n; i++) {
100:     v   = aa + bs2 * ai[i];
101:     vi  = aj + ai[i];
102:     nz  = ai[i + 1] - ai[i];
103:     idx = bs * i;
104:     s1  = b[idx];
105:     s2  = b[1 + idx];
106:     s3  = b[2 + idx];
107:     for (k = 0; k < nz; k++) {
108:       jdx = bs * vi[k];
109:       x1  = x[jdx];
110:       x2  = x[1 + jdx];
111:       x3  = x[2 + jdx];
112:       s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
113:       s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
114:       s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;

116:       v += bs2;
117:     }

119:     x[idx]     = s1;
120:     x[1 + idx] = s2;
121:     x[2 + idx] = s3;
122:   }

124:   /* backward solve the upper triangular */
125:   for (i = n - 1; i >= 0; i--) {
126:     v   = aa + bs2 * (adiag[i + 1] + 1);
127:     vi  = aj + adiag[i + 1] + 1;
128:     nz  = adiag[i] - adiag[i + 1] - 1;
129:     idt = bs * i;
130:     s1  = x[idt];
131:     s2  = x[1 + idt];
132:     s3  = x[2 + idt];

134:     for (k = 0; k < nz; k++) {
135:       idx = bs * vi[k];
136:       x1  = x[idx];
137:       x2  = x[1 + idx];
138:       x3  = x[2 + idx];
139:       s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
140:       s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
141:       s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;

143:       v += bs2;
144:     }
145:     /* x = inv_diagonal*x */
146:     x[idt]     = v[0] * s1 + v[3] * s2 + v[6] * s3;
147:     x[1 + idt] = v[1] * s1 + v[4] * s2 + v[7] * s3;
148:     x[2 + idt] = v[2] * s1 + v[5] * s2 + v[8] * s3;
149:   }

151:   PetscCall(VecRestoreArrayRead(bb, &b));
152:   PetscCall(VecRestoreArray(xx, &x));
153:   PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n));
154:   PetscFunctionReturn(PETSC_SUCCESS);
155: }

157: PetscErrorCode MatForwardSolve_SeqBAIJ_3_NaturalOrdering(Mat A, Vec bb, Vec xx)
158: {
159:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
160:   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j;
161:   PetscInt           i, k, nz, idx, jdx;
162:   const PetscInt     bs = A->rmap->bs, bs2 = a->bs2;
163:   const MatScalar   *aa = a->a, *v;
164:   PetscScalar       *x;
165:   const PetscScalar *b;
166:   PetscScalar        s1, s2, s3, x1, x2, x3;

168:   PetscFunctionBegin;
169:   PetscCall(VecGetArrayRead(bb, &b));
170:   PetscCall(VecGetArray(xx, &x));
171:   /* forward solve the lower triangular */
172:   idx  = 0;
173:   x[0] = b[idx];
174:   x[1] = b[1 + idx];
175:   x[2] = b[2 + idx];
176:   for (i = 1; i < n; i++) {
177:     v   = aa + bs2 * ai[i];
178:     vi  = aj + ai[i];
179:     nz  = ai[i + 1] - ai[i];
180:     idx = bs * i;
181:     s1  = b[idx];
182:     s2  = b[1 + idx];
183:     s3  = b[2 + idx];
184:     for (k = 0; k < nz; k++) {
185:       jdx = bs * vi[k];
186:       x1  = x[jdx];
187:       x2  = x[1 + jdx];
188:       x3  = x[2 + jdx];
189:       s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
190:       s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
191:       s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;

193:       v += bs2;
194:     }

196:     x[idx]     = s1;
197:     x[1 + idx] = s2;
198:     x[2 + idx] = s3;
199:   }

201:   PetscCall(VecRestoreArrayRead(bb, &b));
202:   PetscCall(VecRestoreArray(xx, &x));
203:   PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz) - bs * A->cmap->n));
204:   PetscFunctionReturn(PETSC_SUCCESS);
205: }

207: PetscErrorCode MatBackwardSolve_SeqBAIJ_3_NaturalOrdering(Mat A, Vec bb, Vec xx)
208: {
209:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
210:   const PetscInt     n = a->mbs, *vi, *aj = a->j, *adiag = a->diag;
211:   PetscInt           i, k, nz, idx, idt;
212:   const PetscInt     bs = A->rmap->bs, bs2 = a->bs2;
213:   const MatScalar   *aa = a->a, *v;
214:   PetscScalar       *x;
215:   const PetscScalar *b;
216:   PetscScalar        s1, s2, s3, x1, x2, x3;

218:   PetscFunctionBegin;
219:   PetscCall(VecGetArrayRead(bb, &b));
220:   PetscCall(VecGetArray(xx, &x));

222:   /* backward solve the upper triangular */
223:   for (i = n - 1; i >= 0; i--) {
224:     v   = aa + bs2 * (adiag[i + 1] + 1);
225:     vi  = aj + adiag[i + 1] + 1;
226:     nz  = adiag[i] - adiag[i + 1] - 1;
227:     idt = bs * i;
228:     s1  = b[idt];
229:     s2  = b[1 + idt];
230:     s3  = b[2 + idt];

232:     for (k = 0; k < nz; k++) {
233:       idx = bs * vi[k];
234:       x1  = x[idx];
235:       x2  = x[1 + idx];
236:       x3  = x[2 + idx];
237:       s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
238:       s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
239:       s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;

241:       v += bs2;
242:     }
243:     /* x = inv_diagonal*x */
244:     x[idt]     = v[0] * s1 + v[3] * s2 + v[6] * s3;
245:     x[1 + idt] = v[1] * s1 + v[4] * s2 + v[7] * s3;
246:     x[2 + idt] = v[2] * s1 + v[5] * s2 + v[8] * s3;
247:   }

249:   PetscCall(VecRestoreArrayRead(bb, &b));
250:   PetscCall(VecRestoreArray(xx, &x));
251:   PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n));
252:   PetscFunctionReturn(PETSC_SUCCESS);
253: }