Actual source code: baijsolvnat3.c
1: #include <../src/mat/impls/baij/seq/baij.h>
2: #include <petsc/private/kernels/blockinvert.h>
4: /*
5: Special case where the matrix was ILU(0) factored in the natural
6: ordering. This eliminates the need for the column and row permutation.
7: */
8: PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
9: {
10: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
11: const PetscInt n = a->mbs, *ai = a->i, *aj = a->j;
12: const PetscInt *diag = a->diag, *vi;
13: const MatScalar *aa = a->a, *v;
14: PetscScalar *x, s1, s2, s3, x1, x2, x3;
15: const PetscScalar *b;
16: PetscInt jdx, idt, idx, nz, i;
18: PetscFunctionBegin;
19: PetscCall(VecGetArrayRead(bb, &b));
20: PetscCall(VecGetArray(xx, &x));
22: /* forward solve the lower triangular */
23: idx = 0;
24: x[0] = b[0];
25: x[1] = b[1];
26: x[2] = b[2];
27: for (i = 1; i < n; i++) {
28: v = aa + 9 * ai[i];
29: vi = aj + ai[i];
30: nz = diag[i] - ai[i];
31: idx += 3;
32: s1 = b[idx];
33: s2 = b[1 + idx];
34: s3 = b[2 + idx];
35: while (nz--) {
36: jdx = 3 * (*vi++);
37: x1 = x[jdx];
38: x2 = x[1 + jdx];
39: x3 = x[2 + jdx];
40: s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
41: s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
42: s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
43: v += 9;
44: }
45: x[idx] = s1;
46: x[1 + idx] = s2;
47: x[2 + idx] = s3;
48: }
49: /* backward solve the upper triangular */
50: for (i = n - 1; i >= 0; i--) {
51: v = aa + 9 * diag[i] + 9;
52: vi = aj + diag[i] + 1;
53: nz = ai[i + 1] - diag[i] - 1;
54: idt = 3 * i;
55: s1 = x[idt];
56: s2 = x[1 + idt];
57: s3 = x[2 + idt];
58: while (nz--) {
59: idx = 3 * (*vi++);
60: x1 = x[idx];
61: x2 = x[1 + idx];
62: x3 = x[2 + idx];
63: s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
64: s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
65: s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
66: v += 9;
67: }
68: v = aa + 9 * diag[i];
69: x[idt] = v[0] * s1 + v[3] * s2 + v[6] * s3;
70: x[1 + idt] = v[1] * s1 + v[4] * s2 + v[7] * s3;
71: x[2 + idt] = v[2] * s1 + v[5] * s2 + v[8] * s3;
72: }
74: PetscCall(VecRestoreArrayRead(bb, &b));
75: PetscCall(VecRestoreArray(xx, &x));
76: PetscCall(PetscLogFlops(2.0 * 9 * (a->nz) - 3.0 * A->cmap->n));
77: PetscFunctionReturn(PETSC_SUCCESS);
78: }
80: PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering(Mat A, Vec bb, Vec xx)
81: {
82: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
83: const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
84: PetscInt i, k, nz, idx, jdx, idt;
85: const PetscInt bs = A->rmap->bs, bs2 = a->bs2;
86: const MatScalar *aa = a->a, *v;
87: PetscScalar *x;
88: const PetscScalar *b;
89: PetscScalar s1, s2, s3, x1, x2, x3;
91: PetscFunctionBegin;
92: PetscCall(VecGetArrayRead(bb, &b));
93: PetscCall(VecGetArray(xx, &x));
94: /* forward solve the lower triangular */
95: idx = 0;
96: x[0] = b[idx];
97: x[1] = b[1 + idx];
98: x[2] = b[2 + idx];
99: for (i = 1; i < n; i++) {
100: v = aa + bs2 * ai[i];
101: vi = aj + ai[i];
102: nz = ai[i + 1] - ai[i];
103: idx = bs * i;
104: s1 = b[idx];
105: s2 = b[1 + idx];
106: s3 = b[2 + idx];
107: for (k = 0; k < nz; k++) {
108: jdx = bs * vi[k];
109: x1 = x[jdx];
110: x2 = x[1 + jdx];
111: x3 = x[2 + jdx];
112: s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
113: s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
114: s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
116: v += bs2;
117: }
119: x[idx] = s1;
120: x[1 + idx] = s2;
121: x[2 + idx] = s3;
122: }
124: /* backward solve the upper triangular */
125: for (i = n - 1; i >= 0; i--) {
126: v = aa + bs2 * (adiag[i + 1] + 1);
127: vi = aj + adiag[i + 1] + 1;
128: nz = adiag[i] - adiag[i + 1] - 1;
129: idt = bs * i;
130: s1 = x[idt];
131: s2 = x[1 + idt];
132: s3 = x[2 + idt];
134: for (k = 0; k < nz; k++) {
135: idx = bs * vi[k];
136: x1 = x[idx];
137: x2 = x[1 + idx];
138: x3 = x[2 + idx];
139: s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
140: s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
141: s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
143: v += bs2;
144: }
145: /* x = inv_diagonal*x */
146: x[idt] = v[0] * s1 + v[3] * s2 + v[6] * s3;
147: x[1 + idt] = v[1] * s1 + v[4] * s2 + v[7] * s3;
148: x[2 + idt] = v[2] * s1 + v[5] * s2 + v[8] * s3;
149: }
151: PetscCall(VecRestoreArrayRead(bb, &b));
152: PetscCall(VecRestoreArray(xx, &x));
153: PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n));
154: PetscFunctionReturn(PETSC_SUCCESS);
155: }
157: PetscErrorCode MatForwardSolve_SeqBAIJ_3_NaturalOrdering(Mat A, Vec bb, Vec xx)
158: {
159: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
160: const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j;
161: PetscInt i, k, nz, idx, jdx;
162: const PetscInt bs = A->rmap->bs, bs2 = a->bs2;
163: const MatScalar *aa = a->a, *v;
164: PetscScalar *x;
165: const PetscScalar *b;
166: PetscScalar s1, s2, s3, x1, x2, x3;
168: PetscFunctionBegin;
169: PetscCall(VecGetArrayRead(bb, &b));
170: PetscCall(VecGetArray(xx, &x));
171: /* forward solve the lower triangular */
172: idx = 0;
173: x[0] = b[idx];
174: x[1] = b[1 + idx];
175: x[2] = b[2 + idx];
176: for (i = 1; i < n; i++) {
177: v = aa + bs2 * ai[i];
178: vi = aj + ai[i];
179: nz = ai[i + 1] - ai[i];
180: idx = bs * i;
181: s1 = b[idx];
182: s2 = b[1 + idx];
183: s3 = b[2 + idx];
184: for (k = 0; k < nz; k++) {
185: jdx = bs * vi[k];
186: x1 = x[jdx];
187: x2 = x[1 + jdx];
188: x3 = x[2 + jdx];
189: s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
190: s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
191: s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
193: v += bs2;
194: }
196: x[idx] = s1;
197: x[1 + idx] = s2;
198: x[2 + idx] = s3;
199: }
201: PetscCall(VecRestoreArrayRead(bb, &b));
202: PetscCall(VecRestoreArray(xx, &x));
203: PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz) - bs * A->cmap->n));
204: PetscFunctionReturn(PETSC_SUCCESS);
205: }
207: PetscErrorCode MatBackwardSolve_SeqBAIJ_3_NaturalOrdering(Mat A, Vec bb, Vec xx)
208: {
209: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
210: const PetscInt n = a->mbs, *vi, *aj = a->j, *adiag = a->diag;
211: PetscInt i, k, nz, idx, idt;
212: const PetscInt bs = A->rmap->bs, bs2 = a->bs2;
213: const MatScalar *aa = a->a, *v;
214: PetscScalar *x;
215: const PetscScalar *b;
216: PetscScalar s1, s2, s3, x1, x2, x3;
218: PetscFunctionBegin;
219: PetscCall(VecGetArrayRead(bb, &b));
220: PetscCall(VecGetArray(xx, &x));
222: /* backward solve the upper triangular */
223: for (i = n - 1; i >= 0; i--) {
224: v = aa + bs2 * (adiag[i + 1] + 1);
225: vi = aj + adiag[i + 1] + 1;
226: nz = adiag[i] - adiag[i + 1] - 1;
227: idt = bs * i;
228: s1 = b[idt];
229: s2 = b[1 + idt];
230: s3 = b[2 + idt];
232: for (k = 0; k < nz; k++) {
233: idx = bs * vi[k];
234: x1 = x[idx];
235: x2 = x[1 + idx];
236: x3 = x[2 + idx];
237: s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
238: s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
239: s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
241: v += bs2;
242: }
243: /* x = inv_diagonal*x */
244: x[idt] = v[0] * s1 + v[3] * s2 + v[6] * s3;
245: x[1 + idt] = v[1] * s1 + v[4] * s2 + v[7] * s3;
246: x[2 + idt] = v[2] * s1 + v[5] * s2 + v[8] * s3;
247: }
249: PetscCall(VecRestoreArrayRead(bb, &b));
250: PetscCall(VecRestoreArray(xx, &x));
251: PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n));
252: PetscFunctionReturn(PETSC_SUCCESS);
253: }