Actual source code: baijsolvnat6.c
1: #include <../src/mat/impls/baij/seq/baij.h>
2: #include <petsc/private/kernels/blockinvert.h>
4: PetscErrorCode MatSolve_SeqBAIJ_6_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
5: {
6: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
7: PetscInt i, nz, idx, idt, jdx;
8: const PetscInt *diag = a->diag, *vi, n = a->mbs, *ai = a->i, *aj = a->j;
9: const MatScalar *aa = a->a, *v;
10: PetscScalar *x, s1, s2, s3, s4, s5, s6, x1, x2, x3, x4, x5, x6;
11: const PetscScalar *b;
13: PetscFunctionBegin;
14: PetscCall(VecGetArrayRead(bb, &b));
15: PetscCall(VecGetArray(xx, &x));
16: /* forward solve the lower triangular */
17: idx = 0;
18: x[0] = b[idx];
19: x[1] = b[1 + idx];
20: x[2] = b[2 + idx];
21: x[3] = b[3 + idx];
22: x[4] = b[4 + idx];
23: x[5] = b[5 + idx];
24: for (i = 1; i < n; i++) {
25: v = aa + 36 * ai[i];
26: vi = aj + ai[i];
27: nz = diag[i] - ai[i];
28: idx = 6 * i;
29: s1 = b[idx];
30: s2 = b[1 + idx];
31: s3 = b[2 + idx];
32: s4 = b[3 + idx];
33: s5 = b[4 + idx];
34: s6 = b[5 + idx];
35: while (nz--) {
36: jdx = 6 * (*vi++);
37: x1 = x[jdx];
38: x2 = x[1 + jdx];
39: x3 = x[2 + jdx];
40: x4 = x[3 + jdx];
41: x5 = x[4 + jdx];
42: x6 = x[5 + jdx];
43: s1 -= v[0] * x1 + v[6] * x2 + v[12] * x3 + v[18] * x4 + v[24] * x5 + v[30] * x6;
44: s2 -= v[1] * x1 + v[7] * x2 + v[13] * x3 + v[19] * x4 + v[25] * x5 + v[31] * x6;
45: s3 -= v[2] * x1 + v[8] * x2 + v[14] * x3 + v[20] * x4 + v[26] * x5 + v[32] * x6;
46: s4 -= v[3] * x1 + v[9] * x2 + v[15] * x3 + v[21] * x4 + v[27] * x5 + v[33] * x6;
47: s5 -= v[4] * x1 + v[10] * x2 + v[16] * x3 + v[22] * x4 + v[28] * x5 + v[34] * x6;
48: s6 -= v[5] * x1 + v[11] * x2 + v[17] * x3 + v[23] * x4 + v[29] * x5 + v[35] * x6;
49: v += 36;
50: }
51: x[idx] = s1;
52: x[1 + idx] = s2;
53: x[2 + idx] = s3;
54: x[3 + idx] = s4;
55: x[4 + idx] = s5;
56: x[5 + idx] = s6;
57: }
58: /* backward solve the upper triangular */
59: for (i = n - 1; i >= 0; i--) {
60: v = aa + 36 * diag[i] + 36;
61: vi = aj + diag[i] + 1;
62: nz = ai[i + 1] - diag[i] - 1;
63: idt = 6 * i;
64: s1 = x[idt];
65: s2 = x[1 + idt];
66: s3 = x[2 + idt];
67: s4 = x[3 + idt];
68: s5 = x[4 + idt];
69: s6 = x[5 + idt];
70: while (nz--) {
71: idx = 6 * (*vi++);
72: x1 = x[idx];
73: x2 = x[1 + idx];
74: x3 = x[2 + idx];
75: x4 = x[3 + idx];
76: x5 = x[4 + idx];
77: x6 = x[5 + idx];
78: s1 -= v[0] * x1 + v[6] * x2 + v[12] * x3 + v[18] * x4 + v[24] * x5 + v[30] * x6;
79: s2 -= v[1] * x1 + v[7] * x2 + v[13] * x3 + v[19] * x4 + v[25] * x5 + v[31] * x6;
80: s3 -= v[2] * x1 + v[8] * x2 + v[14] * x3 + v[20] * x4 + v[26] * x5 + v[32] * x6;
81: s4 -= v[3] * x1 + v[9] * x2 + v[15] * x3 + v[21] * x4 + v[27] * x5 + v[33] * x6;
82: s5 -= v[4] * x1 + v[10] * x2 + v[16] * x3 + v[22] * x4 + v[28] * x5 + v[34] * x6;
83: s6 -= v[5] * x1 + v[11] * x2 + v[17] * x3 + v[23] * x4 + v[29] * x5 + v[35] * x6;
84: v += 36;
85: }
86: v = aa + 36 * diag[i];
87: x[idt] = v[0] * s1 + v[6] * s2 + v[12] * s3 + v[18] * s4 + v[24] * s5 + v[30] * s6;
88: x[1 + idt] = v[1] * s1 + v[7] * s2 + v[13] * s3 + v[19] * s4 + v[25] * s5 + v[31] * s6;
89: x[2 + idt] = v[2] * s1 + v[8] * s2 + v[14] * s3 + v[20] * s4 + v[26] * s5 + v[32] * s6;
90: x[3 + idt] = v[3] * s1 + v[9] * s2 + v[15] * s3 + v[21] * s4 + v[27] * s5 + v[33] * s6;
91: x[4 + idt] = v[4] * s1 + v[10] * s2 + v[16] * s3 + v[22] * s4 + v[28] * s5 + v[34] * s6;
92: x[5 + idt] = v[5] * s1 + v[11] * s2 + v[17] * s3 + v[23] * s4 + v[29] * s5 + v[35] * s6;
93: }
95: PetscCall(VecRestoreArrayRead(bb, &b));
96: PetscCall(VecRestoreArray(xx, &x));
97: PetscCall(PetscLogFlops(2.0 * 36 * (a->nz) - 6.0 * A->cmap->n));
98: PetscFunctionReturn(PETSC_SUCCESS);
99: }
101: PetscErrorCode MatSolve_SeqBAIJ_6_NaturalOrdering(Mat A, Vec bb, Vec xx)
102: {
103: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
104: const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
105: PetscInt i, k, nz, idx, jdx, idt;
106: const PetscInt bs = A->rmap->bs, bs2 = a->bs2;
107: const MatScalar *aa = a->a, *v;
108: PetscScalar *x;
109: const PetscScalar *b;
110: PetscScalar s1, s2, s3, s4, s5, s6, x1, x2, x3, x4, x5, x6;
112: PetscFunctionBegin;
113: PetscCall(VecGetArrayRead(bb, &b));
114: PetscCall(VecGetArray(xx, &x));
115: /* forward solve the lower triangular */
116: idx = 0;
117: x[0] = b[idx];
118: x[1] = b[1 + idx];
119: x[2] = b[2 + idx];
120: x[3] = b[3 + idx];
121: x[4] = b[4 + idx];
122: x[5] = b[5 + idx];
123: for (i = 1; i < n; i++) {
124: v = aa + bs2 * ai[i];
125: vi = aj + ai[i];
126: nz = ai[i + 1] - ai[i];
127: idx = bs * i;
128: s1 = b[idx];
129: s2 = b[1 + idx];
130: s3 = b[2 + idx];
131: s4 = b[3 + idx];
132: s5 = b[4 + idx];
133: s6 = b[5 + idx];
134: for (k = 0; k < nz; k++) {
135: jdx = bs * vi[k];
136: x1 = x[jdx];
137: x2 = x[1 + jdx];
138: x3 = x[2 + jdx];
139: x4 = x[3 + jdx];
140: x5 = x[4 + jdx];
141: x6 = x[5 + jdx];
142: s1 -= v[0] * x1 + v[6] * x2 + v[12] * x3 + v[18] * x4 + v[24] * x5 + v[30] * x6;
143: s2 -= v[1] * x1 + v[7] * x2 + v[13] * x3 + v[19] * x4 + v[25] * x5 + v[31] * x6;
144: s3 -= v[2] * x1 + v[8] * x2 + v[14] * x3 + v[20] * x4 + v[26] * x5 + v[32] * x6;
145: s4 -= v[3] * x1 + v[9] * x2 + v[15] * x3 + v[21] * x4 + v[27] * x5 + v[33] * x6;
146: s5 -= v[4] * x1 + v[10] * x2 + v[16] * x3 + v[22] * x4 + v[28] * x5 + v[34] * x6;
147: s6 -= v[5] * x1 + v[11] * x2 + v[17] * x3 + v[23] * x4 + v[29] * x5 + v[35] * x6;
148: v += bs2;
149: }
151: x[idx] = s1;
152: x[1 + idx] = s2;
153: x[2 + idx] = s3;
154: x[3 + idx] = s4;
155: x[4 + idx] = s5;
156: x[5 + idx] = s6;
157: }
159: /* backward solve the upper triangular */
160: for (i = n - 1; i >= 0; i--) {
161: v = aa + bs2 * (adiag[i + 1] + 1);
162: vi = aj + adiag[i + 1] + 1;
163: nz = adiag[i] - adiag[i + 1] - 1;
164: idt = bs * i;
165: s1 = x[idt];
166: s2 = x[1 + idt];
167: s3 = x[2 + idt];
168: s4 = x[3 + idt];
169: s5 = x[4 + idt];
170: s6 = x[5 + idt];
171: for (k = 0; k < nz; k++) {
172: idx = bs * vi[k];
173: x1 = x[idx];
174: x2 = x[1 + idx];
175: x3 = x[2 + idx];
176: x4 = x[3 + idx];
177: x5 = x[4 + idx];
178: x6 = x[5 + idx];
179: s1 -= v[0] * x1 + v[6] * x2 + v[12] * x3 + v[18] * x4 + v[24] * x5 + v[30] * x6;
180: s2 -= v[1] * x1 + v[7] * x2 + v[13] * x3 + v[19] * x4 + v[25] * x5 + v[31] * x6;
181: s3 -= v[2] * x1 + v[8] * x2 + v[14] * x3 + v[20] * x4 + v[26] * x5 + v[32] * x6;
182: s4 -= v[3] * x1 + v[9] * x2 + v[15] * x3 + v[21] * x4 + v[27] * x5 + v[33] * x6;
183: s5 -= v[4] * x1 + v[10] * x2 + v[16] * x3 + v[22] * x4 + v[28] * x5 + v[34] * x6;
184: s6 -= v[5] * x1 + v[11] * x2 + v[17] * x3 + v[23] * x4 + v[29] * x5 + v[35] * x6;
185: v += bs2;
186: }
187: /* x = inv_diagonal*x */
188: x[idt] = v[0] * s1 + v[6] * s2 + v[12] * s3 + v[18] * s4 + v[24] * s5 + v[30] * s6;
189: x[1 + idt] = v[1] * s1 + v[7] * s2 + v[13] * s3 + v[19] * s4 + v[25] * s5 + v[31] * s6;
190: x[2 + idt] = v[2] * s1 + v[8] * s2 + v[14] * s3 + v[20] * s4 + v[26] * s5 + v[32] * s6;
191: x[3 + idt] = v[3] * s1 + v[9] * s2 + v[15] * s3 + v[21] * s4 + v[27] * s5 + v[33] * s6;
192: x[4 + idt] = v[4] * s1 + v[10] * s2 + v[16] * s3 + v[22] * s4 + v[28] * s5 + v[34] * s6;
193: x[5 + idt] = v[5] * s1 + v[11] * s2 + v[17] * s3 + v[23] * s4 + v[29] * s5 + v[35] * s6;
194: }
196: PetscCall(VecRestoreArrayRead(bb, &b));
197: PetscCall(VecRestoreArray(xx, &x));
198: PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n));
199: PetscFunctionReturn(PETSC_SUCCESS);
200: }