Actual source code: baijsolvnat5.c
1: #include <../src/mat/impls/baij/seq/baij.h>
2: #include <petsc/private/kernels/blockinvert.h>
4: PetscErrorCode MatSolve_SeqBAIJ_5_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
5: {
6: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
7: const PetscInt *diag = a->diag, n = a->mbs, *vi, *ai = a->i, *aj = a->j;
8: PetscInt i, nz, idx, idt, jdx;
9: const MatScalar *aa = a->a, *v;
10: PetscScalar *x, s1, s2, s3, s4, s5, x1, x2, x3, x4, x5;
11: const PetscScalar *b;
13: PetscFunctionBegin;
14: PetscCall(VecGetArrayRead(bb, &b));
15: PetscCall(VecGetArray(xx, &x));
16: /* forward solve the lower triangular */
17: idx = 0;
18: x[0] = b[idx];
19: x[1] = b[1 + idx];
20: x[2] = b[2 + idx];
21: x[3] = b[3 + idx];
22: x[4] = b[4 + idx];
23: for (i = 1; i < n; i++) {
24: v = aa + 25 * ai[i];
25: vi = aj + ai[i];
26: nz = diag[i] - ai[i];
27: idx = 5 * i;
28: s1 = b[idx];
29: s2 = b[1 + idx];
30: s3 = b[2 + idx];
31: s4 = b[3 + idx];
32: s5 = b[4 + idx];
33: while (nz--) {
34: jdx = 5 * (*vi++);
35: x1 = x[jdx];
36: x2 = x[1 + jdx];
37: x3 = x[2 + jdx];
38: x4 = x[3 + jdx];
39: x5 = x[4 + jdx];
40: s1 -= v[0] * x1 + v[5] * x2 + v[10] * x3 + v[15] * x4 + v[20] * x5;
41: s2 -= v[1] * x1 + v[6] * x2 + v[11] * x3 + v[16] * x4 + v[21] * x5;
42: s3 -= v[2] * x1 + v[7] * x2 + v[12] * x3 + v[17] * x4 + v[22] * x5;
43: s4 -= v[3] * x1 + v[8] * x2 + v[13] * x3 + v[18] * x4 + v[23] * x5;
44: s5 -= v[4] * x1 + v[9] * x2 + v[14] * x3 + v[19] * x4 + v[24] * x5;
45: v += 25;
46: }
47: x[idx] = s1;
48: x[1 + idx] = s2;
49: x[2 + idx] = s3;
50: x[3 + idx] = s4;
51: x[4 + idx] = s5;
52: }
53: /* backward solve the upper triangular */
54: for (i = n - 1; i >= 0; i--) {
55: v = aa + 25 * diag[i] + 25;
56: vi = aj + diag[i] + 1;
57: nz = ai[i + 1] - diag[i] - 1;
58: idt = 5 * i;
59: s1 = x[idt];
60: s2 = x[1 + idt];
61: s3 = x[2 + idt];
62: s4 = x[3 + idt];
63: s5 = x[4 + idt];
64: while (nz--) {
65: idx = 5 * (*vi++);
66: x1 = x[idx];
67: x2 = x[1 + idx];
68: x3 = x[2 + idx];
69: x4 = x[3 + idx];
70: x5 = x[4 + idx];
71: s1 -= v[0] * x1 + v[5] * x2 + v[10] * x3 + v[15] * x4 + v[20] * x5;
72: s2 -= v[1] * x1 + v[6] * x2 + v[11] * x3 + v[16] * x4 + v[21] * x5;
73: s3 -= v[2] * x1 + v[7] * x2 + v[12] * x3 + v[17] * x4 + v[22] * x5;
74: s4 -= v[3] * x1 + v[8] * x2 + v[13] * x3 + v[18] * x4 + v[23] * x5;
75: s5 -= v[4] * x1 + v[9] * x2 + v[14] * x3 + v[19] * x4 + v[24] * x5;
76: v += 25;
77: }
78: v = aa + 25 * diag[i];
79: x[idt] = v[0] * s1 + v[5] * s2 + v[10] * s3 + v[15] * s4 + v[20] * s5;
80: x[1 + idt] = v[1] * s1 + v[6] * s2 + v[11] * s3 + v[16] * s4 + v[21] * s5;
81: x[2 + idt] = v[2] * s1 + v[7] * s2 + v[12] * s3 + v[17] * s4 + v[22] * s5;
82: x[3 + idt] = v[3] * s1 + v[8] * s2 + v[13] * s3 + v[18] * s4 + v[23] * s5;
83: x[4 + idt] = v[4] * s1 + v[9] * s2 + v[14] * s3 + v[19] * s4 + v[24] * s5;
84: }
86: PetscCall(VecRestoreArrayRead(bb, &b));
87: PetscCall(VecRestoreArray(xx, &x));
88: PetscCall(PetscLogFlops(2.0 * 25 * (a->nz) - 5.0 * A->cmap->n));
89: PetscFunctionReturn(PETSC_SUCCESS);
90: }
92: PetscErrorCode MatSolve_SeqBAIJ_5_NaturalOrdering(Mat A, Vec bb, Vec xx)
93: {
94: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
95: const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
96: PetscInt i, k, nz, idx, idt, jdx;
97: const MatScalar *aa = a->a, *v;
98: PetscScalar *x, s1, s2, s3, s4, s5, x1, x2, x3, x4, x5;
99: const PetscScalar *b;
101: PetscFunctionBegin;
102: PetscCall(VecGetArrayRead(bb, &b));
103: PetscCall(VecGetArray(xx, &x));
104: /* forward solve the lower triangular */
105: idx = 0;
106: x[0] = b[idx];
107: x[1] = b[1 + idx];
108: x[2] = b[2 + idx];
109: x[3] = b[3 + idx];
110: x[4] = b[4 + idx];
111: for (i = 1; i < n; i++) {
112: v = aa + 25 * ai[i];
113: vi = aj + ai[i];
114: nz = ai[i + 1] - ai[i];
115: idx = 5 * i;
116: s1 = b[idx];
117: s2 = b[1 + idx];
118: s3 = b[2 + idx];
119: s4 = b[3 + idx];
120: s5 = b[4 + idx];
121: for (k = 0; k < nz; k++) {
122: jdx = 5 * vi[k];
123: x1 = x[jdx];
124: x2 = x[1 + jdx];
125: x3 = x[2 + jdx];
126: x4 = x[3 + jdx];
127: x5 = x[4 + jdx];
128: s1 -= v[0] * x1 + v[5] * x2 + v[10] * x3 + v[15] * x4 + v[20] * x5;
129: s2 -= v[1] * x1 + v[6] * x2 + v[11] * x3 + v[16] * x4 + v[21] * x5;
130: s3 -= v[2] * x1 + v[7] * x2 + v[12] * x3 + v[17] * x4 + v[22] * x5;
131: s4 -= v[3] * x1 + v[8] * x2 + v[13] * x3 + v[18] * x4 + v[23] * x5;
132: s5 -= v[4] * x1 + v[9] * x2 + v[14] * x3 + v[19] * x4 + v[24] * x5;
133: v += 25;
134: }
135: x[idx] = s1;
136: x[1 + idx] = s2;
137: x[2 + idx] = s3;
138: x[3 + idx] = s4;
139: x[4 + idx] = s5;
140: }
142: /* backward solve the upper triangular */
143: for (i = n - 1; i >= 0; i--) {
144: v = aa + 25 * (adiag[i + 1] + 1);
145: vi = aj + adiag[i + 1] + 1;
146: nz = adiag[i] - adiag[i + 1] - 1;
147: idt = 5 * i;
148: s1 = x[idt];
149: s2 = x[1 + idt];
150: s3 = x[2 + idt];
151: s4 = x[3 + idt];
152: s5 = x[4 + idt];
153: for (k = 0; k < nz; k++) {
154: idx = 5 * vi[k];
155: x1 = x[idx];
156: x2 = x[1 + idx];
157: x3 = x[2 + idx];
158: x4 = x[3 + idx];
159: x5 = x[4 + idx];
160: s1 -= v[0] * x1 + v[5] * x2 + v[10] * x3 + v[15] * x4 + v[20] * x5;
161: s2 -= v[1] * x1 + v[6] * x2 + v[11] * x3 + v[16] * x4 + v[21] * x5;
162: s3 -= v[2] * x1 + v[7] * x2 + v[12] * x3 + v[17] * x4 + v[22] * x5;
163: s4 -= v[3] * x1 + v[8] * x2 + v[13] * x3 + v[18] * x4 + v[23] * x5;
164: s5 -= v[4] * x1 + v[9] * x2 + v[14] * x3 + v[19] * x4 + v[24] * x5;
165: v += 25;
166: }
167: /* x = inv_diagonal*x */
168: x[idt] = v[0] * s1 + v[5] * s2 + v[10] * s3 + v[15] * s4 + v[20] * s5;
169: x[1 + idt] = v[1] * s1 + v[6] * s2 + v[11] * s3 + v[16] * s4 + v[21] * s5;
170: x[2 + idt] = v[2] * s1 + v[7] * s2 + v[12] * s3 + v[17] * s4 + v[22] * s5;
171: x[3 + idt] = v[3] * s1 + v[8] * s2 + v[13] * s3 + v[18] * s4 + v[23] * s5;
172: x[4 + idt] = v[4] * s1 + v[9] * s2 + v[14] * s3 + v[19] * s4 + v[24] * s5;
173: }
175: PetscCall(VecRestoreArrayRead(bb, &b));
176: PetscCall(VecRestoreArray(xx, &x));
177: PetscCall(PetscLogFlops(2.0 * 25 * (a->nz) - 5.0 * A->cmap->n));
178: PetscFunctionReturn(PETSC_SUCCESS);
179: }