Actual source code: dgefa2.c
1: /*
2: Inverts 2 by 2 matrix using gaussian elimination with partial pivoting.
4: Used by the sparse factorization routines in
5: src/mat/impls/baij/seq
7: This is a combination of the Linpack routines
8: dgefa() and dgedi() specialized for a size of 2.
10: */
11: #include <petscsys.h>
13: PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_2(MatScalar *a, PetscReal shift, PetscBool allowzeropivot, PetscBool *zeropivotdetected)
14: {
15: PetscInt i__2, i__3, kp1, j, k, l, ll, i, ipvt[2], k3;
16: PetscInt k4, j3;
17: MatScalar *aa, *ax, *ay, work[4], stmp;
18: MatReal tmp, max;
20: PetscFunctionBegin;
21: if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;
22: shift = .25 * shift * (1.e-12 + PetscAbsScalar(a[0]) + PetscAbsScalar(a[3]));
24: /* Parameter adjustments */
25: a -= 3;
27: k = 1;
28: kp1 = k + 1;
29: k3 = 2 * k;
30: k4 = k3 + k;
32: /* find l = pivot index */
33: i__2 = 3 - k;
34: aa = &a[k4];
35: max = PetscAbsScalar(aa[0]);
36: l = 1;
37: for (ll = 1; ll < i__2; ll++) {
38: tmp = PetscAbsScalar(aa[ll]);
39: if (tmp > max) {
40: max = tmp;
41: l = ll + 1;
42: }
43: }
44: l += k - 1;
45: ipvt[k - 1] = l;
47: if (a[l + k3] == 0.0) {
48: if (shift == 0.0) {
49: if (allowzeropivot) {
50: PetscCall(PetscInfo(NULL, "Zero pivot, row %" PetscInt_FMT "\n", k - 1));
51: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
52: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT, k - 1);
53: } else {
54: a[l + k3] = shift;
55: }
56: }
58: /* interchange if necessary */
59: if (l != k) {
60: stmp = a[l + k3];
61: a[l + k3] = a[k4];
62: a[k4] = stmp;
63: }
65: /* compute multipliers */
66: stmp = -1. / a[k4];
67: i__2 = 2 - k;
68: aa = &a[1 + k4];
69: for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp;
71: /* row elimination with column indexing */
72: ax = &a[k4 + 1];
73: for (j = kp1; j <= 2; ++j) {
74: j3 = 2 * j;
75: stmp = a[l + j3];
76: if (l != k) {
77: a[l + j3] = a[k + j3];
78: a[k + j3] = stmp;
79: }
81: i__3 = 2 - k;
82: ay = &a[1 + k + j3];
83: for (ll = 0; ll < i__3; ll++) ay[ll] += stmp * ax[ll];
84: }
86: ipvt[1] = 2;
87: if (a[6] == 0.0) {
88: if (PetscLikely(allowzeropivot)) {
89: PetscCall(PetscInfo(NULL, "Zero pivot, row 1\n"));
90: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
91: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row 1");
92: }
94: /* Now form the inverse */
95: /* compute inverse(u) */
96: for (k = 1; k <= 2; ++k) {
97: k3 = 2 * k;
98: k4 = k3 + k;
99: a[k4] = 1.0 / a[k4];
100: stmp = -a[k4];
101: i__2 = k - 1;
102: aa = &a[k3 + 1];
103: for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp;
104: kp1 = k + 1;
105: if (2 < kp1) continue;
106: ax = aa;
107: for (j = kp1; j <= 2; ++j) {
108: j3 = 2 * j;
109: stmp = a[k + j3];
110: a[k + j3] = 0.0;
111: ay = &a[j3 + 1];
112: for (ll = 0; ll < k; ll++) ay[ll] += stmp * ax[ll];
113: }
114: }
116: /* form inverse(u)*inverse(l) */
117: k = 1;
118: k3 = 2 * k;
119: kp1 = k + 1;
120: aa = a + k3;
121: for (i = kp1; i <= 2; ++i) {
122: work[i - 1] = aa[i];
123: aa[i] = 0.0;
124: }
125: for (j = kp1; j <= 2; ++j) {
126: stmp = work[j - 1];
127: ax = &a[2 * j + 1];
128: ay = &a[k3 + 1];
129: ay[0] += stmp * ax[0];
130: ay[1] += stmp * ax[1];
131: }
132: l = ipvt[k - 1];
133: if (l != k) {
134: ax = &a[k3 + 1];
135: ay = &a[2 * l + 1];
136: stmp = ax[0];
137: ax[0] = ay[0];
138: ay[0] = stmp;
139: stmp = ax[1];
140: ax[1] = ay[1];
141: ay[1] = stmp;
142: }
143: PetscFunctionReturn(PETSC_SUCCESS);
144: }
146: /* gaussian elimination with partial pivoting */
147: PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_9(MatScalar *a, PetscReal shift, PetscBool allowzeropivot, PetscBool *zeropivotdetected)
148: {
149: PetscInt i__2, i__3, kp1, j, k, l, ll, i, ipvt[9], kb, k3;
150: PetscInt k4, j3;
151: MatScalar *aa, *ax, *ay, work[81], stmp;
152: MatReal tmp, max;
154: PetscFunctionBegin;
155: if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;
157: /* Parameter adjustments */
158: a -= 10;
160: for (k = 1; k <= 8; ++k) {
161: kp1 = k + 1;
162: k3 = 9 * k;
163: k4 = k3 + k;
165: /* find l = pivot index */
166: i__2 = 10 - k;
167: aa = &a[k4];
168: max = PetscAbsScalar(aa[0]);
169: l = 1;
170: for (ll = 1; ll < i__2; ll++) {
171: tmp = PetscAbsScalar(aa[ll]);
172: if (tmp > max) {
173: max = tmp;
174: l = ll + 1;
175: }
176: }
177: l += k - 1;
178: ipvt[k - 1] = l;
180: if (a[l + k3] == 0.0) {
181: if (shift == 0.0) {
182: if (allowzeropivot) {
183: PetscCall(PetscInfo(NULL, "Zero pivot, row %" PetscInt_FMT "\n", k - 1));
184: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
185: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT, k - 1);
186: } else {
187: a[l + k3] = shift;
188: }
189: }
191: /* interchange if necessary */
192: if (l != k) {
193: stmp = a[l + k3];
194: a[l + k3] = a[k4];
195: a[k4] = stmp;
196: }
198: /* compute multipliers */
199: stmp = -1. / a[k4];
200: i__2 = 9 - k;
201: aa = &a[1 + k4];
202: for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp;
204: /* row elimination with column indexing */
205: ax = &a[k4 + 1];
206: for (j = kp1; j <= 9; ++j) {
207: j3 = 9 * j;
208: stmp = a[l + j3];
209: if (l != k) {
210: a[l + j3] = a[k + j3];
211: a[k + j3] = stmp;
212: }
214: i__3 = 9 - k;
215: ay = &a[1 + k + j3];
216: for (ll = 0; ll < i__3; ll++) ay[ll] += stmp * ax[ll];
217: }
218: }
219: ipvt[8] = 9;
220: if (a[90] == 0.0) {
221: if (PetscLikely(allowzeropivot)) {
222: PetscCall(PetscInfo(NULL, "Zero pivot, row 8\n"));
223: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
224: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row 8");
225: }
227: /* Now form the inverse */
228: /* compute inverse(u) */
229: for (k = 1; k <= 9; ++k) {
230: k3 = 9 * k;
231: k4 = k3 + k;
232: a[k4] = 1.0 / a[k4];
233: stmp = -a[k4];
234: i__2 = k - 1;
235: aa = &a[k3 + 1];
236: for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp;
237: kp1 = k + 1;
238: if (9 < kp1) continue;
239: ax = aa;
240: for (j = kp1; j <= 9; ++j) {
241: j3 = 9 * j;
242: stmp = a[k + j3];
243: a[k + j3] = 0.0;
244: ay = &a[j3 + 1];
245: for (ll = 0; ll < k; ll++) ay[ll] += stmp * ax[ll];
246: }
247: }
249: /* form inverse(u)*inverse(l) */
250: for (kb = 1; kb <= 8; ++kb) {
251: k = 9 - kb;
252: k3 = 9 * k;
253: kp1 = k + 1;
254: aa = a + k3;
255: for (i = kp1; i <= 9; ++i) {
256: work[i - 1] = aa[i];
257: aa[i] = 0.0;
258: }
259: for (j = kp1; j <= 9; ++j) {
260: stmp = work[j - 1];
261: ax = &a[9 * j + 1];
262: ay = &a[k3 + 1];
263: ay[0] += stmp * ax[0];
264: ay[1] += stmp * ax[1];
265: ay[2] += stmp * ax[2];
266: ay[3] += stmp * ax[3];
267: ay[4] += stmp * ax[4];
268: ay[5] += stmp * ax[5];
269: ay[6] += stmp * ax[6];
270: ay[7] += stmp * ax[7];
271: ay[8] += stmp * ax[8];
272: }
273: l = ipvt[k - 1];
274: if (l != k) {
275: ax = &a[k3 + 1];
276: ay = &a[9 * l + 1];
277: stmp = ax[0];
278: ax[0] = ay[0];
279: ay[0] = stmp;
280: stmp = ax[1];
281: ax[1] = ay[1];
282: ay[1] = stmp;
283: stmp = ax[2];
284: ax[2] = ay[2];
285: ay[2] = stmp;
286: stmp = ax[3];
287: ax[3] = ay[3];
288: ay[3] = stmp;
289: stmp = ax[4];
290: ax[4] = ay[4];
291: ay[4] = stmp;
292: stmp = ax[5];
293: ax[5] = ay[5];
294: ay[5] = stmp;
295: stmp = ax[6];
296: ax[6] = ay[6];
297: ay[6] = stmp;
298: stmp = ax[7];
299: ax[7] = ay[7];
300: ay[7] = stmp;
301: stmp = ax[8];
302: ax[8] = ay[8];
303: ay[8] = stmp;
304: }
305: }
306: PetscFunctionReturn(PETSC_SUCCESS);
307: }
309: /*
310: Inverts 15 by 15 matrix using gaussian elimination with partial pivoting.
312: Used by the sparse factorization routines in
313: src/mat/impls/baij/seq
315: This is a combination of the Linpack routines
316: dgefa() and dgedi() specialized for a size of 15.
318: */
320: PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_15(MatScalar *a, PetscInt *ipvt, MatScalar *work, PetscReal shift, PetscBool allowzeropivot, PetscBool *zeropivotdetected)
321: {
322: PetscInt i__2, i__3, kp1, j, k, l, ll, i, kb, k3;
323: PetscInt k4, j3;
324: MatScalar *aa, *ax, *ay, stmp;
325: MatReal tmp, max;
327: PetscFunctionBegin;
328: if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;
330: /* Parameter adjustments */
331: a -= 16;
333: for (k = 1; k <= 14; ++k) {
334: kp1 = k + 1;
335: k3 = 15 * k;
336: k4 = k3 + k;
338: /* find l = pivot index */
339: i__2 = 16 - k;
340: aa = &a[k4];
341: max = PetscAbsScalar(aa[0]);
342: l = 1;
343: for (ll = 1; ll < i__2; ll++) {
344: tmp = PetscAbsScalar(aa[ll]);
345: if (tmp > max) {
346: max = tmp;
347: l = ll + 1;
348: }
349: }
350: l += k - 1;
351: ipvt[k - 1] = l;
353: if (a[l + k3] == 0.0) {
354: if (shift == 0.0) {
355: if (allowzeropivot) {
356: PetscCall(PetscInfo(NULL, "Zero pivot, row %" PetscInt_FMT "\n", k - 1));
357: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
358: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT, k - 1);
359: } else {
360: a[l + k3] = shift;
361: }
362: }
364: /* interchange if necessary */
365: if (l != k) {
366: stmp = a[l + k3];
367: a[l + k3] = a[k4];
368: a[k4] = stmp;
369: }
371: /* compute multipliers */
372: stmp = -1. / a[k4];
373: i__2 = 15 - k;
374: aa = &a[1 + k4];
375: for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp;
377: /* row elimination with column indexing */
378: ax = &a[k4 + 1];
379: for (j = kp1; j <= 15; ++j) {
380: j3 = 15 * j;
381: stmp = a[l + j3];
382: if (l != k) {
383: a[l + j3] = a[k + j3];
384: a[k + j3] = stmp;
385: }
387: i__3 = 15 - k;
388: ay = &a[1 + k + j3];
389: for (ll = 0; ll < i__3; ll++) ay[ll] += stmp * ax[ll];
390: }
391: }
392: ipvt[14] = 15;
393: if (a[240] == 0.0) {
394: if (PetscLikely(allowzeropivot)) {
395: PetscCall(PetscInfo(NULL, "Zero pivot, row 14\n"));
396: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
397: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row 14");
398: }
400: /* Now form the inverse */
401: /* compute inverse(u) */
402: for (k = 1; k <= 15; ++k) {
403: k3 = 15 * k;
404: k4 = k3 + k;
405: a[k4] = 1.0 / a[k4];
406: stmp = -a[k4];
407: i__2 = k - 1;
408: aa = &a[k3 + 1];
409: for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp;
410: kp1 = k + 1;
411: if (15 < kp1) continue;
412: ax = aa;
413: for (j = kp1; j <= 15; ++j) {
414: j3 = 15 * j;
415: stmp = a[k + j3];
416: a[k + j3] = 0.0;
417: ay = &a[j3 + 1];
418: for (ll = 0; ll < k; ll++) ay[ll] += stmp * ax[ll];
419: }
420: }
422: /* form inverse(u)*inverse(l) */
423: for (kb = 1; kb <= 14; ++kb) {
424: k = 15 - kb;
425: k3 = 15 * k;
426: kp1 = k + 1;
427: aa = a + k3;
428: for (i = kp1; i <= 15; ++i) {
429: work[i - 1] = aa[i];
430: aa[i] = 0.0;
431: }
432: for (j = kp1; j <= 15; ++j) {
433: stmp = work[j - 1];
434: ax = &a[15 * j + 1];
435: ay = &a[k3 + 1];
436: ay[0] += stmp * ax[0];
437: ay[1] += stmp * ax[1];
438: ay[2] += stmp * ax[2];
439: ay[3] += stmp * ax[3];
440: ay[4] += stmp * ax[4];
441: ay[5] += stmp * ax[5];
442: ay[6] += stmp * ax[6];
443: ay[7] += stmp * ax[7];
444: ay[8] += stmp * ax[8];
445: ay[9] += stmp * ax[9];
446: ay[10] += stmp * ax[10];
447: ay[11] += stmp * ax[11];
448: ay[12] += stmp * ax[12];
449: ay[13] += stmp * ax[13];
450: ay[14] += stmp * ax[14];
451: }
452: l = ipvt[k - 1];
453: if (l != k) {
454: ax = &a[k3 + 1];
455: ay = &a[15 * l + 1];
456: stmp = ax[0];
457: ax[0] = ay[0];
458: ay[0] = stmp;
459: stmp = ax[1];
460: ax[1] = ay[1];
461: ay[1] = stmp;
462: stmp = ax[2];
463: ax[2] = ay[2];
464: ay[2] = stmp;
465: stmp = ax[3];
466: ax[3] = ay[3];
467: ay[3] = stmp;
468: stmp = ax[4];
469: ax[4] = ay[4];
470: ay[4] = stmp;
471: stmp = ax[5];
472: ax[5] = ay[5];
473: ay[5] = stmp;
474: stmp = ax[6];
475: ax[6] = ay[6];
476: ay[6] = stmp;
477: stmp = ax[7];
478: ax[7] = ay[7];
479: ay[7] = stmp;
480: stmp = ax[8];
481: ax[8] = ay[8];
482: ay[8] = stmp;
483: stmp = ax[9];
484: ax[9] = ay[9];
485: ay[9] = stmp;
486: stmp = ax[10];
487: ax[10] = ay[10];
488: ay[10] = stmp;
489: stmp = ax[11];
490: ax[11] = ay[11];
491: ay[11] = stmp;
492: stmp = ax[12];
493: ax[12] = ay[12];
494: ay[12] = stmp;
495: stmp = ax[13];
496: ax[13] = ay[13];
497: ay[13] = stmp;
498: stmp = ax[14];
499: ax[14] = ay[14];
500: ay[14] = stmp;
501: }
502: }
503: PetscFunctionReturn(PETSC_SUCCESS);
504: }