Actual source code: dvec2.c
1: /*
2: Defines some vector operation functions that are shared by
3: sequential and parallel vectors.
4: */
5: #include <../src/vec/vec/impls/dvecimpl.h>
6: #include <petsc/private/kernels/petscaxpy.h>
8: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
9: #include <../src/vec/vec/impls/seq/ftn-kernels/fmdot.h>
10: PetscErrorCode VecMDot_Seq(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
11: {
12: const PetscInt n = xin->map->n;
13: PetscInt i = nv, nv_rem = nv & 0x3;
14: PetscScalar sum0 = 0., sum1 = 0., sum2 = 0., sum3;
15: const PetscScalar *yy0, *yy1, *yy2, *yy3, *x;
16: Vec *yy = (Vec *)yin;
18: PetscFunctionBegin;
19: PetscCall(VecGetArrayRead(xin, &x));
20: switch (nv_rem) {
21: case 3:
22: PetscCall(VecGetArrayRead(yy[0], &yy0));
23: PetscCall(VecGetArrayRead(yy[1], &yy1));
24: PetscCall(VecGetArrayRead(yy[2], &yy2));
25: fortranmdot3_(x, yy0, yy1, yy2, &n, &sum0, &sum1, &sum2);
26: PetscCall(VecRestoreArrayRead(yy[0], &yy0));
27: PetscCall(VecRestoreArrayRead(yy[1], &yy1));
28: PetscCall(VecRestoreArrayRead(yy[2], &yy2));
29: z[0] = sum0;
30: z[1] = sum1;
31: z[2] = sum2;
32: break;
33: case 2:
34: PetscCall(VecGetArrayRead(yy[0], &yy0));
35: PetscCall(VecGetArrayRead(yy[1], &yy1));
36: fortranmdot2_(x, yy0, yy1, &n, &sum0, &sum1);
37: PetscCall(VecRestoreArrayRead(yy[0], &yy0));
38: PetscCall(VecRestoreArrayRead(yy[1], &yy1));
39: z[0] = sum0;
40: z[1] = sum1;
41: break;
42: case 1:
43: PetscCall(VecGetArrayRead(yy[0], &yy0));
44: fortranmdot1_(x, yy0, &n, &sum0);
45: PetscCall(VecRestoreArrayRead(yy[0], &yy0));
46: z[0] = sum0;
47: break;
48: case 0:
49: break;
50: }
51: z += nv_rem;
52: i -= nv_rem;
53: yy += nv_rem;
55: while (i > 0) {
56: sum0 = 0.;
57: sum1 = 0.;
58: sum2 = 0.;
59: sum3 = 0.;
60: PetscCall(VecGetArrayRead(yy[0], &yy0));
61: PetscCall(VecGetArrayRead(yy[1], &yy1));
62: PetscCall(VecGetArrayRead(yy[2], &yy2));
63: PetscCall(VecGetArrayRead(yy[3], &yy3));
64: fortranmdot4_(x, yy0, yy1, yy2, yy3, &n, &sum0, &sum1, &sum2, &sum3);
65: PetscCall(VecRestoreArrayRead(yy[0], &yy0));
66: PetscCall(VecRestoreArrayRead(yy[1], &yy1));
67: PetscCall(VecRestoreArrayRead(yy[2], &yy2));
68: PetscCall(VecRestoreArrayRead(yy[3], &yy3));
69: yy += 4;
70: z[0] = sum0;
71: z[1] = sum1;
72: z[2] = sum2;
73: z[3] = sum3;
74: z += 4;
75: i -= 4;
76: }
77: PetscCall(VecRestoreArrayRead(xin, &x));
78: PetscCall(PetscLogFlops(PetscMax(nv * (2.0 * n - 1), 0.0)));
79: PetscFunctionReturn(PETSC_SUCCESS);
80: }
82: #else
83: PetscErrorCode VecMDot_Seq(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
84: {
85: const PetscInt n = xin->map->n;
86: PetscInt i = nv, j = n, nv_rem = nv & 0x3, j_rem;
87: PetscScalar sum0 = 0.0, sum1 = 0.0, sum2 = 0.0, sum3, x0, x1, x2, x3;
88: const PetscScalar *yy0, *yy1, *yy2, *yy3, *x, *xbase;
89: const Vec *yy = (Vec *)yin;
91: PetscFunctionBegin;
92: if (n == 0) {
93: PetscCall(PetscArrayzero(z, nv));
94: PetscFunctionReturn(PETSC_SUCCESS);
95: }
96: PetscCall(VecGetArrayRead(xin, &xbase));
97: x = xbase;
98: switch (nv_rem) {
99: case 3:
100: PetscCall(VecGetArrayRead(yy[0], &yy0));
101: PetscCall(VecGetArrayRead(yy[1], &yy1));
102: PetscCall(VecGetArrayRead(yy[2], &yy2));
103: switch (j_rem = j & 0x3) {
104: case 3:
105: x2 = x[2];
106: sum0 += x2 * PetscConj(yy0[2]);
107: sum1 += x2 * PetscConj(yy1[2]);
108: sum2 += x2 * PetscConj(yy2[2]); /* fall through */
109: case 2:
110: x1 = x[1];
111: sum0 += x1 * PetscConj(yy0[1]);
112: sum1 += x1 * PetscConj(yy1[1]);
113: sum2 += x1 * PetscConj(yy2[1]); /* fall through */
114: case 1:
115: x0 = x[0];
116: sum0 += x0 * PetscConj(yy0[0]);
117: sum1 += x0 * PetscConj(yy1[0]);
118: sum2 += x0 * PetscConj(yy2[0]); /* fall through */
119: case 0:
120: x += j_rem;
121: yy0 += j_rem;
122: yy1 += j_rem;
123: yy2 += j_rem;
124: j -= j_rem;
125: break;
126: }
127: while (j > 0) {
128: x0 = x[0];
129: x1 = x[1];
130: x2 = x[2];
131: x3 = x[3];
132: x += 4;
134: sum0 += x0 * PetscConj(yy0[0]) + x1 * PetscConj(yy0[1]) + x2 * PetscConj(yy0[2]) + x3 * PetscConj(yy0[3]);
135: yy0 += 4;
136: sum1 += x0 * PetscConj(yy1[0]) + x1 * PetscConj(yy1[1]) + x2 * PetscConj(yy1[2]) + x3 * PetscConj(yy1[3]);
137: yy1 += 4;
138: sum2 += x0 * PetscConj(yy2[0]) + x1 * PetscConj(yy2[1]) + x2 * PetscConj(yy2[2]) + x3 * PetscConj(yy2[3]);
139: yy2 += 4;
140: j -= 4;
141: }
142: z[0] = sum0;
143: z[1] = sum1;
144: z[2] = sum2;
145: PetscCall(VecRestoreArrayRead(yy[0], &yy0));
146: PetscCall(VecRestoreArrayRead(yy[1], &yy1));
147: PetscCall(VecRestoreArrayRead(yy[2], &yy2));
148: break;
149: case 2:
150: PetscCall(VecGetArrayRead(yy[0], &yy0));
151: PetscCall(VecGetArrayRead(yy[1], &yy1));
152: switch (j_rem = j & 0x3) {
153: case 3:
154: x2 = x[2];
155: sum0 += x2 * PetscConj(yy0[2]);
156: sum1 += x2 * PetscConj(yy1[2]); /* fall through */
157: case 2:
158: x1 = x[1];
159: sum0 += x1 * PetscConj(yy0[1]);
160: sum1 += x1 * PetscConj(yy1[1]); /* fall through */
161: case 1:
162: x0 = x[0];
163: sum0 += x0 * PetscConj(yy0[0]);
164: sum1 += x0 * PetscConj(yy1[0]); /* fall through */
165: case 0:
166: x += j_rem;
167: yy0 += j_rem;
168: yy1 += j_rem;
169: j -= j_rem;
170: break;
171: }
172: while (j > 0) {
173: x0 = x[0];
174: x1 = x[1];
175: x2 = x[2];
176: x3 = x[3];
177: x += 4;
179: sum0 += x0 * PetscConj(yy0[0]) + x1 * PetscConj(yy0[1]) + x2 * PetscConj(yy0[2]) + x3 * PetscConj(yy0[3]);
180: yy0 += 4;
181: sum1 += x0 * PetscConj(yy1[0]) + x1 * PetscConj(yy1[1]) + x2 * PetscConj(yy1[2]) + x3 * PetscConj(yy1[3]);
182: yy1 += 4;
183: j -= 4;
184: }
185: z[0] = sum0;
186: z[1] = sum1;
188: PetscCall(VecRestoreArrayRead(yy[0], &yy0));
189: PetscCall(VecRestoreArrayRead(yy[1], &yy1));
190: break;
191: case 1:
192: PetscCall(VecGetArrayRead(yy[0], &yy0));
193: switch (j_rem = j & 0x3) {
194: case 3:
195: x2 = x[2];
196: sum0 += x2 * PetscConj(yy0[2]); /* fall through */
197: case 2:
198: x1 = x[1];
199: sum0 += x1 * PetscConj(yy0[1]); /* fall through */
200: case 1:
201: x0 = x[0];
202: sum0 += x0 * PetscConj(yy0[0]); /* fall through */
203: case 0:
204: x += j_rem;
205: yy0 += j_rem;
206: j -= j_rem;
207: break;
208: }
209: while (j > 0) {
210: sum0 += x[0] * PetscConj(yy0[0]) + x[1] * PetscConj(yy0[1]) + x[2] * PetscConj(yy0[2]) + x[3] * PetscConj(yy0[3]);
211: yy0 += 4;
212: j -= 4;
213: x += 4;
214: }
215: z[0] = sum0;
217: PetscCall(VecRestoreArrayRead(yy[0], &yy0));
218: break;
219: case 0:
220: break;
221: }
222: z += nv_rem;
223: i -= nv_rem;
224: yy += nv_rem;
226: while (i > 0) {
227: sum0 = 0.;
228: sum1 = 0.;
229: sum2 = 0.;
230: sum3 = 0.;
231: PetscCall(VecGetArrayRead(yy[0], &yy0));
232: PetscCall(VecGetArrayRead(yy[1], &yy1));
233: PetscCall(VecGetArrayRead(yy[2], &yy2));
234: PetscCall(VecGetArrayRead(yy[3], &yy3));
236: j = n;
237: x = xbase;
238: switch (j_rem = j & 0x3) {
239: case 3:
240: x2 = x[2];
241: sum0 += x2 * PetscConj(yy0[2]);
242: sum1 += x2 * PetscConj(yy1[2]);
243: sum2 += x2 * PetscConj(yy2[2]);
244: sum3 += x2 * PetscConj(yy3[2]); /* fall through */
245: case 2:
246: x1 = x[1];
247: sum0 += x1 * PetscConj(yy0[1]);
248: sum1 += x1 * PetscConj(yy1[1]);
249: sum2 += x1 * PetscConj(yy2[1]);
250: sum3 += x1 * PetscConj(yy3[1]); /* fall through */
251: case 1:
252: x0 = x[0];
253: sum0 += x0 * PetscConj(yy0[0]);
254: sum1 += x0 * PetscConj(yy1[0]);
255: sum2 += x0 * PetscConj(yy2[0]);
256: sum3 += x0 * PetscConj(yy3[0]); /* fall through */
257: case 0:
258: x += j_rem;
259: yy0 += j_rem;
260: yy1 += j_rem;
261: yy2 += j_rem;
262: yy3 += j_rem;
263: j -= j_rem;
264: break;
265: }
266: while (j > 0) {
267: x0 = x[0];
268: x1 = x[1];
269: x2 = x[2];
270: x3 = x[3];
271: x += 4;
273: sum0 += x0 * PetscConj(yy0[0]) + x1 * PetscConj(yy0[1]) + x2 * PetscConj(yy0[2]) + x3 * PetscConj(yy0[3]);
274: yy0 += 4;
275: sum1 += x0 * PetscConj(yy1[0]) + x1 * PetscConj(yy1[1]) + x2 * PetscConj(yy1[2]) + x3 * PetscConj(yy1[3]);
276: yy1 += 4;
277: sum2 += x0 * PetscConj(yy2[0]) + x1 * PetscConj(yy2[1]) + x2 * PetscConj(yy2[2]) + x3 * PetscConj(yy2[3]);
278: yy2 += 4;
279: sum3 += x0 * PetscConj(yy3[0]) + x1 * PetscConj(yy3[1]) + x2 * PetscConj(yy3[2]) + x3 * PetscConj(yy3[3]);
280: yy3 += 4;
281: j -= 4;
282: }
283: z[0] = sum0;
284: z[1] = sum1;
285: z[2] = sum2;
286: z[3] = sum3;
287: z += 4;
288: i -= 4;
289: PetscCall(VecRestoreArrayRead(yy[0], &yy0));
290: PetscCall(VecRestoreArrayRead(yy[1], &yy1));
291: PetscCall(VecRestoreArrayRead(yy[2], &yy2));
292: PetscCall(VecRestoreArrayRead(yy[3], &yy3));
293: yy += 4;
294: }
295: PetscCall(VecRestoreArrayRead(xin, &xbase));
296: PetscCall(PetscLogFlops(PetscMax(nv * (2.0 * n - 1), 0.0)));
297: PetscFunctionReturn(PETSC_SUCCESS);
298: }
299: #endif
301: /* ----------------------------------------------------------------------------*/
302: PetscErrorCode VecMTDot_Seq(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
303: {
304: const PetscInt n = xin->map->n;
305: PetscInt i = nv, j = n, nv_rem = nv & 0x3, j_rem;
306: PetscScalar sum0 = 0., sum1 = 0., sum2 = 0., sum3, x0, x1, x2, x3;
307: const PetscScalar *yy0, *yy1, *yy2, *yy3, *x, *xbase;
308: const Vec *yy = (Vec *)yin;
310: PetscFunctionBegin;
311: PetscCall(VecGetArrayRead(xin, &xbase));
312: x = xbase;
314: switch (nv_rem) {
315: case 3:
316: PetscCall(VecGetArrayRead(yy[0], &yy0));
317: PetscCall(VecGetArrayRead(yy[1], &yy1));
318: PetscCall(VecGetArrayRead(yy[2], &yy2));
319: switch (j_rem = j & 0x3) {
320: case 3:
321: x2 = x[2];
322: sum0 += x2 * yy0[2];
323: sum1 += x2 * yy1[2];
324: sum2 += x2 * yy2[2]; /* fall through */
325: case 2:
326: x1 = x[1];
327: sum0 += x1 * yy0[1];
328: sum1 += x1 * yy1[1];
329: sum2 += x1 * yy2[1]; /* fall through */
330: case 1:
331: x0 = x[0];
332: sum0 += x0 * yy0[0];
333: sum1 += x0 * yy1[0];
334: sum2 += x0 * yy2[0]; /* fall through */
335: case 0:
336: x += j_rem;
337: yy0 += j_rem;
338: yy1 += j_rem;
339: yy2 += j_rem;
340: j -= j_rem;
341: break;
342: }
343: while (j > 0) {
344: x0 = x[0];
345: x1 = x[1];
346: x2 = x[2];
347: x3 = x[3];
348: x += 4;
350: sum0 += x0 * yy0[0] + x1 * yy0[1] + x2 * yy0[2] + x3 * yy0[3];
351: yy0 += 4;
352: sum1 += x0 * yy1[0] + x1 * yy1[1] + x2 * yy1[2] + x3 * yy1[3];
353: yy1 += 4;
354: sum2 += x0 * yy2[0] + x1 * yy2[1] + x2 * yy2[2] + x3 * yy2[3];
355: yy2 += 4;
356: j -= 4;
357: }
358: z[0] = sum0;
359: z[1] = sum1;
360: z[2] = sum2;
361: PetscCall(VecRestoreArrayRead(yy[0], &yy0));
362: PetscCall(VecRestoreArrayRead(yy[1], &yy1));
363: PetscCall(VecRestoreArrayRead(yy[2], &yy2));
364: break;
365: case 2:
366: PetscCall(VecGetArrayRead(yy[0], &yy0));
367: PetscCall(VecGetArrayRead(yy[1], &yy1));
368: switch (j_rem = j & 0x3) {
369: case 3:
370: x2 = x[2];
371: sum0 += x2 * yy0[2];
372: sum1 += x2 * yy1[2]; /* fall through */
373: case 2:
374: x1 = x[1];
375: sum0 += x1 * yy0[1];
376: sum1 += x1 * yy1[1]; /* fall through */
377: case 1:
378: x0 = x[0];
379: sum0 += x0 * yy0[0];
380: sum1 += x0 * yy1[0]; /* fall through */
381: case 0:
382: x += j_rem;
383: yy0 += j_rem;
384: yy1 += j_rem;
385: j -= j_rem;
386: break;
387: }
388: while (j > 0) {
389: x0 = x[0];
390: x1 = x[1];
391: x2 = x[2];
392: x3 = x[3];
393: x += 4;
395: sum0 += x0 * yy0[0] + x1 * yy0[1] + x2 * yy0[2] + x3 * yy0[3];
396: yy0 += 4;
397: sum1 += x0 * yy1[0] + x1 * yy1[1] + x2 * yy1[2] + x3 * yy1[3];
398: yy1 += 4;
399: j -= 4;
400: }
401: z[0] = sum0;
402: z[1] = sum1;
404: PetscCall(VecRestoreArrayRead(yy[0], &yy0));
405: PetscCall(VecRestoreArrayRead(yy[1], &yy1));
406: break;
407: case 1:
408: PetscCall(VecGetArrayRead(yy[0], &yy0));
409: switch (j_rem = j & 0x3) {
410: case 3:
411: x2 = x[2];
412: sum0 += x2 * yy0[2]; /* fall through */
413: case 2:
414: x1 = x[1];
415: sum0 += x1 * yy0[1]; /* fall through */
416: case 1:
417: x0 = x[0];
418: sum0 += x0 * yy0[0]; /* fall through */
419: case 0:
420: x += j_rem;
421: yy0 += j_rem;
422: j -= j_rem;
423: break;
424: }
425: while (j > 0) {
426: sum0 += x[0] * yy0[0] + x[1] * yy0[1] + x[2] * yy0[2] + x[3] * yy0[3];
427: yy0 += 4;
428: j -= 4;
429: x += 4;
430: }
431: z[0] = sum0;
433: PetscCall(VecRestoreArrayRead(yy[0], &yy0));
434: break;
435: case 0:
436: break;
437: }
438: z += nv_rem;
439: i -= nv_rem;
440: yy += nv_rem;
442: while (i > 0) {
443: sum0 = 0.;
444: sum1 = 0.;
445: sum2 = 0.;
446: sum3 = 0.;
447: PetscCall(VecGetArrayRead(yy[0], &yy0));
448: PetscCall(VecGetArrayRead(yy[1], &yy1));
449: PetscCall(VecGetArrayRead(yy[2], &yy2));
450: PetscCall(VecGetArrayRead(yy[3], &yy3));
451: x = xbase;
453: j = n;
454: switch (j_rem = j & 0x3) {
455: case 3:
456: x2 = x[2];
457: sum0 += x2 * yy0[2];
458: sum1 += x2 * yy1[2];
459: sum2 += x2 * yy2[2];
460: sum3 += x2 * yy3[2]; /* fall through */
461: case 2:
462: x1 = x[1];
463: sum0 += x1 * yy0[1];
464: sum1 += x1 * yy1[1];
465: sum2 += x1 * yy2[1];
466: sum3 += x1 * yy3[1]; /* fall through */
467: case 1:
468: x0 = x[0];
469: sum0 += x0 * yy0[0];
470: sum1 += x0 * yy1[0];
471: sum2 += x0 * yy2[0];
472: sum3 += x0 * yy3[0]; /* fall through */
473: case 0:
474: x += j_rem;
475: yy0 += j_rem;
476: yy1 += j_rem;
477: yy2 += j_rem;
478: yy3 += j_rem;
479: j -= j_rem;
480: break;
481: }
482: while (j > 0) {
483: x0 = x[0];
484: x1 = x[1];
485: x2 = x[2];
486: x3 = x[3];
487: x += 4;
489: sum0 += x0 * yy0[0] + x1 * yy0[1] + x2 * yy0[2] + x3 * yy0[3];
490: yy0 += 4;
491: sum1 += x0 * yy1[0] + x1 * yy1[1] + x2 * yy1[2] + x3 * yy1[3];
492: yy1 += 4;
493: sum2 += x0 * yy2[0] + x1 * yy2[1] + x2 * yy2[2] + x3 * yy2[3];
494: yy2 += 4;
495: sum3 += x0 * yy3[0] + x1 * yy3[1] + x2 * yy3[2] + x3 * yy3[3];
496: yy3 += 4;
497: j -= 4;
498: }
499: z[0] = sum0;
500: z[1] = sum1;
501: z[2] = sum2;
502: z[3] = sum3;
503: z += 4;
504: i -= 4;
505: PetscCall(VecRestoreArrayRead(yy[0], &yy0));
506: PetscCall(VecRestoreArrayRead(yy[1], &yy1));
507: PetscCall(VecRestoreArrayRead(yy[2], &yy2));
508: PetscCall(VecRestoreArrayRead(yy[3], &yy3));
509: yy += 4;
510: }
511: PetscCall(VecRestoreArrayRead(xin, &xbase));
512: PetscCall(PetscLogFlops(PetscMax(nv * (2.0 * n - 1), 0.0)));
513: PetscFunctionReturn(PETSC_SUCCESS);
514: }
516: static PetscErrorCode VecMinMax_Seq(Vec xin, PetscInt *idx, PetscReal *z, PetscReal minmax, int (*const cmp)(PetscReal, PetscReal))
517: {
518: const PetscInt n = xin->map->n;
519: PetscInt j = -1;
521: PetscFunctionBegin;
522: if (n) {
523: const PetscScalar *xx;
525: PetscCall(VecGetArrayRead(xin, &xx));
526: minmax = PetscRealPart(xx[(j = 0)]);
527: for (PetscInt i = 1; i < n; ++i) {
528: const PetscReal tmp = PetscRealPart(xx[i]);
530: if (cmp(tmp, minmax)) {
531: j = i;
532: minmax = tmp;
533: }
534: }
535: PetscCall(VecRestoreArrayRead(xin, &xx));
536: }
537: *z = minmax;
538: if (idx) *idx = j;
539: PetscFunctionReturn(PETSC_SUCCESS);
540: }
542: static int VecMax_Seq_GT(PetscReal l, PetscReal r)
543: {
544: return l > r;
545: }
547: PetscErrorCode VecMax_Seq(Vec xin, PetscInt *idx, PetscReal *z)
548: {
549: PetscFunctionBegin;
550: PetscCall(VecMinMax_Seq(xin, idx, z, PETSC_MIN_REAL, VecMax_Seq_GT));
551: PetscFunctionReturn(PETSC_SUCCESS);
552: }
554: static int VecMin_Seq_LT(PetscReal l, PetscReal r)
555: {
556: return l < r;
557: }
559: PetscErrorCode VecMin_Seq(Vec xin, PetscInt *idx, PetscReal *z)
560: {
561: PetscFunctionBegin;
562: PetscCall(VecMinMax_Seq(xin, idx, z, PETSC_MAX_REAL, VecMin_Seq_LT));
563: PetscFunctionReturn(PETSC_SUCCESS);
564: }
566: PetscErrorCode VecSet_Seq(Vec xin, PetscScalar alpha)
567: {
568: const PetscInt n = xin->map->n;
569: PetscScalar *xx;
571: PetscFunctionBegin;
572: PetscCall(VecGetArrayWrite(xin, &xx));
573: if (alpha == (PetscScalar)0.0) {
574: PetscCall(PetscArrayzero(xx, n));
575: } else {
576: for (PetscInt i = 0; i < n; i++) xx[i] = alpha;
577: }
578: PetscCall(VecRestoreArrayWrite(xin, &xx));
579: PetscFunctionReturn(PETSC_SUCCESS);
580: }
582: PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv, const PetscScalar *alpha, Vec *y)
583: {
584: const PetscInt j_rem = nv & 0x3, n = xin->map->n;
585: const PetscScalar *yptr[4];
586: PetscScalar *xx;
587: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
588: #pragma disjoint(*xx, **yptr, *aptr)
589: #endif
591: PetscFunctionBegin;
592: PetscCall(PetscLogFlops(nv * 2.0 * n));
593: PetscCall(VecGetArray(xin, &xx));
594: for (PetscInt i = 0; i < j_rem; ++i) PetscCall(VecGetArrayRead(y[i], yptr + i));
595: switch (j_rem) {
596: case 3:
597: PetscKernelAXPY3(xx, alpha[0], alpha[1], alpha[2], yptr[0], yptr[1], yptr[2], n);
598: break;
599: case 2:
600: PetscKernelAXPY2(xx, alpha[0], alpha[1], yptr[0], yptr[1], n);
601: break;
602: case 1:
603: PetscKernelAXPY(xx, alpha[0], yptr[0], n);
604: default:
605: break;
606: }
607: for (PetscInt i = 0; i < j_rem; ++i) PetscCall(VecRestoreArrayRead(y[i], yptr + i));
608: alpha += j_rem;
609: y += j_rem;
610: for (PetscInt j = j_rem, inc = 4; j < nv; j += inc, alpha += inc, y += inc) {
611: for (PetscInt i = 0; i < inc; ++i) PetscCall(VecGetArrayRead(y[i], yptr + i));
612: PetscKernelAXPY4(xx, alpha[0], alpha[1], alpha[2], alpha[3], yptr[0], yptr[1], yptr[2], yptr[3], n);
613: for (PetscInt i = 0; i < inc; ++i) PetscCall(VecRestoreArrayRead(y[i], yptr + i));
614: }
615: PetscCall(VecRestoreArray(xin, &xx));
616: PetscFunctionReturn(PETSC_SUCCESS);
617: }
619: #include <../src/vec/vec/impls/seq/ftn-kernels/faypx.h>
621: PetscErrorCode VecAYPX_Seq(Vec yin, PetscScalar alpha, Vec xin)
622: {
623: PetscFunctionBegin;
624: if (alpha == (PetscScalar)0.0) {
625: PetscCall(VecCopy(xin, yin));
626: } else if (alpha == (PetscScalar)1.0) {
627: PetscCall(VecAXPY_Seq(yin, alpha, xin));
628: } else {
629: const PetscInt n = yin->map->n;
630: const PetscScalar *xx;
631: PetscScalar *yy;
633: PetscCall(VecGetArrayRead(xin, &xx));
634: PetscCall(VecGetArray(yin, &yy));
635: if (alpha == (PetscScalar)-1.0) {
636: for (PetscInt i = 0; i < n; ++i) yy[i] = xx[i] - yy[i];
637: PetscCall(PetscLogFlops(n));
638: } else {
639: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
640: fortranaypx_(&n, &alpha, xx, yy);
641: #else
642: for (PetscInt i = 0; i < n; ++i) yy[i] = xx[i] + alpha * yy[i];
643: #endif
644: PetscCall(PetscLogFlops(2 * n));
645: }
646: PetscCall(VecRestoreArrayRead(xin, &xx));
647: PetscCall(VecRestoreArray(yin, &yy));
648: }
649: PetscFunctionReturn(PETSC_SUCCESS);
650: }
652: #include <../src/vec/vec/impls/seq/ftn-kernels/fwaxpy.h>
653: /*
654: IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
655: to be slower than a regular C loop. Hence,we do not include it.
656: void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
657: */
659: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha, Vec xin, Vec yin)
660: {
661: const PetscInt n = win->map->n;
662: const PetscScalar *yy, *xx;
663: PetscScalar *ww;
665: PetscFunctionBegin;
666: PetscCall(VecGetArrayRead(xin, &xx));
667: PetscCall(VecGetArrayRead(yin, &yy));
668: PetscCall(VecGetArray(win, &ww));
669: if (alpha == (PetscScalar)1.0) {
670: PetscCall(PetscLogFlops(n));
671: /* could call BLAS axpy after call to memcopy, but may be slower */
672: for (PetscInt i = 0; i < n; i++) ww[i] = yy[i] + xx[i];
673: } else if (alpha == (PetscScalar)-1.0) {
674: PetscCall(PetscLogFlops(n));
675: for (PetscInt i = 0; i < n; i++) ww[i] = yy[i] - xx[i];
676: } else if (alpha == (PetscScalar)0.0) {
677: PetscCall(PetscArraycpy(ww, yy, n));
678: } else {
679: PetscCall(PetscLogFlops(2.0 * n));
680: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
681: fortranwaxpy_(&n, &alpha, xx, yy, ww);
682: #else
683: for (PetscInt i = 0; i < n; i++) ww[i] = yy[i] + alpha * xx[i];
684: #endif
685: }
686: PetscCall(VecRestoreArrayRead(xin, &xx));
687: PetscCall(VecRestoreArrayRead(yin, &yy));
688: PetscCall(VecRestoreArray(win, &ww));
689: PetscFunctionReturn(PETSC_SUCCESS);
690: }
692: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin, Vec yin, PetscReal *max)
693: {
694: const PetscInt n = xin->map->n;
695: const PetscScalar *xx, *yy;
696: PetscReal m = 0.0;
698: PetscFunctionBegin;
699: PetscCall(VecGetArrayRead(xin, &xx));
700: PetscCall(VecGetArrayRead(yin, &yy));
701: for (PetscInt i = 0; i < n; ++i) {
702: const PetscReal v = PetscAbsScalar(yy[i] == (PetscScalar)0.0 ? xx[i] : xx[i] / yy[i]);
704: // use a separate value to not re-evaluate side-effects
705: m = PetscMax(v, m);
706: }
707: PetscCall(VecRestoreArrayRead(xin, &xx));
708: PetscCall(VecRestoreArrayRead(yin, &yy));
709: PetscCall(PetscLogFlops(n));
710: *max = m;
711: PetscFunctionReturn(PETSC_SUCCESS);
712: }
714: PetscErrorCode VecPlaceArray_Seq(Vec vin, const PetscScalar *a)
715: {
716: Vec_Seq *v = (Vec_Seq *)vin->data;
718: PetscFunctionBegin;
719: PetscCheck(!v->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
720: v->unplacedarray = v->array; /* save previous array so reset can bring it back */
721: v->array = (PetscScalar *)a;
722: PetscFunctionReturn(PETSC_SUCCESS);
723: }
725: PetscErrorCode VecReplaceArray_Seq(Vec vin, const PetscScalar *a)
726: {
727: Vec_Seq *v = (Vec_Seq *)vin->data;
729: PetscFunctionBegin;
730: PetscCall(PetscFree(v->array_allocated));
731: v->array_allocated = v->array = (PetscScalar *)a;
732: PetscFunctionReturn(PETSC_SUCCESS);
733: }