Actual source code: dvec2.c

  1: /*
  2:    Defines some vector operation functions that are shared by
  3:    sequential and parallel vectors.
  4: */
  5: #include <../src/vec/vec/impls/dvecimpl.h>
  6: #include <petsc/private/kernels/petscaxpy.h>

  8: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
  9: #include <../src/vec/vec/impls/seq/ftn-kernels/fmdot.h>
 10: PetscErrorCode VecMDot_Seq(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
 11: {
 12:   const PetscInt     n = xin->map->n;
 13:   PetscInt           i = nv, nv_rem = nv & 0x3;
 14:   PetscScalar        sum0 = 0., sum1 = 0., sum2 = 0., sum3;
 15:   const PetscScalar *yy0, *yy1, *yy2, *yy3, *x;
 16:   Vec               *yy = (Vec *)yin;

 18:   PetscFunctionBegin;
 19:   PetscCall(VecGetArrayRead(xin, &x));
 20:   switch (nv_rem) {
 21:   case 3:
 22:     PetscCall(VecGetArrayRead(yy[0], &yy0));
 23:     PetscCall(VecGetArrayRead(yy[1], &yy1));
 24:     PetscCall(VecGetArrayRead(yy[2], &yy2));
 25:     fortranmdot3_(x, yy0, yy1, yy2, &n, &sum0, &sum1, &sum2);
 26:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
 27:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
 28:     PetscCall(VecRestoreArrayRead(yy[2], &yy2));
 29:     z[0] = sum0;
 30:     z[1] = sum1;
 31:     z[2] = sum2;
 32:     break;
 33:   case 2:
 34:     PetscCall(VecGetArrayRead(yy[0], &yy0));
 35:     PetscCall(VecGetArrayRead(yy[1], &yy1));
 36:     fortranmdot2_(x, yy0, yy1, &n, &sum0, &sum1);
 37:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
 38:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
 39:     z[0] = sum0;
 40:     z[1] = sum1;
 41:     break;
 42:   case 1:
 43:     PetscCall(VecGetArrayRead(yy[0], &yy0));
 44:     fortranmdot1_(x, yy0, &n, &sum0);
 45:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
 46:     z[0] = sum0;
 47:     break;
 48:   case 0:
 49:     break;
 50:   }
 51:   z += nv_rem;
 52:   i -= nv_rem;
 53:   yy += nv_rem;

 55:   while (i > 0) {
 56:     sum0 = 0.;
 57:     sum1 = 0.;
 58:     sum2 = 0.;
 59:     sum3 = 0.;
 60:     PetscCall(VecGetArrayRead(yy[0], &yy0));
 61:     PetscCall(VecGetArrayRead(yy[1], &yy1));
 62:     PetscCall(VecGetArrayRead(yy[2], &yy2));
 63:     PetscCall(VecGetArrayRead(yy[3], &yy3));
 64:     fortranmdot4_(x, yy0, yy1, yy2, yy3, &n, &sum0, &sum1, &sum2, &sum3);
 65:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
 66:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
 67:     PetscCall(VecRestoreArrayRead(yy[2], &yy2));
 68:     PetscCall(VecRestoreArrayRead(yy[3], &yy3));
 69:     yy += 4;
 70:     z[0] = sum0;
 71:     z[1] = sum1;
 72:     z[2] = sum2;
 73:     z[3] = sum3;
 74:     z += 4;
 75:     i -= 4;
 76:   }
 77:   PetscCall(VecRestoreArrayRead(xin, &x));
 78:   PetscCall(PetscLogFlops(PetscMax(nv * (2.0 * n - 1), 0.0)));
 79:   PetscFunctionReturn(PETSC_SUCCESS);
 80: }

 82: #else
 83: PetscErrorCode VecMDot_Seq(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
 84: {
 85:   const PetscInt     n = xin->map->n;
 86:   PetscInt           i = nv, j = n, nv_rem = nv & 0x3, j_rem;
 87:   PetscScalar        sum0 = 0.0, sum1 = 0.0, sum2 = 0.0, sum3, x0, x1, x2, x3;
 88:   const PetscScalar *yy0, *yy1, *yy2, *yy3, *x, *xbase;
 89:   const Vec         *yy = (Vec *)yin;

 91:   PetscFunctionBegin;
 92:   if (n == 0) {
 93:     PetscCall(PetscArrayzero(z, nv));
 94:     PetscFunctionReturn(PETSC_SUCCESS);
 95:   }
 96:   PetscCall(VecGetArrayRead(xin, &xbase));
 97:   x = xbase;
 98:   switch (nv_rem) {
 99:   case 3:
100:     PetscCall(VecGetArrayRead(yy[0], &yy0));
101:     PetscCall(VecGetArrayRead(yy[1], &yy1));
102:     PetscCall(VecGetArrayRead(yy[2], &yy2));
103:     switch (j_rem = j & 0x3) {
104:     case 3:
105:       x2 = x[2];
106:       sum0 += x2 * PetscConj(yy0[2]);
107:       sum1 += x2 * PetscConj(yy1[2]);
108:       sum2 += x2 * PetscConj(yy2[2]); /* fall through */
109:     case 2:
110:       x1 = x[1];
111:       sum0 += x1 * PetscConj(yy0[1]);
112:       sum1 += x1 * PetscConj(yy1[1]);
113:       sum2 += x1 * PetscConj(yy2[1]); /* fall through */
114:     case 1:
115:       x0 = x[0];
116:       sum0 += x0 * PetscConj(yy0[0]);
117:       sum1 += x0 * PetscConj(yy1[0]);
118:       sum2 += x0 * PetscConj(yy2[0]); /* fall through */
119:     case 0:
120:       x += j_rem;
121:       yy0 += j_rem;
122:       yy1 += j_rem;
123:       yy2 += j_rem;
124:       j -= j_rem;
125:       break;
126:     }
127:     while (j > 0) {
128:       x0 = x[0];
129:       x1 = x[1];
130:       x2 = x[2];
131:       x3 = x[3];
132:       x += 4;

134:       sum0 += x0 * PetscConj(yy0[0]) + x1 * PetscConj(yy0[1]) + x2 * PetscConj(yy0[2]) + x3 * PetscConj(yy0[3]);
135:       yy0 += 4;
136:       sum1 += x0 * PetscConj(yy1[0]) + x1 * PetscConj(yy1[1]) + x2 * PetscConj(yy1[2]) + x3 * PetscConj(yy1[3]);
137:       yy1 += 4;
138:       sum2 += x0 * PetscConj(yy2[0]) + x1 * PetscConj(yy2[1]) + x2 * PetscConj(yy2[2]) + x3 * PetscConj(yy2[3]);
139:       yy2 += 4;
140:       j -= 4;
141:     }
142:     z[0] = sum0;
143:     z[1] = sum1;
144:     z[2] = sum2;
145:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
146:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
147:     PetscCall(VecRestoreArrayRead(yy[2], &yy2));
148:     break;
149:   case 2:
150:     PetscCall(VecGetArrayRead(yy[0], &yy0));
151:     PetscCall(VecGetArrayRead(yy[1], &yy1));
152:     switch (j_rem = j & 0x3) {
153:     case 3:
154:       x2 = x[2];
155:       sum0 += x2 * PetscConj(yy0[2]);
156:       sum1 += x2 * PetscConj(yy1[2]); /* fall through */
157:     case 2:
158:       x1 = x[1];
159:       sum0 += x1 * PetscConj(yy0[1]);
160:       sum1 += x1 * PetscConj(yy1[1]); /* fall through */
161:     case 1:
162:       x0 = x[0];
163:       sum0 += x0 * PetscConj(yy0[0]);
164:       sum1 += x0 * PetscConj(yy1[0]); /* fall through */
165:     case 0:
166:       x += j_rem;
167:       yy0 += j_rem;
168:       yy1 += j_rem;
169:       j -= j_rem;
170:       break;
171:     }
172:     while (j > 0) {
173:       x0 = x[0];
174:       x1 = x[1];
175:       x2 = x[2];
176:       x3 = x[3];
177:       x += 4;

179:       sum0 += x0 * PetscConj(yy0[0]) + x1 * PetscConj(yy0[1]) + x2 * PetscConj(yy0[2]) + x3 * PetscConj(yy0[3]);
180:       yy0 += 4;
181:       sum1 += x0 * PetscConj(yy1[0]) + x1 * PetscConj(yy1[1]) + x2 * PetscConj(yy1[2]) + x3 * PetscConj(yy1[3]);
182:       yy1 += 4;
183:       j -= 4;
184:     }
185:     z[0] = sum0;
186:     z[1] = sum1;

188:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
189:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
190:     break;
191:   case 1:
192:     PetscCall(VecGetArrayRead(yy[0], &yy0));
193:     switch (j_rem = j & 0x3) {
194:     case 3:
195:       x2 = x[2];
196:       sum0 += x2 * PetscConj(yy0[2]); /* fall through */
197:     case 2:
198:       x1 = x[1];
199:       sum0 += x1 * PetscConj(yy0[1]); /* fall through */
200:     case 1:
201:       x0 = x[0];
202:       sum0 += x0 * PetscConj(yy0[0]); /* fall through */
203:     case 0:
204:       x += j_rem;
205:       yy0 += j_rem;
206:       j -= j_rem;
207:       break;
208:     }
209:     while (j > 0) {
210:       sum0 += x[0] * PetscConj(yy0[0]) + x[1] * PetscConj(yy0[1]) + x[2] * PetscConj(yy0[2]) + x[3] * PetscConj(yy0[3]);
211:       yy0 += 4;
212:       j -= 4;
213:       x += 4;
214:     }
215:     z[0] = sum0;

217:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
218:     break;
219:   case 0:
220:     break;
221:   }
222:   z += nv_rem;
223:   i -= nv_rem;
224:   yy += nv_rem;

226:   while (i > 0) {
227:     sum0 = 0.;
228:     sum1 = 0.;
229:     sum2 = 0.;
230:     sum3 = 0.;
231:     PetscCall(VecGetArrayRead(yy[0], &yy0));
232:     PetscCall(VecGetArrayRead(yy[1], &yy1));
233:     PetscCall(VecGetArrayRead(yy[2], &yy2));
234:     PetscCall(VecGetArrayRead(yy[3], &yy3));

236:     j = n;
237:     x = xbase;
238:     switch (j_rem = j & 0x3) {
239:     case 3:
240:       x2 = x[2];
241:       sum0 += x2 * PetscConj(yy0[2]);
242:       sum1 += x2 * PetscConj(yy1[2]);
243:       sum2 += x2 * PetscConj(yy2[2]);
244:       sum3 += x2 * PetscConj(yy3[2]); /* fall through */
245:     case 2:
246:       x1 = x[1];
247:       sum0 += x1 * PetscConj(yy0[1]);
248:       sum1 += x1 * PetscConj(yy1[1]);
249:       sum2 += x1 * PetscConj(yy2[1]);
250:       sum3 += x1 * PetscConj(yy3[1]); /* fall through */
251:     case 1:
252:       x0 = x[0];
253:       sum0 += x0 * PetscConj(yy0[0]);
254:       sum1 += x0 * PetscConj(yy1[0]);
255:       sum2 += x0 * PetscConj(yy2[0]);
256:       sum3 += x0 * PetscConj(yy3[0]); /* fall through */
257:     case 0:
258:       x += j_rem;
259:       yy0 += j_rem;
260:       yy1 += j_rem;
261:       yy2 += j_rem;
262:       yy3 += j_rem;
263:       j -= j_rem;
264:       break;
265:     }
266:     while (j > 0) {
267:       x0 = x[0];
268:       x1 = x[1];
269:       x2 = x[2];
270:       x3 = x[3];
271:       x += 4;

273:       sum0 += x0 * PetscConj(yy0[0]) + x1 * PetscConj(yy0[1]) + x2 * PetscConj(yy0[2]) + x3 * PetscConj(yy0[3]);
274:       yy0 += 4;
275:       sum1 += x0 * PetscConj(yy1[0]) + x1 * PetscConj(yy1[1]) + x2 * PetscConj(yy1[2]) + x3 * PetscConj(yy1[3]);
276:       yy1 += 4;
277:       sum2 += x0 * PetscConj(yy2[0]) + x1 * PetscConj(yy2[1]) + x2 * PetscConj(yy2[2]) + x3 * PetscConj(yy2[3]);
278:       yy2 += 4;
279:       sum3 += x0 * PetscConj(yy3[0]) + x1 * PetscConj(yy3[1]) + x2 * PetscConj(yy3[2]) + x3 * PetscConj(yy3[3]);
280:       yy3 += 4;
281:       j -= 4;
282:     }
283:     z[0] = sum0;
284:     z[1] = sum1;
285:     z[2] = sum2;
286:     z[3] = sum3;
287:     z += 4;
288:     i -= 4;
289:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
290:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
291:     PetscCall(VecRestoreArrayRead(yy[2], &yy2));
292:     PetscCall(VecRestoreArrayRead(yy[3], &yy3));
293:     yy += 4;
294:   }
295:   PetscCall(VecRestoreArrayRead(xin, &xbase));
296:   PetscCall(PetscLogFlops(PetscMax(nv * (2.0 * n - 1), 0.0)));
297:   PetscFunctionReturn(PETSC_SUCCESS);
298: }
299: #endif

301: /* ----------------------------------------------------------------------------*/
302: PetscErrorCode VecMTDot_Seq(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
303: {
304:   const PetscInt     n = xin->map->n;
305:   PetscInt           i = nv, j = n, nv_rem = nv & 0x3, j_rem;
306:   PetscScalar        sum0 = 0., sum1 = 0., sum2 = 0., sum3, x0, x1, x2, x3;
307:   const PetscScalar *yy0, *yy1, *yy2, *yy3, *x, *xbase;
308:   const Vec         *yy = (Vec *)yin;

310:   PetscFunctionBegin;
311:   PetscCall(VecGetArrayRead(xin, &xbase));
312:   x = xbase;

314:   switch (nv_rem) {
315:   case 3:
316:     PetscCall(VecGetArrayRead(yy[0], &yy0));
317:     PetscCall(VecGetArrayRead(yy[1], &yy1));
318:     PetscCall(VecGetArrayRead(yy[2], &yy2));
319:     switch (j_rem = j & 0x3) {
320:     case 3:
321:       x2 = x[2];
322:       sum0 += x2 * yy0[2];
323:       sum1 += x2 * yy1[2];
324:       sum2 += x2 * yy2[2]; /* fall through */
325:     case 2:
326:       x1 = x[1];
327:       sum0 += x1 * yy0[1];
328:       sum1 += x1 * yy1[1];
329:       sum2 += x1 * yy2[1]; /* fall through */
330:     case 1:
331:       x0 = x[0];
332:       sum0 += x0 * yy0[0];
333:       sum1 += x0 * yy1[0];
334:       sum2 += x0 * yy2[0]; /* fall through */
335:     case 0:
336:       x += j_rem;
337:       yy0 += j_rem;
338:       yy1 += j_rem;
339:       yy2 += j_rem;
340:       j -= j_rem;
341:       break;
342:     }
343:     while (j > 0) {
344:       x0 = x[0];
345:       x1 = x[1];
346:       x2 = x[2];
347:       x3 = x[3];
348:       x += 4;

350:       sum0 += x0 * yy0[0] + x1 * yy0[1] + x2 * yy0[2] + x3 * yy0[3];
351:       yy0 += 4;
352:       sum1 += x0 * yy1[0] + x1 * yy1[1] + x2 * yy1[2] + x3 * yy1[3];
353:       yy1 += 4;
354:       sum2 += x0 * yy2[0] + x1 * yy2[1] + x2 * yy2[2] + x3 * yy2[3];
355:       yy2 += 4;
356:       j -= 4;
357:     }
358:     z[0] = sum0;
359:     z[1] = sum1;
360:     z[2] = sum2;
361:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
362:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
363:     PetscCall(VecRestoreArrayRead(yy[2], &yy2));
364:     break;
365:   case 2:
366:     PetscCall(VecGetArrayRead(yy[0], &yy0));
367:     PetscCall(VecGetArrayRead(yy[1], &yy1));
368:     switch (j_rem = j & 0x3) {
369:     case 3:
370:       x2 = x[2];
371:       sum0 += x2 * yy0[2];
372:       sum1 += x2 * yy1[2]; /* fall through */
373:     case 2:
374:       x1 = x[1];
375:       sum0 += x1 * yy0[1];
376:       sum1 += x1 * yy1[1]; /* fall through */
377:     case 1:
378:       x0 = x[0];
379:       sum0 += x0 * yy0[0];
380:       sum1 += x0 * yy1[0]; /* fall through */
381:     case 0:
382:       x += j_rem;
383:       yy0 += j_rem;
384:       yy1 += j_rem;
385:       j -= j_rem;
386:       break;
387:     }
388:     while (j > 0) {
389:       x0 = x[0];
390:       x1 = x[1];
391:       x2 = x[2];
392:       x3 = x[3];
393:       x += 4;

395:       sum0 += x0 * yy0[0] + x1 * yy0[1] + x2 * yy0[2] + x3 * yy0[3];
396:       yy0 += 4;
397:       sum1 += x0 * yy1[0] + x1 * yy1[1] + x2 * yy1[2] + x3 * yy1[3];
398:       yy1 += 4;
399:       j -= 4;
400:     }
401:     z[0] = sum0;
402:     z[1] = sum1;

404:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
405:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
406:     break;
407:   case 1:
408:     PetscCall(VecGetArrayRead(yy[0], &yy0));
409:     switch (j_rem = j & 0x3) {
410:     case 3:
411:       x2 = x[2];
412:       sum0 += x2 * yy0[2]; /* fall through */
413:     case 2:
414:       x1 = x[1];
415:       sum0 += x1 * yy0[1]; /* fall through */
416:     case 1:
417:       x0 = x[0];
418:       sum0 += x0 * yy0[0]; /* fall through */
419:     case 0:
420:       x += j_rem;
421:       yy0 += j_rem;
422:       j -= j_rem;
423:       break;
424:     }
425:     while (j > 0) {
426:       sum0 += x[0] * yy0[0] + x[1] * yy0[1] + x[2] * yy0[2] + x[3] * yy0[3];
427:       yy0 += 4;
428:       j -= 4;
429:       x += 4;
430:     }
431:     z[0] = sum0;

433:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
434:     break;
435:   case 0:
436:     break;
437:   }
438:   z += nv_rem;
439:   i -= nv_rem;
440:   yy += nv_rem;

442:   while (i > 0) {
443:     sum0 = 0.;
444:     sum1 = 0.;
445:     sum2 = 0.;
446:     sum3 = 0.;
447:     PetscCall(VecGetArrayRead(yy[0], &yy0));
448:     PetscCall(VecGetArrayRead(yy[1], &yy1));
449:     PetscCall(VecGetArrayRead(yy[2], &yy2));
450:     PetscCall(VecGetArrayRead(yy[3], &yy3));
451:     x = xbase;

453:     j = n;
454:     switch (j_rem = j & 0x3) {
455:     case 3:
456:       x2 = x[2];
457:       sum0 += x2 * yy0[2];
458:       sum1 += x2 * yy1[2];
459:       sum2 += x2 * yy2[2];
460:       sum3 += x2 * yy3[2]; /* fall through */
461:     case 2:
462:       x1 = x[1];
463:       sum0 += x1 * yy0[1];
464:       sum1 += x1 * yy1[1];
465:       sum2 += x1 * yy2[1];
466:       sum3 += x1 * yy3[1]; /* fall through */
467:     case 1:
468:       x0 = x[0];
469:       sum0 += x0 * yy0[0];
470:       sum1 += x0 * yy1[0];
471:       sum2 += x0 * yy2[0];
472:       sum3 += x0 * yy3[0]; /* fall through */
473:     case 0:
474:       x += j_rem;
475:       yy0 += j_rem;
476:       yy1 += j_rem;
477:       yy2 += j_rem;
478:       yy3 += j_rem;
479:       j -= j_rem;
480:       break;
481:     }
482:     while (j > 0) {
483:       x0 = x[0];
484:       x1 = x[1];
485:       x2 = x[2];
486:       x3 = x[3];
487:       x += 4;

489:       sum0 += x0 * yy0[0] + x1 * yy0[1] + x2 * yy0[2] + x3 * yy0[3];
490:       yy0 += 4;
491:       sum1 += x0 * yy1[0] + x1 * yy1[1] + x2 * yy1[2] + x3 * yy1[3];
492:       yy1 += 4;
493:       sum2 += x0 * yy2[0] + x1 * yy2[1] + x2 * yy2[2] + x3 * yy2[3];
494:       yy2 += 4;
495:       sum3 += x0 * yy3[0] + x1 * yy3[1] + x2 * yy3[2] + x3 * yy3[3];
496:       yy3 += 4;
497:       j -= 4;
498:     }
499:     z[0] = sum0;
500:     z[1] = sum1;
501:     z[2] = sum2;
502:     z[3] = sum3;
503:     z += 4;
504:     i -= 4;
505:     PetscCall(VecRestoreArrayRead(yy[0], &yy0));
506:     PetscCall(VecRestoreArrayRead(yy[1], &yy1));
507:     PetscCall(VecRestoreArrayRead(yy[2], &yy2));
508:     PetscCall(VecRestoreArrayRead(yy[3], &yy3));
509:     yy += 4;
510:   }
511:   PetscCall(VecRestoreArrayRead(xin, &xbase));
512:   PetscCall(PetscLogFlops(PetscMax(nv * (2.0 * n - 1), 0.0)));
513:   PetscFunctionReturn(PETSC_SUCCESS);
514: }

516: static PetscErrorCode VecMinMax_Seq(Vec xin, PetscInt *idx, PetscReal *z, PetscReal minmax, int (*const cmp)(PetscReal, PetscReal))
517: {
518:   const PetscInt n = xin->map->n;
519:   PetscInt       j = -1;

521:   PetscFunctionBegin;
522:   if (n) {
523:     const PetscScalar *xx;

525:     PetscCall(VecGetArrayRead(xin, &xx));
526:     minmax = PetscRealPart(xx[(j = 0)]);
527:     for (PetscInt i = 1; i < n; ++i) {
528:       const PetscReal tmp = PetscRealPart(xx[i]);

530:       if (cmp(tmp, minmax)) {
531:         j      = i;
532:         minmax = tmp;
533:       }
534:     }
535:     PetscCall(VecRestoreArrayRead(xin, &xx));
536:   }
537:   *z = minmax;
538:   if (idx) *idx = j;
539:   PetscFunctionReturn(PETSC_SUCCESS);
540: }

542: static int VecMax_Seq_GT(PetscReal l, PetscReal r)
543: {
544:   return l > r;
545: }

547: PetscErrorCode VecMax_Seq(Vec xin, PetscInt *idx, PetscReal *z)
548: {
549:   PetscFunctionBegin;
550:   PetscCall(VecMinMax_Seq(xin, idx, z, PETSC_MIN_REAL, VecMax_Seq_GT));
551:   PetscFunctionReturn(PETSC_SUCCESS);
552: }

554: static int VecMin_Seq_LT(PetscReal l, PetscReal r)
555: {
556:   return l < r;
557: }

559: PetscErrorCode VecMin_Seq(Vec xin, PetscInt *idx, PetscReal *z)
560: {
561:   PetscFunctionBegin;
562:   PetscCall(VecMinMax_Seq(xin, idx, z, PETSC_MAX_REAL, VecMin_Seq_LT));
563:   PetscFunctionReturn(PETSC_SUCCESS);
564: }

566: PetscErrorCode VecSet_Seq(Vec xin, PetscScalar alpha)
567: {
568:   const PetscInt n = xin->map->n;
569:   PetscScalar   *xx;

571:   PetscFunctionBegin;
572:   PetscCall(VecGetArrayWrite(xin, &xx));
573:   if (alpha == (PetscScalar)0.0) {
574:     PetscCall(PetscArrayzero(xx, n));
575:   } else {
576:     for (PetscInt i = 0; i < n; i++) xx[i] = alpha;
577:   }
578:   PetscCall(VecRestoreArrayWrite(xin, &xx));
579:   PetscFunctionReturn(PETSC_SUCCESS);
580: }

582: PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv, const PetscScalar *alpha, Vec *y)
583: {
584:   const PetscInt     j_rem = nv & 0x3, n = xin->map->n;
585:   const PetscScalar *yptr[4];
586:   PetscScalar       *xx;
587: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
588:   #pragma disjoint(*xx, **yptr, *aptr)
589: #endif

591:   PetscFunctionBegin;
592:   PetscCall(PetscLogFlops(nv * 2.0 * n));
593:   PetscCall(VecGetArray(xin, &xx));
594:   for (PetscInt i = 0; i < j_rem; ++i) PetscCall(VecGetArrayRead(y[i], yptr + i));
595:   switch (j_rem) {
596:   case 3:
597:     PetscKernelAXPY3(xx, alpha[0], alpha[1], alpha[2], yptr[0], yptr[1], yptr[2], n);
598:     break;
599:   case 2:
600:     PetscKernelAXPY2(xx, alpha[0], alpha[1], yptr[0], yptr[1], n);
601:     break;
602:   case 1:
603:     PetscKernelAXPY(xx, alpha[0], yptr[0], n);
604:   default:
605:     break;
606:   }
607:   for (PetscInt i = 0; i < j_rem; ++i) PetscCall(VecRestoreArrayRead(y[i], yptr + i));
608:   alpha += j_rem;
609:   y += j_rem;
610:   for (PetscInt j = j_rem, inc = 4; j < nv; j += inc, alpha += inc, y += inc) {
611:     for (PetscInt i = 0; i < inc; ++i) PetscCall(VecGetArrayRead(y[i], yptr + i));
612:     PetscKernelAXPY4(xx, alpha[0], alpha[1], alpha[2], alpha[3], yptr[0], yptr[1], yptr[2], yptr[3], n);
613:     for (PetscInt i = 0; i < inc; ++i) PetscCall(VecRestoreArrayRead(y[i], yptr + i));
614:   }
615:   PetscCall(VecRestoreArray(xin, &xx));
616:   PetscFunctionReturn(PETSC_SUCCESS);
617: }

619: #include <../src/vec/vec/impls/seq/ftn-kernels/faypx.h>

621: PetscErrorCode VecAYPX_Seq(Vec yin, PetscScalar alpha, Vec xin)
622: {
623:   PetscFunctionBegin;
624:   if (alpha == (PetscScalar)0.0) {
625:     PetscCall(VecCopy(xin, yin));
626:   } else if (alpha == (PetscScalar)1.0) {
627:     PetscCall(VecAXPY_Seq(yin, alpha, xin));
628:   } else {
629:     const PetscInt     n = yin->map->n;
630:     const PetscScalar *xx;
631:     PetscScalar       *yy;

633:     PetscCall(VecGetArrayRead(xin, &xx));
634:     PetscCall(VecGetArray(yin, &yy));
635:     if (alpha == (PetscScalar)-1.0) {
636:       for (PetscInt i = 0; i < n; ++i) yy[i] = xx[i] - yy[i];
637:       PetscCall(PetscLogFlops(n));
638:     } else {
639: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
640:       fortranaypx_(&n, &alpha, xx, yy);
641: #else
642:       for (PetscInt i = 0; i < n; ++i) yy[i] = xx[i] + alpha * yy[i];
643: #endif
644:       PetscCall(PetscLogFlops(2 * n));
645:     }
646:     PetscCall(VecRestoreArrayRead(xin, &xx));
647:     PetscCall(VecRestoreArray(yin, &yy));
648:   }
649:   PetscFunctionReturn(PETSC_SUCCESS);
650: }

652: #include <../src/vec/vec/impls/seq/ftn-kernels/fwaxpy.h>
653: /*
654:    IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
655:    to be slower than a regular C loop.  Hence,we do not include it.
656:    void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
657: */

659: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha, Vec xin, Vec yin)
660: {
661:   const PetscInt     n = win->map->n;
662:   const PetscScalar *yy, *xx;
663:   PetscScalar       *ww;

665:   PetscFunctionBegin;
666:   PetscCall(VecGetArrayRead(xin, &xx));
667:   PetscCall(VecGetArrayRead(yin, &yy));
668:   PetscCall(VecGetArray(win, &ww));
669:   if (alpha == (PetscScalar)1.0) {
670:     PetscCall(PetscLogFlops(n));
671:     /* could call BLAS axpy after call to memcopy, but may be slower */
672:     for (PetscInt i = 0; i < n; i++) ww[i] = yy[i] + xx[i];
673:   } else if (alpha == (PetscScalar)-1.0) {
674:     PetscCall(PetscLogFlops(n));
675:     for (PetscInt i = 0; i < n; i++) ww[i] = yy[i] - xx[i];
676:   } else if (alpha == (PetscScalar)0.0) {
677:     PetscCall(PetscArraycpy(ww, yy, n));
678:   } else {
679:     PetscCall(PetscLogFlops(2.0 * n));
680: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
681:     fortranwaxpy_(&n, &alpha, xx, yy, ww);
682: #else
683:     for (PetscInt i = 0; i < n; i++) ww[i] = yy[i] + alpha * xx[i];
684: #endif
685:   }
686:   PetscCall(VecRestoreArrayRead(xin, &xx));
687:   PetscCall(VecRestoreArrayRead(yin, &yy));
688:   PetscCall(VecRestoreArray(win, &ww));
689:   PetscFunctionReturn(PETSC_SUCCESS);
690: }

692: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin, Vec yin, PetscReal *max)
693: {
694:   const PetscInt     n = xin->map->n;
695:   const PetscScalar *xx, *yy;
696:   PetscReal          m = 0.0;

698:   PetscFunctionBegin;
699:   PetscCall(VecGetArrayRead(xin, &xx));
700:   PetscCall(VecGetArrayRead(yin, &yy));
701:   for (PetscInt i = 0; i < n; ++i) {
702:     const PetscReal v = PetscAbsScalar(yy[i] == (PetscScalar)0.0 ? xx[i] : xx[i] / yy[i]);

704:     // use a separate value to not re-evaluate side-effects
705:     m = PetscMax(v, m);
706:   }
707:   PetscCall(VecRestoreArrayRead(xin, &xx));
708:   PetscCall(VecRestoreArrayRead(yin, &yy));
709:   PetscCall(PetscLogFlops(n));
710:   *max = m;
711:   PetscFunctionReturn(PETSC_SUCCESS);
712: }

714: PetscErrorCode VecPlaceArray_Seq(Vec vin, const PetscScalar *a)
715: {
716:   Vec_Seq *v = (Vec_Seq *)vin->data;

718:   PetscFunctionBegin;
719:   PetscCheck(!v->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
720:   v->unplacedarray = v->array; /* save previous array so reset can bring it back */
721:   v->array         = (PetscScalar *)a;
722:   PetscFunctionReturn(PETSC_SUCCESS);
723: }

725: PetscErrorCode VecReplaceArray_Seq(Vec vin, const PetscScalar *a)
726: {
727:   Vec_Seq *v = (Vec_Seq *)vin->data;

729:   PetscFunctionBegin;
730:   PetscCall(PetscFree(v->array_allocated));
731:   v->array_allocated = v->array = (PetscScalar *)a;
732:   PetscFunctionReturn(PETSC_SUCCESS);
733: }