Actual source code: dvec2.c
2: /*
3: Defines some vector operation functions that are shared by
4: sequential and parallel vectors.
5: */
6: #include <../src/vec/vec/impls/dvecimpl.h>
7: #include <private/petscaxpy.h>
9: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
10: #include <../src/vec/vec/impls/seq/ftn-kernels/fmdot.h>
13: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
14: {
15: PetscErrorCode ierr;
16: PetscInt i,nv_rem,n = xin->map->n;
17: PetscScalar sum0,sum1,sum2,sum3;
18: const PetscScalar *yy0,*yy1,*yy2,*yy3,*x;
19: Vec *yy;
22: sum0 = 0.0;
23: sum1 = 0.0;
24: sum2 = 0.0;
26: i = nv;
27: nv_rem = nv&0x3;
28: yy = (Vec*)yin;
29: VecGetArrayRead(xin,&x);
31: switch (nv_rem) {
32: case 3:
33: VecGetArrayRead(yy[0],&yy0);
34: VecGetArrayRead(yy[1],&yy1);
35: VecGetArrayRead(yy[2],&yy2);
36: fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2);
37: VecRestoreArrayRead(yy[0],&yy0);
38: VecRestoreArrayRead(yy[1],&yy1);
39: VecRestoreArrayRead(yy[2],&yy2);
40: z[0] = sum0;
41: z[1] = sum1;
42: z[2] = sum2;
43: break;
44: case 2:
45: VecGetArrayRead(yy[0],&yy0);
46: VecGetArrayRead(yy[1],&yy1);
47: fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1);
48: VecRestoreArrayRead(yy[0],&yy0);
49: VecRestoreArrayRead(yy[1],&yy1);
50: z[0] = sum0;
51: z[1] = sum1;
52: break;
53: case 1:
54: VecGetArrayRead(yy[0],&yy0);
55: fortranmdot1_(x,yy0,&n,&sum0);
56: VecRestoreArrayRead(yy[0],&yy0);
57: z[0] = sum0;
58: break;
59: case 0:
60: break;
61: }
62: z += nv_rem;
63: i -= nv_rem;
64: yy += nv_rem;
66: while (i >0) {
67: sum0 = 0.;
68: sum1 = 0.;
69: sum2 = 0.;
70: sum3 = 0.;
71: VecGetArrayRead(yy[0],&yy0);
72: VecGetArrayRead(yy[1],&yy1);
73: VecGetArrayRead(yy[2],&yy2);
74: VecGetArrayRead(yy[3],&yy3);
75: fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3);
76: VecRestoreArrayRead(yy[0],&yy0);
77: VecRestoreArrayRead(yy[1],&yy1);
78: VecRestoreArrayRead(yy[2],&yy2);
79: VecRestoreArrayRead(yy[3],&yy3);
80: yy += 4;
81: z[0] = sum0;
82: z[1] = sum1;
83: z[2] = sum2;
84: z[3] = sum3;
85: z += 4;
86: i -= 4;
87: }
88: VecRestoreArrayRead(xin,&x);
89: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
90: return(0);
91: }
93: #else
96: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
97: {
98: PetscErrorCode ierr;
99: PetscInt n = xin->map->n,i,j,nv_rem,j_rem;
100: PetscScalar sum0,sum1,sum2,sum3,x0,x1,x2,x3;
101: const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
102: Vec *yy;
105: sum0 = 0.;
106: sum1 = 0.;
107: sum2 = 0.;
109: i = nv;
110: nv_rem = nv&0x3;
111: yy = (Vec *)yin;
112: j = n;
113: VecGetArrayRead(xin,&xbase);
114: x = xbase;
116: switch (nv_rem) {
117: case 3:
118: VecGetArrayRead(yy[0],&yy0);
119: VecGetArrayRead(yy[1],&yy1);
120: VecGetArrayRead(yy[2],&yy2);
121: switch (j_rem=j&0x3) {
122: case 3:
123: x2 = x[2];
124: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
125: sum2 += x2*PetscConj(yy2[2]);
126: case 2:
127: x1 = x[1];
128: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
129: sum2 += x1*PetscConj(yy2[1]);
130: case 1:
131: x0 = x[0];
132: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
133: sum2 += x0*PetscConj(yy2[0]);
134: case 0:
135: x += j_rem;
136: yy0 += j_rem;
137: yy1 += j_rem;
138: yy2 += j_rem;
139: j -= j_rem;
140: break;
141: }
142: while (j>0) {
143: x0 = x[0];
144: x1 = x[1];
145: x2 = x[2];
146: x3 = x[3];
147: x += 4;
148:
149: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
150: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
151: sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
152: j -= 4;
153: }
154: z[0] = sum0;
155: z[1] = sum1;
156: z[2] = sum2;
157: VecRestoreArrayRead(yy[0],&yy0);
158: VecRestoreArrayRead(yy[1],&yy1);
159: VecRestoreArrayRead(yy[2],&yy2);
160: break;
161: case 2:
162: VecGetArrayRead(yy[0],&yy0);
163: VecGetArrayRead(yy[1],&yy1);
164: switch (j_rem=j&0x3) {
165: case 3:
166: x2 = x[2];
167: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
168: case 2:
169: x1 = x[1];
170: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
171: case 1:
172: x0 = x[0];
173: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
174: case 0:
175: x += j_rem;
176: yy0 += j_rem;
177: yy1 += j_rem;
178: j -= j_rem;
179: break;
180: }
181: while (j>0) {
182: x0 = x[0];
183: x1 = x[1];
184: x2 = x[2];
185: x3 = x[3];
186: x += 4;
187:
188: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
189: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
190: j -= 4;
191: }
192: z[0] = sum0;
193: z[1] = sum1;
194:
195: VecRestoreArrayRead(yy[0],&yy0);
196: VecRestoreArrayRead(yy[1],&yy1);
197: break;
198: case 1:
199: VecGetArrayRead(yy[0],&yy0);
200: switch (j_rem=j&0x3) {
201: case 3:
202: x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
203: case 2:
204: x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
205: case 1:
206: x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
207: case 0:
208: x += j_rem;
209: yy0 += j_rem;
210: j -= j_rem;
211: break;
212: }
213: while (j>0) {
214: sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
215: + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]);
216: yy0+=4;
217: j -= 4; x+=4;
218: }
219: z[0] = sum0;
221: VecRestoreArrayRead(yy[0],&yy0);
222: break;
223: case 0:
224: break;
225: }
226: z += nv_rem;
227: i -= nv_rem;
228: yy += nv_rem;
230: while (i >0) {
231: sum0 = 0.;
232: sum1 = 0.;
233: sum2 = 0.;
234: sum3 = 0.;
235: VecGetArrayRead(yy[0],&yy0);
236: VecGetArrayRead(yy[1],&yy1);
237: VecGetArrayRead(yy[2],&yy2);
238: VecGetArrayRead(yy[3],&yy3);
240: j = n;
241: x = xbase;
242: switch (j_rem=j&0x3) {
243: case 3:
244: x2 = x[2];
245: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
246: sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
247: case 2:
248: x1 = x[1];
249: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
250: sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
251: case 1:
252: x0 = x[0];
253: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
254: sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
255: case 0:
256: x += j_rem;
257: yy0 += j_rem;
258: yy1 += j_rem;
259: yy2 += j_rem;
260: yy3 += j_rem;
261: j -= j_rem;
262: break;
263: }
264: while (j>0) {
265: x0 = x[0];
266: x1 = x[1];
267: x2 = x[2];
268: x3 = x[3];
269: x += 4;
270:
271: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
272: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
273: sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
274: sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
275: j -= 4;
276: }
277: z[0] = sum0;
278: z[1] = sum1;
279: z[2] = sum2;
280: z[3] = sum3;
281: z += 4;
282: i -= 4;
283: VecRestoreArrayRead(yy[0],&yy0);
284: VecRestoreArrayRead(yy[1],&yy1);
285: VecRestoreArrayRead(yy[2],&yy2);
286: VecRestoreArrayRead(yy[3],&yy3);
287: yy += 4;
288: }
289: VecRestoreArrayRead(xin,&xbase);
290: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
291: return(0);
292: }
293: #endif
295: /* ----------------------------------------------------------------------------*/
298: PetscErrorCode VecMTDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
299: {
300: PetscErrorCode ierr;
301: PetscInt n = xin->map->n,i,j,nv_rem,j_rem;
302: PetscScalar sum0,sum1,sum2,sum3,x0,x1,x2,x3;
303: const PetscScalar *yy0,*yy1,*yy2,*yy3,*x;
304: Vec *yy;
305:
308: sum0 = 0.;
309: sum1 = 0.;
310: sum2 = 0.;
312: i = nv;
313: nv_rem = nv&0x3;
314: yy = (Vec*)yin;
315: j = n;
316: VecGetArrayRead(xin,&x);
318: switch (nv_rem) {
319: case 3:
320: VecGetArrayRead(yy[0],&yy0);
321: VecGetArrayRead(yy[1],&yy1);
322: VecGetArrayRead(yy[2],&yy2);
323: switch (j_rem=j&0x3) {
324: case 3:
325: x2 = x[2];
326: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
327: sum2 += x2*yy2[2];
328: case 2:
329: x1 = x[1];
330: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
331: sum2 += x1*yy2[1];
332: case 1:
333: x0 = x[0];
334: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
335: sum2 += x0*yy2[0];
336: case 0:
337: x += j_rem;
338: yy0 += j_rem;
339: yy1 += j_rem;
340: yy2 += j_rem;
341: j -= j_rem;
342: break;
343: }
344: while (j>0) {
345: x0 = x[0];
346: x1 = x[1];
347: x2 = x[2];
348: x3 = x[3];
349: x += 4;
350:
351: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
352: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
353: sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
354: j -= 4;
355: }
356: z[0] = sum0;
357: z[1] = sum1;
358: z[2] = sum2;
359: VecRestoreArrayRead(yy[0],&yy0);
360: VecRestoreArrayRead(yy[1],&yy1);
361: VecRestoreArrayRead(yy[2],&yy2);
362: break;
363: case 2:
364: VecGetArrayRead(yy[0],&yy0);
365: VecGetArrayRead(yy[1],&yy1);
366: switch (j_rem=j&0x3) {
367: case 3:
368: x2 = x[2];
369: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
370: case 2:
371: x1 = x[1];
372: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
373: case 1:
374: x0 = x[0];
375: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
376: case 0:
377: x += j_rem;
378: yy0 += j_rem;
379: yy1 += j_rem;
380: j -= j_rem;
381: break;
382: }
383: while (j>0) {
384: x0 = x[0];
385: x1 = x[1];
386: x2 = x[2];
387: x3 = x[3];
388: x += 4;
389:
390: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
391: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
392: j -= 4;
393: }
394: z[0] = sum0;
395: z[1] = sum1;
396:
397: VecRestoreArrayRead(yy[0],&yy0);
398: VecRestoreArrayRead(yy[1],&yy1);
399: break;
400: case 1:
401: VecGetArrayRead(yy[0],&yy0);
402: switch (j_rem=j&0x3) {
403: case 3:
404: x2 = x[2]; sum0 += x2*yy0[2];
405: case 2:
406: x1 = x[1]; sum0 += x1*yy0[1];
407: case 1:
408: x0 = x[0]; sum0 += x0*yy0[0];
409: case 0:
410: x += j_rem;
411: yy0 += j_rem;
412: j -= j_rem;
413: break;
414: }
415: while (j>0) {
416: sum0 += x[0]*yy0[0] + x[1]*yy0[1] + x[2]*yy0[2] + x[3]*yy0[3]; yy0+=4;
417: j -= 4; x+=4;
418: }
419: z[0] = sum0;
421: VecRestoreArrayRead(yy[0],&yy0);
422: break;
423: case 0:
424: break;
425: }
426: z += nv_rem;
427: i -= nv_rem;
428: yy += nv_rem;
430: while (i >0) {
431: sum0 = 0.;
432: sum1 = 0.;
433: sum2 = 0.;
434: sum3 = 0.;
435: VecGetArrayRead(yy[0],&yy0);
436: VecGetArrayRead(yy[1],&yy1);
437: VecGetArrayRead(yy[2],&yy2);
438: VecGetArrayRead(yy[3],&yy3);
440: j = n;
441: switch (j_rem=j&0x3) {
442: case 3:
443: x2 = x[2];
444: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
445: sum2 += x2*yy2[2]; sum3 += x2*yy3[2];
446: case 2:
447: x1 = x[1];
448: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
449: sum2 += x1*yy2[1]; sum3 += x1*yy3[1];
450: case 1:
451: x0 = x[0];
452: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
453: sum2 += x0*yy2[0]; sum3 += x0*yy3[0];
454: case 0:
455: x += j_rem;
456: yy0 += j_rem;
457: yy1 += j_rem;
458: yy2 += j_rem;
459: yy3 += j_rem;
460: j -= j_rem;
461: break;
462: }
463: while (j>0) {
464: x0 = x[0];
465: x1 = x[1];
466: x2 = x[2];
467: x3 = x[3];
468: x += 4;
469:
470: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
471: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
472: sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
473: sum3 += x0*yy3[0] + x1*yy3[1] + x2*yy3[2] + x3*yy3[3]; yy3+=4;
474: j -= 4;
475: }
476: z[0] = sum0;
477: z[1] = sum1;
478: z[2] = sum2;
479: z[3] = sum3;
480: z += 4;
481: i -= 4;
482: VecRestoreArrayRead(yy[0],&yy0);
483: VecRestoreArrayRead(yy[1],&yy1);
484: VecRestoreArrayRead(yy[2],&yy2);
485: VecRestoreArrayRead(yy[3],&yy3);
486: yy += 4;
487: }
488: VecRestoreArrayRead(xin,&x);
489: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
490: return(0);
491: }
492:
496: PetscErrorCode VecMax_Seq(Vec xin,PetscInt* idx,PetscReal * z)
497: {
498: PetscInt i,j=0,n = xin->map->n;
499: PetscReal max,tmp;
500: const PetscScalar *xx;
501: PetscErrorCode ierr;
504: VecGetArrayRead(xin,&xx);
505: if (!n) {
506: max = PETSC_MIN_REAL;
507: j = -1;
508: } else {
509: #if defined(PETSC_USE_COMPLEX)
510: max = PetscRealPart(*xx++); j = 0;
511: #else
512: max = *xx++; j = 0;
513: #endif
514: for (i=1; i<n; i++) {
515: #if defined(PETSC_USE_COMPLEX)
516: if ((tmp = PetscRealPart(*xx++)) > max) { j = i; max = tmp;}
517: #else
518: if ((tmp = *xx++) > max) { j = i; max = tmp; }
519: #endif
520: }
521: }
522: *z = max;
523: if (idx) *idx = j;
524: VecRestoreArrayRead(xin,&xx);
525: return(0);
526: }
530: PetscErrorCode VecMin_Seq(Vec xin,PetscInt* idx,PetscReal * z)
531: {
532: PetscInt i,j=0,n = xin->map->n;
533: PetscReal min,tmp;
534: const PetscScalar *xx;
535: PetscErrorCode ierr;
538: VecGetArrayRead(xin,&xx);
539: if (!n) {
540: min = PETSC_MAX_REAL;
541: j = -1;
542: } else {
543: #if defined(PETSC_USE_COMPLEX)
544: min = PetscRealPart(*xx++); j = 0;
545: #else
546: min = *xx++; j = 0;
547: #endif
548: for (i=1; i<n; i++) {
549: #if defined(PETSC_USE_COMPLEX)
550: if ((tmp = PetscRealPart(*xx++)) < min) { j = i; min = tmp;}
551: #else
552: if ((tmp = *xx++) < min) { j = i; min = tmp; }
553: #endif
554: }
555: }
556: *z = min;
557: if (idx) *idx = j;
558: VecGetArrayRead(xin,&xx);
559: return(0);
560: }
564: PetscErrorCode VecSet_Seq(Vec xin,PetscScalar alpha)
565: {
566: PetscInt i,n = xin->map->n;
567: PetscScalar *xx;
571: VecGetArray(xin,&xx);
572: if (alpha == (PetscScalar)0.0) {
573: PetscMemzero(xx,n*sizeof(PetscScalar));
574: } else {
575: for (i=0; i<n; i++) xx[i] = alpha;
576: }
577: VecRestoreArray(xin,&xx);
578: return(0);
579: }
584: PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
585: {
586: PetscErrorCode ierr;
587: PetscInt n = xin->map->n,j,j_rem;
588: const PetscScalar *yy0,*yy1,*yy2,*yy3;
589: PetscScalar *xx,alpha0,alpha1,alpha2,alpha3;
591: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
592: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
593: #endif
596: PetscLogFlops(nv*2.0*n);
597: VecGetArray(xin,&xx);
598: //printf("Number of Vectors = %d ** ",(int)nv);
599: switch (j_rem=nv&0x3) {
600: case 3:
601: VecGetArrayRead(y[0],&yy0);
602: VecGetArrayRead(y[1],&yy1);
603: VecGetArrayRead(y[2],&yy2);
604: alpha0 = alpha[0];
605: alpha1 = alpha[1];
606: alpha2 = alpha[2];
607: alpha += 3;
608: PetscAXPY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n);
609: VecRestoreArrayRead(y[0],&yy0);
610: VecRestoreArrayRead(y[1],&yy1);
611: VecRestoreArrayRead(y[2],&yy2);
612: y += 3;
613: break;
614: case 2:
615: VecGetArrayRead(y[0],&yy0);
616: VecGetArrayRead(y[1],&yy1);
617: alpha0 = alpha[0];
618: alpha1 = alpha[1];
619: alpha +=2;
620: PetscAXPY2(xx,alpha0,alpha1,yy0,yy1,n);
621: VecRestoreArrayRead(y[0],&yy0);
622: VecRestoreArrayRead(y[1],&yy1);
623: y +=2;
624: break;
625: case 1:
626: VecGetArrayRead(y[0],&yy0);
627: alpha0 = *alpha++;
628: PetscAXPY(xx,alpha0,yy0,n);
629: VecRestoreArrayRead(y[0],&yy0);
630: y +=1;
631: break;
632: }
633: for (j=j_rem; j<nv; j+=4) {
634: VecGetArrayRead(y[0],&yy0);
635: VecGetArrayRead(y[1],&yy1);
636: VecGetArrayRead(y[2],&yy2);
637: VecGetArrayRead(y[3],&yy3);
638: alpha0 = alpha[0];
639: alpha1 = alpha[1];
640: alpha2 = alpha[2];
641: alpha3 = alpha[3];
642: alpha += 4;
644: PetscAXPY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n);
645: VecRestoreArrayRead(y[0],&yy0);
646: VecRestoreArrayRead(y[1],&yy1);
647: VecRestoreArrayRead(y[2],&yy2);
648: VecRestoreArrayRead(y[3],&yy3);
649: y += 4;
650: }
651: VecRestoreArray(xin,&xx);
652: return(0);
653: }
655: #include <../src/vec/vec/impls/seq/ftn-kernels/faypx.h>
658: PetscErrorCode VecAYPX_Seq(Vec yin,PetscScalar alpha,Vec xin)
659: {
660: PetscErrorCode ierr;
661: PetscInt n = yin->map->n;
662: PetscScalar *yy;
663: const PetscScalar *xx;
666: if (alpha == (PetscScalar)0.0) {
667: VecCopy(xin,yin);
668: } else if (alpha == (PetscScalar)1.0) {
669: VecAXPY_Seq(yin,alpha,xin);
670: } else if (alpha == (PetscScalar)-1.0) {
671: PetscInt i;
672: VecGetArrayRead(xin,&xx);
673: VecGetArray(yin,&yy);
674: for (i=0; i<n; i++) {
675: yy[i] = xx[i] - yy[i];
676: }
677: VecRestoreArrayRead(xin,&xx);
678: VecRestoreArray(yin,&yy);
679: PetscLogFlops(1.0*n);
680: } else {
681: VecGetArrayRead(xin,&xx);
682: VecGetArray(yin,&yy);
683: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
684: {
685: PetscScalar oalpha = alpha;
686: fortranaypx_(&n,&oalpha,xx,yy);
687: }
688: #else
689: {
690: PetscInt i;
691: for (i=0; i<n; i++) {
692: yy[i] = xx[i] + alpha*yy[i];
693: }
694: }
695: #endif
696: VecRestoreArrayRead(xin,&xx);
697: VecRestoreArray(yin,&yy);
698: PetscLogFlops(2.0*n);
699: }
700: return(0);
701: }
703: #include <../src/vec/vec/impls/seq/ftn-kernels/fwaxpy.h>
704: /*
705: IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
706: to be slower than a regular C loop. Hence,we do not include it.
707: void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
708: */
712: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha,Vec xin,Vec yin)
713: {
714: PetscErrorCode ierr;
715: PetscInt i,n = win->map->n;
716: PetscScalar *ww;
717: const PetscScalar *yy,*xx;
720: VecGetArrayRead(xin,&xx);
721: VecGetArrayRead(yin,&yy);
722: VecGetArray(win,&ww);
723: if (alpha == (PetscScalar)1.0) {
724: PetscLogFlops(n);
725: /* could call BLAS axpy after call to memcopy, but may be slower */
726: for (i=0; i<n; i++) ww[i] = yy[i] + xx[i];
727: } else if (alpha == (PetscScalar)-1.0) {
728: PetscLogFlops(n);
729: for (i=0; i<n; i++) ww[i] = yy[i] - xx[i];
730: } else if (alpha == (PetscScalar)0.0) {
731: PetscMemcpy(ww,yy,n*sizeof(PetscScalar));
732: } else {
733: PetscScalar oalpha = alpha;
734: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
735: fortranwaxpy_(&n,&oalpha,xx,yy,ww);
736: #else
737: for (i=0; i<n; i++) ww[i] = yy[i] + oalpha * xx[i];
738: #endif
739: PetscLogFlops(2.0*n);
740: }
741: VecRestoreArrayRead(xin,&xx);
742: VecRestoreArrayRead(yin,&yy);
743: VecRestoreArray(win,&ww);
744: return(0);
745: }
750: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin,Vec yin,PetscReal *max)
751: {
752: PetscErrorCode ierr;
753: PetscInt n = xin->map->n,i;
754: const PetscScalar *xx,*yy;
755: PetscReal m = 0.0;
758: VecGetArrayRead(xin,&xx);
759: VecGetArrayRead(yin,&yy);
760: for(i = 0; i < n; i++) {
761: if (yy[i] != (PetscScalar)0.0) {
762: m = PetscMax(PetscAbsScalar(xx[i]/yy[i]), m);
763: } else {
764: m = PetscMax(PetscAbsScalar(xx[i]), m);
765: }
766: }
767: VecRestoreArrayRead(xin,&xx);
768: VecRestoreArrayRead(yin,&yy);
769: MPI_Allreduce(&m,max,1,MPIU_REAL,MPIU_MAX,((PetscObject)xin)->comm);
770: PetscLogFlops(n);
771: return(0);
772: }
776: PetscErrorCode VecPlaceArray_Seq(Vec vin,const PetscScalar *a)
777: {
778: Vec_Seq *v = (Vec_Seq *)vin->data;
781: if (v->unplacedarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
782: v->unplacedarray = v->array; /* save previous array so reset can bring it back */
783: v->array = (PetscScalar *)a;
784: return(0);
785: }
789: PetscErrorCode VecReplaceArray_Seq(Vec vin,const PetscScalar *a)
790: {
791: Vec_Seq *v = (Vec_Seq *)vin->data;
795: PetscFree(v->array_allocated);
796: v->array_allocated = v->array = (PetscScalar *)a;
797: return(0);
798: }
800: