Actual source code: dvec2.c
petsc-3.3-p5 2012-12-01
2: /*
3: Defines some vector operation functions that are shared by
4: sequential and parallel vectors.
5: */
6: #include <../src/vec/vec/impls/dvecimpl.h>
7: #include <petsc-private/petscaxpy.h>
8: #include <petscthreadcomm.h>
10: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
11: #include <../src/vec/vec/impls/seq/ftn-kernels/fmdot.h>
14: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
15: {
16: PetscErrorCode ierr;
17: PetscInt i,nv_rem,n = xin->map->n;
18: PetscScalar sum0,sum1,sum2,sum3;
19: const PetscScalar *yy0,*yy1,*yy2,*yy3,*x;
20: Vec *yy;
23: sum0 = 0.0;
24: sum1 = 0.0;
25: sum2 = 0.0;
27: i = nv;
28: nv_rem = nv&0x3;
29: yy = (Vec*)yin;
30: VecGetArrayRead(xin,&x);
32: switch (nv_rem) {
33: case 3:
34: VecGetArrayRead(yy[0],&yy0);
35: VecGetArrayRead(yy[1],&yy1);
36: VecGetArrayRead(yy[2],&yy2);
37: fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2);
38: VecRestoreArrayRead(yy[0],&yy0);
39: VecRestoreArrayRead(yy[1],&yy1);
40: VecRestoreArrayRead(yy[2],&yy2);
41: z[0] = sum0;
42: z[1] = sum1;
43: z[2] = sum2;
44: break;
45: case 2:
46: VecGetArrayRead(yy[0],&yy0);
47: VecGetArrayRead(yy[1],&yy1);
48: fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1);
49: VecRestoreArrayRead(yy[0],&yy0);
50: VecRestoreArrayRead(yy[1],&yy1);
51: z[0] = sum0;
52: z[1] = sum1;
53: break;
54: case 1:
55: VecGetArrayRead(yy[0],&yy0);
56: fortranmdot1_(x,yy0,&n,&sum0);
57: VecRestoreArrayRead(yy[0],&yy0);
58: z[0] = sum0;
59: break;
60: case 0:
61: break;
62: }
63: z += nv_rem;
64: i -= nv_rem;
65: yy += nv_rem;
67: while (i >0) {
68: sum0 = 0.;
69: sum1 = 0.;
70: sum2 = 0.;
71: sum3 = 0.;
72: VecGetArrayRead(yy[0],&yy0);
73: VecGetArrayRead(yy[1],&yy1);
74: VecGetArrayRead(yy[2],&yy2);
75: VecGetArrayRead(yy[3],&yy3);
76: fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3);
77: VecRestoreArrayRead(yy[0],&yy0);
78: VecRestoreArrayRead(yy[1],&yy1);
79: VecRestoreArrayRead(yy[2],&yy2);
80: VecRestoreArrayRead(yy[3],&yy3);
81: yy += 4;
82: z[0] = sum0;
83: z[1] = sum1;
84: z[2] = sum2;
85: z[3] = sum3;
86: z += 4;
87: i -= 4;
88: }
89: VecRestoreArrayRead(xin,&x);
90: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
91: return(0);
92: }
94: #else
97: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
98: {
99: PetscErrorCode ierr;
100: PetscInt n = xin->map->n,i,j,nv_rem,j_rem;
101: PetscScalar sum0,sum1,sum2,sum3,x0,x1,x2,x3;
102: const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
103: Vec *yy;
106: sum0 = 0.;
107: sum1 = 0.;
108: sum2 = 0.;
110: i = nv;
111: nv_rem = nv&0x3;
112: yy = (Vec *)yin;
113: j = n;
114: VecGetArrayRead(xin,&xbase);
115: x = xbase;
117: switch (nv_rem) {
118: case 3:
119: VecGetArrayRead(yy[0],&yy0);
120: VecGetArrayRead(yy[1],&yy1);
121: VecGetArrayRead(yy[2],&yy2);
122: switch (j_rem=j&0x3) {
123: case 3:
124: x2 = x[2];
125: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
126: sum2 += x2*PetscConj(yy2[2]);
127: case 2:
128: x1 = x[1];
129: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
130: sum2 += x1*PetscConj(yy2[1]);
131: case 1:
132: x0 = x[0];
133: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
134: sum2 += x0*PetscConj(yy2[0]);
135: case 0:
136: x += j_rem;
137: yy0 += j_rem;
138: yy1 += j_rem;
139: yy2 += j_rem;
140: j -= j_rem;
141: break;
142: }
143: while (j>0) {
144: x0 = x[0];
145: x1 = x[1];
146: x2 = x[2];
147: x3 = x[3];
148: x += 4;
149:
150: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
151: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
152: sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
153: j -= 4;
154: }
155: z[0] = sum0;
156: z[1] = sum1;
157: z[2] = sum2;
158: VecRestoreArrayRead(yy[0],&yy0);
159: VecRestoreArrayRead(yy[1],&yy1);
160: VecRestoreArrayRead(yy[2],&yy2);
161: break;
162: case 2:
163: VecGetArrayRead(yy[0],&yy0);
164: VecGetArrayRead(yy[1],&yy1);
165: switch (j_rem=j&0x3) {
166: case 3:
167: x2 = x[2];
168: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
169: case 2:
170: x1 = x[1];
171: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
172: case 1:
173: x0 = x[0];
174: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
175: case 0:
176: x += j_rem;
177: yy0 += j_rem;
178: yy1 += j_rem;
179: j -= j_rem;
180: break;
181: }
182: while (j>0) {
183: x0 = x[0];
184: x1 = x[1];
185: x2 = x[2];
186: x3 = x[3];
187: x += 4;
188:
189: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
190: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
191: j -= 4;
192: }
193: z[0] = sum0;
194: z[1] = sum1;
195:
196: VecRestoreArrayRead(yy[0],&yy0);
197: VecRestoreArrayRead(yy[1],&yy1);
198: break;
199: case 1:
200: VecGetArrayRead(yy[0],&yy0);
201: switch (j_rem=j&0x3) {
202: case 3:
203: x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
204: case 2:
205: x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
206: case 1:
207: x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
208: case 0:
209: x += j_rem;
210: yy0 += j_rem;
211: j -= j_rem;
212: break;
213: }
214: while (j>0) {
215: sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
216: + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]);
217: yy0+=4;
218: j -= 4; x+=4;
219: }
220: z[0] = sum0;
222: VecRestoreArrayRead(yy[0],&yy0);
223: break;
224: case 0:
225: break;
226: }
227: z += nv_rem;
228: i -= nv_rem;
229: yy += nv_rem;
231: while (i >0) {
232: sum0 = 0.;
233: sum1 = 0.;
234: sum2 = 0.;
235: sum3 = 0.;
236: VecGetArrayRead(yy[0],&yy0);
237: VecGetArrayRead(yy[1],&yy1);
238: VecGetArrayRead(yy[2],&yy2);
239: VecGetArrayRead(yy[3],&yy3);
241: j = n;
242: x = xbase;
243: switch (j_rem=j&0x3) {
244: case 3:
245: x2 = x[2];
246: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
247: sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
248: case 2:
249: x1 = x[1];
250: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
251: sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
252: case 1:
253: x0 = x[0];
254: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
255: sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
256: case 0:
257: x += j_rem;
258: yy0 += j_rem;
259: yy1 += j_rem;
260: yy2 += j_rem;
261: yy3 += j_rem;
262: j -= j_rem;
263: break;
264: }
265: while (j>0) {
266: x0 = x[0];
267: x1 = x[1];
268: x2 = x[2];
269: x3 = x[3];
270: x += 4;
271:
272: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
273: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
274: sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
275: sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
276: j -= 4;
277: }
278: z[0] = sum0;
279: z[1] = sum1;
280: z[2] = sum2;
281: z[3] = sum3;
282: z += 4;
283: i -= 4;
284: VecRestoreArrayRead(yy[0],&yy0);
285: VecRestoreArrayRead(yy[1],&yy1);
286: VecRestoreArrayRead(yy[2],&yy2);
287: VecRestoreArrayRead(yy[3],&yy3);
288: yy += 4;
289: }
290: VecRestoreArrayRead(xin,&xbase);
291: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
292: return(0);
293: }
294: #endif
296: /* ----------------------------------------------------------------------------*/
299: PetscErrorCode VecMTDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
300: {
301: PetscErrorCode ierr;
302: PetscInt n = xin->map->n,i,j,nv_rem,j_rem;
303: PetscScalar sum0,sum1,sum2,sum3,x0,x1,x2,x3;
304: const PetscScalar *yy0,*yy1,*yy2,*yy3,*x;
305: Vec *yy;
306:
309: sum0 = 0.;
310: sum1 = 0.;
311: sum2 = 0.;
313: i = nv;
314: nv_rem = nv&0x3;
315: yy = (Vec*)yin;
316: j = n;
317: VecGetArrayRead(xin,&x);
319: switch (nv_rem) {
320: case 3:
321: VecGetArrayRead(yy[0],&yy0);
322: VecGetArrayRead(yy[1],&yy1);
323: VecGetArrayRead(yy[2],&yy2);
324: switch (j_rem=j&0x3) {
325: case 3:
326: x2 = x[2];
327: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
328: sum2 += x2*yy2[2];
329: case 2:
330: x1 = x[1];
331: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
332: sum2 += x1*yy2[1];
333: case 1:
334: x0 = x[0];
335: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
336: sum2 += x0*yy2[0];
337: case 0:
338: x += j_rem;
339: yy0 += j_rem;
340: yy1 += j_rem;
341: yy2 += j_rem;
342: j -= j_rem;
343: break;
344: }
345: while (j>0) {
346: x0 = x[0];
347: x1 = x[1];
348: x2 = x[2];
349: x3 = x[3];
350: x += 4;
351:
352: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
353: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
354: sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
355: j -= 4;
356: }
357: z[0] = sum0;
358: z[1] = sum1;
359: z[2] = sum2;
360: VecRestoreArrayRead(yy[0],&yy0);
361: VecRestoreArrayRead(yy[1],&yy1);
362: VecRestoreArrayRead(yy[2],&yy2);
363: break;
364: case 2:
365: VecGetArrayRead(yy[0],&yy0);
366: VecGetArrayRead(yy[1],&yy1);
367: switch (j_rem=j&0x3) {
368: case 3:
369: x2 = x[2];
370: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
371: case 2:
372: x1 = x[1];
373: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
374: case 1:
375: x0 = x[0];
376: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
377: case 0:
378: x += j_rem;
379: yy0 += j_rem;
380: yy1 += j_rem;
381: j -= j_rem;
382: break;
383: }
384: while (j>0) {
385: x0 = x[0];
386: x1 = x[1];
387: x2 = x[2];
388: x3 = x[3];
389: x += 4;
390:
391: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
392: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
393: j -= 4;
394: }
395: z[0] = sum0;
396: z[1] = sum1;
397:
398: VecRestoreArrayRead(yy[0],&yy0);
399: VecRestoreArrayRead(yy[1],&yy1);
400: break;
401: case 1:
402: VecGetArrayRead(yy[0],&yy0);
403: switch (j_rem=j&0x3) {
404: case 3:
405: x2 = x[2]; sum0 += x2*yy0[2];
406: case 2:
407: x1 = x[1]; sum0 += x1*yy0[1];
408: case 1:
409: x0 = x[0]; sum0 += x0*yy0[0];
410: case 0:
411: x += j_rem;
412: yy0 += j_rem;
413: j -= j_rem;
414: break;
415: }
416: while (j>0) {
417: sum0 += x[0]*yy0[0] + x[1]*yy0[1] + x[2]*yy0[2] + x[3]*yy0[3]; yy0+=4;
418: j -= 4; x+=4;
419: }
420: z[0] = sum0;
422: VecRestoreArrayRead(yy[0],&yy0);
423: break;
424: case 0:
425: break;
426: }
427: z += nv_rem;
428: i -= nv_rem;
429: yy += nv_rem;
431: while (i >0) {
432: sum0 = 0.;
433: sum1 = 0.;
434: sum2 = 0.;
435: sum3 = 0.;
436: VecGetArrayRead(yy[0],&yy0);
437: VecGetArrayRead(yy[1],&yy1);
438: VecGetArrayRead(yy[2],&yy2);
439: VecGetArrayRead(yy[3],&yy3);
441: j = n;
442: switch (j_rem=j&0x3) {
443: case 3:
444: x2 = x[2];
445: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
446: sum2 += x2*yy2[2]; sum3 += x2*yy3[2];
447: case 2:
448: x1 = x[1];
449: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
450: sum2 += x1*yy2[1]; sum3 += x1*yy3[1];
451: case 1:
452: x0 = x[0];
453: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
454: sum2 += x0*yy2[0]; sum3 += x0*yy3[0];
455: case 0:
456: x += j_rem;
457: yy0 += j_rem;
458: yy1 += j_rem;
459: yy2 += j_rem;
460: yy3 += j_rem;
461: j -= j_rem;
462: break;
463: }
464: while (j>0) {
465: x0 = x[0];
466: x1 = x[1];
467: x2 = x[2];
468: x3 = x[3];
469: x += 4;
470:
471: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
472: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
473: sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
474: sum3 += x0*yy3[0] + x1*yy3[1] + x2*yy3[2] + x3*yy3[3]; yy3+=4;
475: j -= 4;
476: }
477: z[0] = sum0;
478: z[1] = sum1;
479: z[2] = sum2;
480: z[3] = sum3;
481: z += 4;
482: i -= 4;
483: VecRestoreArrayRead(yy[0],&yy0);
484: VecRestoreArrayRead(yy[1],&yy1);
485: VecRestoreArrayRead(yy[2],&yy2);
486: VecRestoreArrayRead(yy[3],&yy3);
487: yy += 4;
488: }
489: VecRestoreArrayRead(xin,&x);
490: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
491: return(0);
492: }
493:
497: PetscErrorCode VecMax_Seq(Vec xin,PetscInt* idx,PetscReal * z)
498: {
499: PetscInt i,j=0,n = xin->map->n;
500: PetscReal max,tmp;
501: const PetscScalar *xx;
502: PetscErrorCode ierr;
505: VecGetArrayRead(xin,&xx);
506: if (!n) {
507: max = PETSC_MIN_REAL;
508: j = -1;
509: } else {
510: #if defined(PETSC_USE_COMPLEX)
511: max = PetscRealPart(*xx++); j = 0;
512: #else
513: max = *xx++; j = 0;
514: #endif
515: for (i=1; i<n; i++) {
516: #if defined(PETSC_USE_COMPLEX)
517: if ((tmp = PetscRealPart(*xx++)) > max) { j = i; max = tmp;}
518: #else
519: if ((tmp = *xx++) > max) { j = i; max = tmp; }
520: #endif
521: }
522: }
523: *z = max;
524: if (idx) *idx = j;
525: VecRestoreArrayRead(xin,&xx);
526: return(0);
527: }
531: PetscErrorCode VecMin_Seq(Vec xin,PetscInt* idx,PetscReal * z)
532: {
533: PetscInt i,j=0,n = xin->map->n;
534: PetscReal min,tmp;
535: const PetscScalar *xx;
536: PetscErrorCode ierr;
539: VecGetArrayRead(xin,&xx);
540: if (!n) {
541: min = PETSC_MAX_REAL;
542: j = -1;
543: } else {
544: #if defined(PETSC_USE_COMPLEX)
545: min = PetscRealPart(*xx++); j = 0;
546: #else
547: min = *xx++; j = 0;
548: #endif
549: for (i=1; i<n; i++) {
550: #if defined(PETSC_USE_COMPLEX)
551: if ((tmp = PetscRealPart(*xx++)) < min) { j = i; min = tmp;}
552: #else
553: if ((tmp = *xx++) < min) { j = i; min = tmp; }
554: #endif
555: }
556: }
557: *z = min;
558: if (idx) *idx = j;
559: VecGetArrayRead(xin,&xx);
560: return(0);
561: }
563: #if defined(PETSC_THREADCOMM_ACTIVE)
564: PetscErrorCode VecSet_kernel(PetscInt thread_id,Vec xin,PetscScalar *alpha_p)
565: {
567: PetscScalar *xx;
568: PetscInt i,start,end;
569: PetscInt *trstarts=xin->map->trstarts;
570: PetscScalar alpha = *alpha_p;
572: start = trstarts[thread_id];
573: end = trstarts[thread_id+1];
574: VecGetArray(xin,&xx);
575: if (alpha == (PetscScalar)0.0) {
576: PetscMemzero(xx+start,(end-start)*sizeof(PetscScalar));
577: } else {
578: for (i=start;i<end;i++) xx[i] = alpha;
579: }
580: VecRestoreArray(xin,&xx);
581: return 0;
582: }
586: PetscErrorCode VecSet_Seq(Vec xin,PetscScalar alpha)
587: {
589: PetscScalar *scalar;
592: PetscThreadCommGetScalars(((PetscObject)xin)->comm,&scalar,PETSC_NULL,PETSC_NULL);
593: *scalar = alpha;
594: PetscThreadCommRunKernel(((PetscObject)xin)->comm,(PetscThreadKernel)VecSet_kernel,2,xin,scalar);
595: return(0);
596: }
597: #else
600: PetscErrorCode VecSet_Seq(Vec xin,PetscScalar alpha)
601: {
602: PetscInt i,n = xin->map->n;
603: PetscScalar *xx;
607: VecGetArray(xin,&xx);
608: if (alpha == (PetscScalar)0.0) {
609: PetscMemzero(xx,n*sizeof(PetscScalar));
610: } else {
611: for (i=0; i<n; i++) xx[i] = alpha;
612: }
613: VecRestoreArray(xin,&xx);
614: return(0);
615: }
616: #endif
620: PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
621: {
622: PetscErrorCode ierr;
623: PetscInt n = xin->map->n,j,j_rem;
624: const PetscScalar *yy0,*yy1,*yy2,*yy3;
625: PetscScalar *xx,alpha0,alpha1,alpha2,alpha3;
627: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
628: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
629: #endif
632: PetscLogFlops(nv*2.0*n);
633: VecGetArray(xin,&xx);
634: switch (j_rem=nv&0x3) {
635: case 3:
636: VecGetArrayRead(y[0],&yy0);
637: VecGetArrayRead(y[1],&yy1);
638: VecGetArrayRead(y[2],&yy2);
639: alpha0 = alpha[0];
640: alpha1 = alpha[1];
641: alpha2 = alpha[2];
642: alpha += 3;
643: PetscAXPY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n);
644: VecRestoreArrayRead(y[0],&yy0);
645: VecRestoreArrayRead(y[1],&yy1);
646: VecRestoreArrayRead(y[2],&yy2);
647: y += 3;
648: break;
649: case 2:
650: VecGetArrayRead(y[0],&yy0);
651: VecGetArrayRead(y[1],&yy1);
652: alpha0 = alpha[0];
653: alpha1 = alpha[1];
654: alpha +=2;
655: PetscAXPY2(xx,alpha0,alpha1,yy0,yy1,n);
656: VecRestoreArrayRead(y[0],&yy0);
657: VecRestoreArrayRead(y[1],&yy1);
658: y +=2;
659: break;
660: case 1:
661: VecGetArrayRead(y[0],&yy0);
662: alpha0 = *alpha++;
663: PetscAXPY(xx,alpha0,yy0,n);
664: VecRestoreArrayRead(y[0],&yy0);
665: y +=1;
666: break;
667: }
668: for (j=j_rem; j<nv; j+=4) {
669: VecGetArrayRead(y[0],&yy0);
670: VecGetArrayRead(y[1],&yy1);
671: VecGetArrayRead(y[2],&yy2);
672: VecGetArrayRead(y[3],&yy3);
673: alpha0 = alpha[0];
674: alpha1 = alpha[1];
675: alpha2 = alpha[2];
676: alpha3 = alpha[3];
677: alpha += 4;
679: PetscAXPY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n);
680: VecRestoreArrayRead(y[0],&yy0);
681: VecRestoreArrayRead(y[1],&yy1);
682: VecRestoreArrayRead(y[2],&yy2);
683: VecRestoreArrayRead(y[3],&yy3);
684: y += 4;
685: }
686: VecRestoreArray(xin,&xx);
687: return(0);
688: }
690: #include <../src/vec/vec/impls/seq/ftn-kernels/faypx.h>
693: PetscErrorCode VecAYPX_Seq(Vec yin,PetscScalar alpha,Vec xin)
694: {
695: PetscErrorCode ierr;
696: PetscInt n = yin->map->n;
697: PetscScalar *yy;
698: const PetscScalar *xx;
701: if (alpha == (PetscScalar)0.0) {
702: VecCopy(xin,yin);
703: } else if (alpha == (PetscScalar)1.0) {
704: VecAXPY_Seq(yin,alpha,xin);
705: } else if (alpha == (PetscScalar)-1.0) {
706: PetscInt i;
707: VecGetArrayRead(xin,&xx);
708: VecGetArray(yin,&yy);
709: for (i=0; i<n; i++) {
710: yy[i] = xx[i] - yy[i];
711: }
712: VecRestoreArrayRead(xin,&xx);
713: VecRestoreArray(yin,&yy);
714: PetscLogFlops(1.0*n);
715: } else {
716: VecGetArrayRead(xin,&xx);
717: VecGetArray(yin,&yy);
718: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
719: {
720: PetscScalar oalpha = alpha;
721: fortranaypx_(&n,&oalpha,xx,yy);
722: }
723: #else
724: {
725: PetscInt i;
726: for (i=0; i<n; i++) {
727: yy[i] = xx[i] + alpha*yy[i];
728: }
729: }
730: #endif
731: VecRestoreArrayRead(xin,&xx);
732: VecRestoreArray(yin,&yy);
733: PetscLogFlops(2.0*n);
734: }
735: return(0);
736: }
738: #include <../src/vec/vec/impls/seq/ftn-kernels/fwaxpy.h>
739: /*
740: IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
741: to be slower than a regular C loop. Hence,we do not include it.
742: void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
743: */
747: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha,Vec xin,Vec yin)
748: {
749: PetscErrorCode ierr;
750: PetscInt i,n = win->map->n;
751: PetscScalar *ww;
752: const PetscScalar *yy,*xx;
755: VecGetArrayRead(xin,&xx);
756: VecGetArrayRead(yin,&yy);
757: VecGetArray(win,&ww);
758: if (alpha == (PetscScalar)1.0) {
759: PetscLogFlops(n);
760: /* could call BLAS axpy after call to memcopy, but may be slower */
761: for (i=0; i<n; i++) ww[i] = yy[i] + xx[i];
762: } else if (alpha == (PetscScalar)-1.0) {
763: PetscLogFlops(n);
764: for (i=0; i<n; i++) ww[i] = yy[i] - xx[i];
765: } else if (alpha == (PetscScalar)0.0) {
766: PetscMemcpy(ww,yy,n*sizeof(PetscScalar));
767: } else {
768: PetscScalar oalpha = alpha;
769: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
770: fortranwaxpy_(&n,&oalpha,xx,yy,ww);
771: #else
772: for (i=0; i<n; i++) ww[i] = yy[i] + oalpha * xx[i];
773: #endif
774: PetscLogFlops(2.0*n);
775: }
776: VecRestoreArrayRead(xin,&xx);
777: VecRestoreArrayRead(yin,&yy);
778: VecRestoreArray(win,&ww);
779: return(0);
780: }
785: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin,Vec yin,PetscReal *max)
786: {
787: PetscErrorCode ierr;
788: PetscInt n = xin->map->n,i;
789: const PetscScalar *xx,*yy;
790: PetscReal m = 0.0;
793: VecGetArrayRead(xin,&xx);
794: VecGetArrayRead(yin,&yy);
795: for(i = 0; i < n; i++) {
796: if (yy[i] != (PetscScalar)0.0) {
797: m = PetscMax(PetscAbsScalar(xx[i]/yy[i]), m);
798: } else {
799: m = PetscMax(PetscAbsScalar(xx[i]), m);
800: }
801: }
802: VecRestoreArrayRead(xin,&xx);
803: VecRestoreArrayRead(yin,&yy);
804: MPI_Allreduce(&m,max,1,MPIU_REAL,MPIU_MAX,((PetscObject)xin)->comm);
805: PetscLogFlops(n);
806: return(0);
807: }
811: PetscErrorCode VecPlaceArray_Seq(Vec vin,const PetscScalar *a)
812: {
813: Vec_Seq *v = (Vec_Seq *)vin->data;
816: if (v->unplacedarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
817: v->unplacedarray = v->array; /* save previous array so reset can bring it back */
818: v->array = (PetscScalar *)a;
819: return(0);
820: }
824: PetscErrorCode VecReplaceArray_Seq(Vec vin,const PetscScalar *a)
825: {
826: Vec_Seq *v = (Vec_Seq *)vin->data;
830: PetscFree(v->array_allocated);
831: v->array_allocated = v->array = (PetscScalar *)a;
832: return(0);
833: }
835: