Actual source code: dvec2.c
2: /*
3: Defines some vector operation functions that are shared by
4: sequential and parallel vectors.
5: */
6: #include src/vec/impls/dvecimpl.h
7: #include src/inline/dot.h
8: #include src/inline/setval.h
9: #include src/inline/axpy.h
11: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
14: PetscErrorCode VecMDot_Seq(PetscInt nv,Vec xin,const Vec yin[],PetscScalar *z)
15: {
16: Vec_Seq *xv = (Vec_Seq *)xin->data;
18: PetscInt i,nv_rem,n = xin->n;
19: PetscScalar sum0,sum1,sum2,sum3,*yy0,*yy1,*yy2,*yy3,*x;
20: Vec *yy;
23: sum0 = 0;
24: sum1 = 0;
25: sum2 = 0;
27: i = nv;
28: nv_rem = nv&0x3;
29: yy = (Vec*)yin;
30: x = xv->array;
32: switch (nv_rem) {
33: case 3:
34: VecGetArray(yy[0],&yy0);
35: VecGetArray(yy[1],&yy1);
36: VecGetArray(yy[2],&yy2);
37: fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2);
38: VecRestoreArray(yy[0],&yy0);
39: VecRestoreArray(yy[1],&yy1);
40: VecRestoreArray(yy[2],&yy2);
41: z[0] = sum0;
42: z[1] = sum1;
43: z[2] = sum2;
44: break;
45: case 2:
46: VecGetArray(yy[0],&yy0);
47: VecGetArray(yy[1],&yy1);
48: fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1);
49: VecRestoreArray(yy[0],&yy0);
50: VecRestoreArray(yy[1],&yy1);
51: z[0] = sum0;
52: z[1] = sum1;
53: break;
54: case 1:
55: VecGetArray(yy[0],&yy0);
56: fortranmdot1_(x,yy0,&n,&sum0);
57: VecRestoreArray(yy[0],&yy0);
58: z[0] = sum0;
59: break;
60: case 0:
61: break;
62: }
63: z += nv_rem;
64: i -= nv_rem;
65: yy += nv_rem;
67: while (i >0) {
68: sum0 = 0;
69: sum1 = 0;
70: sum2 = 0;
71: sum3 = 0;
72: VecGetArray(yy[0],&yy0);
73: VecGetArray(yy[1],&yy1);
74: VecGetArray(yy[2],&yy2);
75: VecGetArray(yy[3],&yy3);
76: fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3);
77: VecRestoreArray(yy[0],&yy0);
78: VecRestoreArray(yy[1],&yy1);
79: VecRestoreArray(yy[2],&yy2);
80: VecRestoreArray(yy[3],&yy3);
81: yy += 4;
82: z[0] = sum0;
83: z[1] = sum1;
84: z[2] = sum2;
85: z[3] = sum3;
86: z += 4;
87: i -= 4;
88: }
89: PetscLogFlops(nv*(2*xin->n-1));
90: return(0);
91: }
93: #else
96: PetscErrorCode VecMDot_Seq(PetscInt nv,Vec xin,const Vec yin[],PetscScalar * restrict z)
97: {
98: Vec_Seq *xv = (Vec_Seq *)xin->data;
100: PetscInt n = xin->n,i,j,nv_rem,j_rem;
101: PetscScalar sum0,sum1,sum2,sum3,x0,x1,x2,x3,* restrict x;
102: PetscScalar * restrict yy0,* restrict yy1,* restrict yy2,*restrict yy3;
103: Vec *yy;
106: sum0 = 0;
107: sum1 = 0;
108: sum2 = 0;
110: i = nv;
111: nv_rem = nv&0x3;
112: yy = (Vec *)yin;
113: j = n;
114: x = xv->array;
116: switch (nv_rem) {
117: case 3:
118: VecGetArray(yy[0],&yy0);
119: VecGetArray(yy[1],&yy1);
120: VecGetArray(yy[2],&yy2);
121: switch (j_rem=j&0x3) {
122: case 3:
123: x2 = x[2];
124: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
125: sum2 += x2*PetscConj(yy2[2]);
126: case 2:
127: x1 = x[1];
128: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
129: sum2 += x1*PetscConj(yy2[1]);
130: case 1:
131: x0 = x[0];
132: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
133: sum2 += x0*PetscConj(yy2[0]);
134: case 0:
135: x += j_rem;
136: yy0 += j_rem;
137: yy1 += j_rem;
138: yy2 += j_rem;
139: j -= j_rem;
140: break;
141: }
142: while (j>0) {
143: x0 = x[0];
144: x1 = x[1];
145: x2 = x[2];
146: x3 = x[3];
147: x += 4;
148:
149: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
150: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
151: sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
152: j -= 4;
153: }
154: z[0] = sum0;
155: z[1] = sum1;
156: z[2] = sum2;
157: VecRestoreArray(yy[0],&yy0);
158: VecRestoreArray(yy[1],&yy1);
159: VecRestoreArray(yy[2],&yy2);
160: break;
161: case 2:
162: VecGetArray(yy[0],&yy0);
163: VecGetArray(yy[1],&yy1);
164: switch (j_rem=j&0x3) {
165: case 3:
166: x2 = x[2];
167: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
168: case 2:
169: x1 = x[1];
170: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
171: case 1:
172: x0 = x[0];
173: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
174: case 0:
175: x += j_rem;
176: yy0 += j_rem;
177: yy1 += j_rem;
178: j -= j_rem;
179: break;
180: }
181: while (j>0) {
182: x0 = x[0];
183: x1 = x[1];
184: x2 = x[2];
185: x3 = x[3];
186: x += 4;
187:
188: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
189: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
190: j -= 4;
191: }
192: z[0] = sum0;
193: z[1] = sum1;
194:
195: VecRestoreArray(yy[0],&yy0);
196: VecRestoreArray(yy[1],&yy1);
197: break;
198: case 1:
199: VecGetArray(yy[0],&yy0);
200: switch (j_rem=j&0x3) {
201: case 3:
202: x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
203: case 2:
204: x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
205: case 1:
206: x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
207: case 0:
208: x += j_rem;
209: yy0 += j_rem;
210: j -= j_rem;
211: break;
212: }
213: while (j>0) {
214: sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
215: + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]);
216: yy0+=4;
217: j -= 4; x+=4;
218: }
219: z[0] = sum0;
221: VecRestoreArray(yy[0],&yy0);
222: break;
223: case 0:
224: break;
225: }
226: z += nv_rem;
227: i -= nv_rem;
228: yy += nv_rem;
230: while (i >0) {
231: sum0 = 0;
232: sum1 = 0;
233: sum2 = 0;
234: sum3 = 0;
235: VecGetArray(yy[0],&yy0);
236: VecGetArray(yy[1],&yy1);
237: VecGetArray(yy[2],&yy2);
238: VecGetArray(yy[3],&yy3);
240: j = n;
241: x = xv->array;
242: switch (j_rem=j&0x3) {
243: case 3:
244: x2 = x[2];
245: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
246: sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
247: case 2:
248: x1 = x[1];
249: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
250: sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
251: case 1:
252: x0 = x[0];
253: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
254: sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
255: case 0:
256: x += j_rem;
257: yy0 += j_rem;
258: yy1 += j_rem;
259: yy2 += j_rem;
260: yy3 += j_rem;
261: j -= j_rem;
262: break;
263: }
264: while (j>0) {
265: x0 = x[0];
266: x1 = x[1];
267: x2 = x[2];
268: x3 = x[3];
269: x += 4;
270:
271: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
272: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
273: sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
274: sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
275: j -= 4;
276: }
277: z[0] = sum0;
278: z[1] = sum1;
279: z[2] = sum2;
280: z[3] = sum3;
281: z += 4;
282: i -= 4;
283: VecRestoreArray(yy[0],&yy0);
284: VecRestoreArray(yy[1],&yy1);
285: VecRestoreArray(yy[2],&yy2);
286: VecRestoreArray(yy[3],&yy3);
287: yy += 4;
288: }
289: PetscLogFlops(nv*(2*xin->n-1));
290: return(0);
291: }
292: #endif
294: /* ----------------------------------------------------------------------------*/
297: PetscErrorCode VecMTDot_Seq(PetscInt nv,Vec xin,const Vec yin[],PetscScalar *z)
298: {
299: Vec_Seq *xv = (Vec_Seq *)xin->data;
301: PetscInt n = xin->n,i,j,nv_rem,j_rem;
302: PetscScalar sum0,sum1,sum2,sum3,*yy0,*yy1,*yy2,*yy3,x0,x1,x2,x3,*x;
303: Vec *yy;
304:
307: sum0 = 0;
308: sum1 = 0;
309: sum2 = 0;
311: i = nv;
312: nv_rem = nv&0x3;
313: yy = (Vec*)yin;
314: j = n;
315: x = xv->array;
317: switch (nv_rem) {
318: case 3:
319: VecGetArray(yy[0],&yy0);
320: VecGetArray(yy[1],&yy1);
321: VecGetArray(yy[2],&yy2);
322: switch (j_rem=j&0x3) {
323: case 3:
324: x2 = x[2];
325: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
326: sum2 += x2*yy2[2];
327: case 2:
328: x1 = x[1];
329: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
330: sum2 += x1*yy2[1];
331: case 1:
332: x0 = x[0];
333: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
334: sum2 += x0*yy2[0];
335: case 0:
336: x += j_rem;
337: yy0 += j_rem;
338: yy1 += j_rem;
339: yy2 += j_rem;
340: j -= j_rem;
341: break;
342: }
343: while (j>0) {
344: x0 = x[0];
345: x1 = x[1];
346: x2 = x[2];
347: x3 = x[3];
348: x += 4;
349:
350: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
351: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
352: sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
353: j -= 4;
354: }
355: z[0] = sum0;
356: z[1] = sum1;
357: z[2] = sum2;
358: VecRestoreArray(yy[0],&yy0);
359: VecRestoreArray(yy[1],&yy1);
360: VecRestoreArray(yy[2],&yy2);
361: break;
362: case 2:
363: VecGetArray(yy[0],&yy0);
364: VecGetArray(yy[1],&yy1);
365: switch (j_rem=j&0x3) {
366: case 3:
367: x2 = x[2];
368: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
369: case 2:
370: x1 = x[1];
371: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
372: case 1:
373: x0 = x[0];
374: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
375: case 0:
376: x += j_rem;
377: yy0 += j_rem;
378: yy1 += j_rem;
379: j -= j_rem;
380: break;
381: }
382: while (j>0) {
383: x0 = x[0];
384: x1 = x[1];
385: x2 = x[2];
386: x3 = x[3];
387: x += 4;
388:
389: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
390: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
391: j -= 4;
392: }
393: z[0] = sum0;
394: z[1] = sum1;
395:
396: VecRestoreArray(yy[0],&yy0);
397: VecRestoreArray(yy[1],&yy1);
398: break;
399: case 1:
400: VecGetArray(yy[0],&yy0);
401: switch (j_rem=j&0x3) {
402: case 3:
403: x2 = x[2]; sum0 += x2*yy0[2];
404: case 2:
405: x1 = x[1]; sum0 += x1*yy0[1];
406: case 1:
407: x0 = x[0]; sum0 += x0*yy0[0];
408: case 0:
409: x += j_rem;
410: yy0 += j_rem;
411: j -= j_rem;
412: break;
413: }
414: while (j>0) {
415: sum0 += x[0]*yy0[0] + x[1]*yy0[1] + x[2]*yy0[2] + x[3]*yy0[3]; yy0+=4;
416: j -= 4; x+=4;
417: }
418: z[0] = sum0;
420: VecRestoreArray(yy[0],&yy0);
421: break;
422: case 0:
423: break;
424: }
425: z += nv_rem;
426: i -= nv_rem;
427: yy += nv_rem;
429: while (i >0) {
430: sum0 = 0;
431: sum1 = 0;
432: sum2 = 0;
433: sum3 = 0;
434: VecGetArray(yy[0],&yy0);
435: VecGetArray(yy[1],&yy1);
436: VecGetArray(yy[2],&yy2);
437: VecGetArray(yy[3],&yy3);
439: j = n;
440: x = xv->array;
441: switch (j_rem=j&0x3) {
442: case 3:
443: x2 = x[2];
444: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
445: sum2 += x2*yy2[2]; sum3 += x2*yy3[2];
446: case 2:
447: x1 = x[1];
448: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
449: sum2 += x1*yy2[1]; sum3 += x1*yy3[1];
450: case 1:
451: x0 = x[0];
452: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
453: sum2 += x0*yy2[0]; sum3 += x0*yy3[0];
454: case 0:
455: x += j_rem;
456: yy0 += j_rem;
457: yy1 += j_rem;
458: yy2 += j_rem;
459: yy3 += j_rem;
460: j -= j_rem;
461: break;
462: }
463: while (j>0) {
464: x0 = x[0];
465: x1 = x[1];
466: x2 = x[2];
467: x3 = x[3];
468: x += 4;
469:
470: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
471: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
472: sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
473: sum3 += x0*yy3[0] + x1*yy3[1] + x2*yy3[2] + x3*yy3[3]; yy3+=4;
474: j -= 4;
475: }
476: z[0] = sum0;
477: z[1] = sum1;
478: z[2] = sum2;
479: z[3] = sum3;
480: z += 4;
481: i -= 4;
482: VecRestoreArray(yy[0],&yy0);
483: VecRestoreArray(yy[1],&yy1);
484: VecRestoreArray(yy[2],&yy2);
485: VecRestoreArray(yy[3],&yy3);
486: yy += 4;
487: }
488: PetscLogFlops(nv*(2*xin->n-1));
489: return(0);
490: }
491:
495: PetscErrorCode VecMax_Seq(Vec xin,PetscInt* idx,PetscReal * z)
496: {
497: Vec_Seq *x = (Vec_Seq*)xin->data;
498: PetscInt i,j=0,n = xin->n;
499: PetscReal max,tmp;
500: PetscScalar *xx = x->array;
503: if (!n) {
504: max = PETSC_MIN;
505: j = -1;
506: } else {
507: #if defined(PETSC_USE_COMPLEX)
508: max = PetscRealPart(*xx++); j = 0;
509: #else
510: max = *xx++; j = 0;
511: #endif
512: for (i=1; i<n; i++) {
513: #if defined(PETSC_USE_COMPLEX)
514: if ((tmp = PetscRealPart(*xx++)) > max) { j = i; max = tmp;}
515: #else
516: if ((tmp = *xx++) > max) { j = i; max = tmp; }
517: #endif
518: }
519: }
520: *z = max;
521: if (idx) *idx = j;
522: return(0);
523: }
527: PetscErrorCode VecMin_Seq(Vec xin,PetscInt* idx,PetscReal * z)
528: {
529: Vec_Seq *x = (Vec_Seq*)xin->data;
530: PetscInt i,j=0,n = xin->n;
531: PetscReal min,tmp;
532: PetscScalar *xx = x->array;
535: if (!n) {
536: min = PETSC_MAX;
537: j = -1;
538: } else {
539: #if defined(PETSC_USE_COMPLEX)
540: min = PetscRealPart(*xx++); j = 0;
541: #else
542: min = *xx++; j = 0;
543: #endif
544: for (i=1; i<n; i++) {
545: #if defined(PETSC_USE_COMPLEX)
546: if ((tmp = PetscRealPart(*xx++)) < min) { j = i; min = tmp;}
547: #else
548: if ((tmp = *xx++) < min) { j = i; min = tmp; }
549: #endif
550: }
551: }
552: *z = min;
553: if (idx) *idx = j;
554: return(0);
555: }
559: PetscErrorCode VecSet_Seq(const PetscScalar* alpha,Vec xin)
560: {
561: Vec_Seq *x = (Vec_Seq *)xin->data;
563: PetscInt n = xin->n;
564: PetscScalar *xx = x->array,oalpha = *alpha;
567: if (oalpha == 0.0) {
568: PetscMemzero(xx,n*sizeof(PetscScalar));
569: }
570: else {
571: SET(xx,n,oalpha);
572: }
573: return(0);
574: }
578: PetscErrorCode VecSetRandom_Seq(PetscRandom r,Vec xin)
579: {
581: PetscInt n = xin->n,i;
582: PetscScalar *xx;
585: VecGetArray(xin,&xx);
586: for (i=0; i<n; i++) {PetscRandomGetValue(r,&xx[i]);}
587: VecRestoreArray(xin,&xx);
588: return(0);
589: }
593: PetscErrorCode VecMAXPY_Seq(PetscInt nv,const PetscScalar *alpha,Vec xin,Vec *y)
594: {
595: Vec_Seq *xdata = (Vec_Seq*)xin->data;
597: PetscInt n = xin->n,j,j_rem;
598: PetscScalar *xx,*yy0,*yy1,*yy2,*yy3,alpha0,alpha1,alpha2,alpha3;
600: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
601: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
602: #endif
605: PetscLogFlops(nv*2*n);
607: xx = xdata->array;
608: switch (j_rem=nv&0x3) {
609: case 3:
610: VecGetArray(y[0],&yy0);
611: VecGetArray(y[1],&yy1);
612: VecGetArray(y[2],&yy2);
613: alpha0 = alpha[0];
614: alpha1 = alpha[1];
615: alpha2 = alpha[2];
616: alpha += 3;
617: APXY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n);
618: VecRestoreArray(y[0],&yy0);
619: VecRestoreArray(y[1],&yy1);
620: VecRestoreArray(y[2],&yy2);
621: y += 3;
622: break;
623: case 2:
624: VecGetArray(y[0],&yy0);
625: VecGetArray(y[1],&yy1);
626: alpha0 = alpha[0];
627: alpha1 = alpha[1];
628: alpha +=2;
629: APXY2(xx,alpha0,alpha1,yy0,yy1,n);
630: VecRestoreArray(y[0],&yy0);
631: VecRestoreArray(y[1],&yy1);
632: y +=2;
633: break;
634: case 1:
635: VecGetArray(y[0],&yy0);
636: alpha0 = *alpha++; APXY(xx,alpha0,yy0,n);
637: VecRestoreArray(y[0],&yy0);
638: y +=1;
639: break;
640: }
641: for (j=j_rem; j<nv; j+=4) {
642: VecGetArray(y[0],&yy0);
643: VecGetArray(y[1],&yy1);
644: VecGetArray(y[2],&yy2);
645: VecGetArray(y[3],&yy3);
646: alpha0 = alpha[0];
647: alpha1 = alpha[1];
648: alpha2 = alpha[2];
649: alpha3 = alpha[3];
650: alpha += 4;
652: APXY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n);
653: VecRestoreArray(y[0],&yy0);
654: VecRestoreArray(y[1],&yy1);
655: VecRestoreArray(y[2],&yy2);
656: VecRestoreArray(y[3],&yy3);
657: y += 4;
658: }
659: return(0);
660: }
664: PetscErrorCode VecAYPX_Seq(const PetscScalar *alpha,Vec xin,Vec yin)
665: {
666: Vec_Seq *x = (Vec_Seq *)xin->data;
668: PetscInt n = xin->n;
669: PetscScalar *xx = x->array,*yy;
672: VecGetArray(yin,&yy);
673: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
674: fortranaypx_(&n,alpha,xx,yy);
675: #else
676: {
677: PetscInt i;
678: PetscScalar oalpha = *alpha;
679: for (i=0; i<n; i++) {
680: yy[i] = xx[i] + oalpha*yy[i];
681: }
682: }
683: #endif
684: VecRestoreArray(yin,&yy);
685: PetscLogFlops(2*n);
686: return(0);
687: }
/*
   IBM ESSL contains a routine dzaxpy() that is our WAXPY(), but it appears
   to be slower than a regular C loop. Hence, we do not include it.
   void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
*/
697: PetscErrorCode VecWAXPY_Seq(const PetscScalar* alpha,Vec xin,Vec yin,Vec win)
698: {
699: Vec_Seq *x = (Vec_Seq *)xin->data;
701: PetscInt i,n = xin->n;
702: PetscScalar *xx = x->array,*yy,*ww,oalpha = *alpha;
705: VecGetArray(yin,&yy);
706: VecGetArray(win,&ww);
707: if (oalpha == 1.0) {
708: PetscLogFlops(n);
709: /* could call BLAS axpy after call to memcopy, but may be slower */
710: for (i=0; i<n; i++) ww[i] = yy[i] + xx[i];
711: } else if (oalpha == -1.0) {
712: PetscLogFlops(n);
713: for (i=0; i<n; i++) ww[i] = yy[i] - xx[i];
714: } else if (oalpha == 0.0) {
715: PetscMemcpy(ww,yy,n*sizeof(PetscScalar));
716: } else {
717: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
718: fortranwaxpy_(&n,alpha,xx,yy,ww);
719: #else
720: for (i=0; i<n; i++) ww[i] = yy[i] + oalpha * xx[i];
721: #endif
722: PetscLogFlops(2*n);
723: }
724: VecRestoreArray(yin,&yy);
725: VecRestoreArray(win,&ww);
726: return(0);
727: }
731: PetscErrorCode VecPointwiseMult_Seq(Vec xin,Vec yin,Vec win)
732: {
733: Vec_Seq *x = (Vec_Seq *)xin->data;
735: PetscInt n = xin->n,i;
736: PetscScalar *xx = x->array,*yy,*ww;
739: VecGetArray(yin,&yy);
740: if (yin != win) {VecGetArray(win,&ww);}
741: else ww = yy;
743: if (ww == xx) {
744: for (i=0; i<n; i++) ww[i] *= yy[i];
745: } else if (ww == yy) {
746: for (i=0; i<n; i++) ww[i] *= xx[i];
747: } else {
748: /* This was suppose to help on SGI but didn't really seem to
749: PetscReal * __restrict www = ww;
750: PetscReal * __restrict yyy = yy;
751: PetscReal * __restrict xxx = xx;
752: for (i=0; i<n; i++) www[i] = xxx[i] * yyy[i];
753: */
754: #if defined(PETSC_USE_FORTRAN_KERNEL_XTIMESY)
755: fortranxtimesy_(xx,yy,ww,&n);
756: #else
757: for (i=0; i<n; i++) ww[i] = xx[i] * yy[i];
758: #endif
759: }
760: VecRestoreArray(yin,&yy);
761: if (yin != win) {VecRestoreArray(win,&ww);}
762: PetscLogFlops(n);
763: return(0);
764: }
768: PetscErrorCode VecPointwiseDivide_Seq(Vec xin,Vec yin,Vec win)
769: {
770: Vec_Seq *x = (Vec_Seq *)xin->data;
772: PetscInt n = xin->n,i;
773: PetscScalar *xx = x->array,*yy,*ww;
776: VecGetArray(yin,&yy);
777: if (yin != win) {VecGetArray(win,&ww);}
778: else {ww = yy;}
779: for (i=0; i<n; i++) ww[i] = xx[i] / yy[i];
780: VecRestoreArray(yin,&yy);
781: if (yin != win) {VecRestoreArray(win,&ww);}
782: PetscLogFlops(n);
783: return(0);
784: }
788: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin,Vec yin,PetscReal *max)
789: {
790: Vec_Seq *x = (Vec_Seq *)xin->data;
792: PetscInt n = xin->n,i;
793: PetscScalar *xx = x->array,*yy;
794: PetscReal m = 0.0;
797: VecGetArray(yin,&yy);
798: for(i = 0; i < n; i++) {
799: if (yy[i] != 0.0) {
800: m = PetscMax(PetscAbsScalar(xx[i]/yy[i]), m);
801: } else {
802: m = PetscMax(PetscAbsScalar(xx[i]), m);
803: }
804: }
805: MPI_Allreduce(&m,max,1,MPIU_REAL,MPI_MAX,xin->comm);
806: VecRestoreArray(yin,&yy);
807: PetscLogFlops(n);
808: return(0);
809: }
813: PetscErrorCode VecGetArray_Seq(Vec vin,PetscScalar *a[])
814: {
815: Vec_Seq *v = (Vec_Seq *)vin->data;
819: if (vin->array_gotten) {
820: SETERRQ(PETSC_ERR_ORDER,"Array has already been gotten for this vector,you may\n\
821: have forgotten a call to VecRestoreArray()");
822: }
823: vin->array_gotten = PETSC_TRUE;
825: *a = v->array;
826: PetscObjectTakeAccess(vin);
827: return(0);
828: }
832: PetscErrorCode VecRestoreArray_Seq(Vec vin,PetscScalar *a[])
833: {
837: if (!vin->array_gotten) {
838: SETERRQ(PETSC_ERR_ORDER,"Array has not been gotten for this vector, you may\n\
839: have forgotten a call to VecGetArray()");
840: }
841: vin->array_gotten = PETSC_FALSE;
842: if (a) *a = 0; /* now user cannot accidently use it again */
844: PetscObjectGrantAccess(vin);
845: return(0);
846: }
850: PetscErrorCode VecResetArray_Seq(Vec vin)
851: {
852: Vec_Seq *v = (Vec_Seq *)vin->data;
855: v->array = v->array_allocated;
856: return(0);
857: }
861: PetscErrorCode VecPlaceArray_Seq(Vec vin,const PetscScalar *a)
862: {
863: Vec_Seq *v = (Vec_Seq *)vin->data;
866: v->array = (PetscScalar *)a;
867: return(0);
868: }
872: PetscErrorCode VecReplaceArray_Seq(Vec vin,const PetscScalar *a)
873: {
874: Vec_Seq *v = (Vec_Seq *)vin->data;
878: if (v->array_allocated) {PetscFree(v->array_allocated);}
879: v->array_allocated = v->array = (PetscScalar *)a;
880: return(0);
881: }
885: PetscErrorCode VecGetSize_Seq(Vec vin,PetscInt *size)
886: {
888: *size = vin->n;
889: return(0);
890: }
894: PetscErrorCode VecConjugate_Seq(Vec xin)
895: {
896: PetscScalar *x = ((Vec_Seq *)xin->data)->array;
897: PetscInt n = xin->n;
900: while (n-->0) {
901: *x = PetscConj(*x);
902: x++;
903: }
904: return(0);
905: }
906: