Actual source code: dvec2.c

petsc-3.3-p5 2012-12-01
  2: /* 
  3:    Defines some vector operation functions that are shared by 
  4:   sequential and parallel vectors.
  5: */
  6: #include <../src/vec/vec/impls/dvecimpl.h>   
  7: #include <petsc-private/petscaxpy.h>
  8: #include <petscthreadcomm.h>

 10: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
 11: #include <../src/vec/vec/impls/seq/ftn-kernels/fmdot.h>
 14: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
 15: {
 16:   PetscErrorCode    ierr;
 17:   PetscInt          i,nv_rem,n = xin->map->n;
 18:   PetscScalar       sum0,sum1,sum2,sum3;
 19:   const PetscScalar *yy0,*yy1,*yy2,*yy3,*x;
 20:   Vec               *yy;

 23:   sum0 = 0.0;
 24:   sum1 = 0.0;
 25:   sum2 = 0.0;

 27:   i      = nv;
 28:   nv_rem = nv&0x3;
 29:   yy     = (Vec*)yin;
 30:   VecGetArrayRead(xin,&x);

 32:   switch (nv_rem) {
 33:   case 3:
 34:     VecGetArrayRead(yy[0],&yy0);
 35:     VecGetArrayRead(yy[1],&yy1);
 36:     VecGetArrayRead(yy[2],&yy2);
 37:     fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2);
 38:     VecRestoreArrayRead(yy[0],&yy0);
 39:     VecRestoreArrayRead(yy[1],&yy1);
 40:     VecRestoreArrayRead(yy[2],&yy2);
 41:     z[0] = sum0;
 42:     z[1] = sum1;
 43:     z[2] = sum2;
 44:     break;
 45:   case 2:
 46:     VecGetArrayRead(yy[0],&yy0);
 47:     VecGetArrayRead(yy[1],&yy1);
 48:     fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1);
 49:     VecRestoreArrayRead(yy[0],&yy0);
 50:     VecRestoreArrayRead(yy[1],&yy1);
 51:     z[0] = sum0;
 52:     z[1] = sum1;
 53:     break;
 54:   case 1:
 55:     VecGetArrayRead(yy[0],&yy0);
 56:     fortranmdot1_(x,yy0,&n,&sum0);
 57:     VecRestoreArrayRead(yy[0],&yy0);
 58:     z[0] = sum0;
 59:     break;
 60:   case 0:
 61:     break;
 62:   }
 63:   z  += nv_rem;
 64:   i  -= nv_rem;
 65:   yy += nv_rem;

 67:   while (i >0) {
 68:     sum0 = 0.;
 69:     sum1 = 0.;
 70:     sum2 = 0.;
 71:     sum3 = 0.;
 72:     VecGetArrayRead(yy[0],&yy0);
 73:     VecGetArrayRead(yy[1],&yy1);
 74:     VecGetArrayRead(yy[2],&yy2);
 75:     VecGetArrayRead(yy[3],&yy3);
 76:     fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3);
 77:     VecRestoreArrayRead(yy[0],&yy0);
 78:     VecRestoreArrayRead(yy[1],&yy1);
 79:     VecRestoreArrayRead(yy[2],&yy2);
 80:     VecRestoreArrayRead(yy[3],&yy3);
 81:     yy  += 4;
 82:     z[0] = sum0;
 83:     z[1] = sum1;
 84:     z[2] = sum2;
 85:     z[3] = sum3;
 86:     z   += 4;
 87:     i   -= 4;
 88:   }
 89:   VecRestoreArrayRead(xin,&x);
 90:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
 91:   return(0);
 92: }

 94: #else
 97: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
 98: {
 99:   PetscErrorCode    ierr;
100:   PetscInt          n = xin->map->n,i,j,nv_rem,j_rem;
101:   PetscScalar       sum0,sum1,sum2,sum3,x0,x1,x2,x3;
102:   const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
103:   Vec               *yy;

106:   sum0 = 0.;
107:   sum1 = 0.;
108:   sum2 = 0.;

110:   i      = nv;
111:   nv_rem = nv&0x3;
112:   yy     = (Vec *)yin;
113:   j      = n;
114:   VecGetArrayRead(xin,&xbase);
115:   x      = xbase;

117:   switch (nv_rem) {
118:   case 3:
119:     VecGetArrayRead(yy[0],&yy0);
120:     VecGetArrayRead(yy[1],&yy1);
121:     VecGetArrayRead(yy[2],&yy2);
122:     switch (j_rem=j&0x3) {
123:     case 3:
124:       x2 = x[2];
125:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
126:       sum2 += x2*PetscConj(yy2[2]);
127:     case 2:
128:       x1 = x[1];
129:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
130:       sum2 += x1*PetscConj(yy2[1]);
131:     case 1:
132:       x0 = x[0];
133:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
134:       sum2 += x0*PetscConj(yy2[0]);
135:     case 0:
136:       x   += j_rem;
137:       yy0 += j_rem;
138:       yy1 += j_rem;
139:       yy2 += j_rem;
140:       j   -= j_rem;
141:       break;
142:     }
143:     while (j>0) {
144:       x0 = x[0];
145:       x1 = x[1];
146:       x2 = x[2];
147:       x3 = x[3];
148:       x += 4;
149: 
150:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
151:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
152:       sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
153:       j -= 4;
154:     }
155:     z[0] = sum0;
156:     z[1] = sum1;
157:     z[2] = sum2;
158:     VecRestoreArrayRead(yy[0],&yy0);
159:     VecRestoreArrayRead(yy[1],&yy1);
160:     VecRestoreArrayRead(yy[2],&yy2);
161:     break;
162:   case 2:
163:     VecGetArrayRead(yy[0],&yy0);
164:     VecGetArrayRead(yy[1],&yy1);
165:     switch (j_rem=j&0x3) {
166:     case 3:
167:       x2 = x[2];
168:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
169:     case 2:
170:       x1 = x[1];
171:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
172:     case 1:
173:       x0 = x[0];
174:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
175:     case 0:
176:       x   += j_rem;
177:       yy0 += j_rem;
178:       yy1 += j_rem;
179:       j   -= j_rem;
180:       break;
181:     }
182:     while (j>0) {
183:       x0 = x[0];
184:       x1 = x[1];
185:       x2 = x[2];
186:       x3 = x[3];
187:       x += 4;
188: 
189:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
190:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
191:       j -= 4;
192:     }
193:     z[0] = sum0;
194:     z[1] = sum1;
195: 
196:     VecRestoreArrayRead(yy[0],&yy0);
197:     VecRestoreArrayRead(yy[1],&yy1);
198:     break;
199:   case 1:
200:     VecGetArrayRead(yy[0],&yy0);
201:     switch (j_rem=j&0x3) {
202:     case 3:
203:       x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
204:     case 2:
205:       x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
206:     case 1:
207:       x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
208:     case 0:
209:       x   += j_rem;
210:       yy0 += j_rem;
211:       j   -= j_rem;
212:       break;
213:     }
214:     while (j>0) {
215:       sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
216:             + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]);
217:       yy0+=4;
218:       j -= 4; x+=4;
219:     }
220:     z[0] = sum0;

222:     VecRestoreArrayRead(yy[0],&yy0);
223:     break;
224:   case 0:
225:     break;
226:   }
227:   z  += nv_rem;
228:   i  -= nv_rem;
229:   yy += nv_rem;

231:   while (i >0) {
232:     sum0 = 0.;
233:     sum1 = 0.;
234:     sum2 = 0.;
235:     sum3 = 0.;
236:     VecGetArrayRead(yy[0],&yy0);
237:     VecGetArrayRead(yy[1],&yy1);
238:     VecGetArrayRead(yy[2],&yy2);
239:     VecGetArrayRead(yy[3],&yy3);

241:     j = n;
242:     x = xbase;
243:     switch (j_rem=j&0x3) {
244:     case 3:
245:       x2 = x[2];
246:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
247:       sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
248:     case 2:
249:       x1 = x[1];
250:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
251:       sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
252:     case 1:
253:       x0 = x[0];
254:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
255:       sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
256:     case 0:
257:       x   += j_rem;
258:       yy0 += j_rem;
259:       yy1 += j_rem;
260:       yy2 += j_rem;
261:       yy3 += j_rem;
262:       j   -= j_rem;
263:       break;
264:     }
265:     while (j>0) {
266:       x0 = x[0];
267:       x1 = x[1];
268:       x2 = x[2];
269:       x3 = x[3];
270:       x += 4;
271: 
272:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
273:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
274:       sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
275:       sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
276:       j -= 4;
277:     }
278:     z[0] = sum0;
279:     z[1] = sum1;
280:     z[2] = sum2;
281:     z[3] = sum3;
282:     z   += 4;
283:     i   -= 4;
284:     VecRestoreArrayRead(yy[0],&yy0);
285:     VecRestoreArrayRead(yy[1],&yy1);
286:     VecRestoreArrayRead(yy[2],&yy2);
287:     VecRestoreArrayRead(yy[3],&yy3);
288:     yy  += 4;
289:   }
290:   VecRestoreArrayRead(xin,&xbase);
291:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
292:   return(0);
293: }
294: #endif

296: /* ----------------------------------------------------------------------------*/
299: PetscErrorCode VecMTDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
300: {
301:   PetscErrorCode    ierr;
302:   PetscInt          n = xin->map->n,i,j,nv_rem,j_rem;
303:   PetscScalar       sum0,sum1,sum2,sum3,x0,x1,x2,x3;
304:   const PetscScalar *yy0,*yy1,*yy2,*yy3,*x;
305:   Vec               *yy;
306: 

309:   sum0 = 0.;
310:   sum1 = 0.;
311:   sum2 = 0.;

313:   i      = nv;
314:   nv_rem = nv&0x3;
315:   yy     = (Vec*)yin;
316:   j      = n;
317:   VecGetArrayRead(xin,&x);

319:   switch (nv_rem) {
320:   case 3:
321:     VecGetArrayRead(yy[0],&yy0);
322:     VecGetArrayRead(yy[1],&yy1);
323:     VecGetArrayRead(yy[2],&yy2);
324:     switch (j_rem=j&0x3) {
325:     case 3:
326:       x2 = x[2];
327:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
328:       sum2 += x2*yy2[2];
329:     case 2:
330:       x1 = x[1];
331:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
332:       sum2 += x1*yy2[1];
333:     case 1:
334:       x0 = x[0];
335:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
336:       sum2 += x0*yy2[0];
337:     case 0:
338:       x  += j_rem;
339:       yy0 += j_rem;
340:       yy1 += j_rem;
341:       yy2 += j_rem;
342:       j  -= j_rem;
343:       break;
344:     }
345:     while (j>0) {
346:       x0 = x[0];
347:       x1 = x[1];
348:       x2 = x[2];
349:       x3 = x[3];
350:       x += 4;
351: 
352:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
353:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
354:       sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
355:       j -= 4;
356:     }
357:     z[0] = sum0;
358:     z[1] = sum1;
359:     z[2] = sum2;
360:     VecRestoreArrayRead(yy[0],&yy0);
361:     VecRestoreArrayRead(yy[1],&yy1);
362:     VecRestoreArrayRead(yy[2],&yy2);
363:     break;
364:   case 2:
365:     VecGetArrayRead(yy[0],&yy0);
366:     VecGetArrayRead(yy[1],&yy1);
367:     switch (j_rem=j&0x3) {
368:     case 3:
369:       x2 = x[2];
370:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
371:     case 2:
372:       x1 = x[1];
373:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
374:     case 1:
375:       x0 = x[0];
376:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
377:     case 0:
378:       x  += j_rem;
379:       yy0 += j_rem;
380:       yy1 += j_rem;
381:       j  -= j_rem;
382:       break;
383:     }
384:     while (j>0) {
385:       x0 = x[0];
386:       x1 = x[1];
387:       x2 = x[2];
388:       x3 = x[3];
389:       x += 4;
390: 
391:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
392:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
393:       j -= 4;
394:     }
395:     z[0] = sum0;
396:     z[1] = sum1;
397: 
398:     VecRestoreArrayRead(yy[0],&yy0);
399:     VecRestoreArrayRead(yy[1],&yy1);
400:     break;
401:   case 1:
402:     VecGetArrayRead(yy[0],&yy0);
403:     switch (j_rem=j&0x3) {
404:     case 3:
405:       x2 = x[2]; sum0 += x2*yy0[2];
406:     case 2:
407:       x1 = x[1]; sum0 += x1*yy0[1];
408:     case 1:
409:       x0 = x[0]; sum0 += x0*yy0[0];
410:     case 0:
411:       x  += j_rem;
412:       yy0 += j_rem;
413:       j  -= j_rem;
414:       break;
415:     }
416:     while (j>0) {
417:       sum0 += x[0]*yy0[0] + x[1]*yy0[1] + x[2]*yy0[2] + x[3]*yy0[3]; yy0+=4;
418:       j -= 4; x+=4;
419:     }
420:     z[0] = sum0;

422:     VecRestoreArrayRead(yy[0],&yy0);
423:     break;
424:   case 0:
425:     break;
426:   }
427:   z  += nv_rem;
428:   i  -= nv_rem;
429:   yy += nv_rem;

431:   while (i >0) {
432:     sum0 = 0.;
433:     sum1 = 0.;
434:     sum2 = 0.;
435:     sum3 = 0.;
436:     VecGetArrayRead(yy[0],&yy0);
437:     VecGetArrayRead(yy[1],&yy1);
438:     VecGetArrayRead(yy[2],&yy2);
439:     VecGetArrayRead(yy[3],&yy3);

441:     j = n;
442:     switch (j_rem=j&0x3) {
443:     case 3:
444:       x2 = x[2];
445:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
446:       sum2 += x2*yy2[2]; sum3 += x2*yy3[2];
447:     case 2:
448:       x1 = x[1];
449:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
450:       sum2 += x1*yy2[1]; sum3 += x1*yy3[1];
451:     case 1:
452:       x0 = x[0];
453:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
454:       sum2 += x0*yy2[0]; sum3 += x0*yy3[0];
455:     case 0:
456:       x  += j_rem;
457:       yy0 += j_rem;
458:       yy1 += j_rem;
459:       yy2 += j_rem;
460:       yy3 += j_rem;
461:       j  -= j_rem;
462:       break;
463:     }
464:     while (j>0) {
465:       x0 = x[0];
466:       x1 = x[1];
467:       x2 = x[2];
468:       x3 = x[3];
469:       x += 4;
470: 
471:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
472:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
473:       sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
474:       sum3 += x0*yy3[0] + x1*yy3[1] + x2*yy3[2] + x3*yy3[3]; yy3+=4;
475:       j -= 4;
476:     }
477:     z[0] = sum0;
478:     z[1] = sum1;
479:     z[2] = sum2;
480:     z[3] = sum3;
481:     z   += 4;
482:     i   -= 4;
483:     VecRestoreArrayRead(yy[0],&yy0);
484:     VecRestoreArrayRead(yy[1],&yy1);
485:     VecRestoreArrayRead(yy[2],&yy2);
486:     VecRestoreArrayRead(yy[3],&yy3);
487:     yy  += 4;
488:   }
489:   VecRestoreArrayRead(xin,&x);
490:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
491:   return(0);
492: }
493: 

497: PetscErrorCode VecMax_Seq(Vec xin,PetscInt* idx,PetscReal * z)
498: {
499:   PetscInt          i,j=0,n = xin->map->n;
500:   PetscReal         max,tmp;
501:   const PetscScalar *xx;
502:   PetscErrorCode    ierr;

505:   VecGetArrayRead(xin,&xx);
506:   if (!n) {
507:     max = PETSC_MIN_REAL;
508:     j   = -1;
509:   } else {
510: #if defined(PETSC_USE_COMPLEX)
511:       max = PetscRealPart(*xx++); j = 0;
512: #else
513:       max = *xx++; j = 0;
514: #endif
515:     for (i=1; i<n; i++) {
516: #if defined(PETSC_USE_COMPLEX)
517:       if ((tmp = PetscRealPart(*xx++)) > max) { j = i; max = tmp;}
518: #else
519:       if ((tmp = *xx++) > max) { j = i; max = tmp; }
520: #endif
521:     }
522:   }
523:   *z   = max;
524:   if (idx) *idx = j;
525:   VecRestoreArrayRead(xin,&xx);
526:   return(0);
527: }

531: PetscErrorCode VecMin_Seq(Vec xin,PetscInt* idx,PetscReal * z)
532: {
533:   PetscInt          i,j=0,n = xin->map->n;
534:   PetscReal         min,tmp;
535:   const PetscScalar *xx;
536:   PetscErrorCode    ierr;

539:   VecGetArrayRead(xin,&xx);
540:   if (!n) {
541:     min = PETSC_MAX_REAL;
542:     j   = -1;
543:   } else {
544: #if defined(PETSC_USE_COMPLEX)
545:     min = PetscRealPart(*xx++); j = 0;
546: #else
547:     min = *xx++; j = 0;
548: #endif
549:     for (i=1; i<n; i++) {
550: #if defined(PETSC_USE_COMPLEX)
551:       if ((tmp = PetscRealPart(*xx++)) < min) { j = i; min = tmp;}
552: #else
553:       if ((tmp = *xx++) < min) { j = i; min = tmp; }
554: #endif
555:     }
556:   }
557:   *z   = min;
558:   if (idx) *idx = j;
559:   VecGetArrayRead(xin,&xx);
560:   return(0);
561: }

563: #if defined(PETSC_THREADCOMM_ACTIVE)
564: PetscErrorCode VecSet_kernel(PetscInt thread_id,Vec xin,PetscScalar *alpha_p)
565: {
567:   PetscScalar    *xx;
568:   PetscInt       i,start,end;
569:   PetscInt       *trstarts=xin->map->trstarts;
570:   PetscScalar    alpha = *alpha_p;

572:   start = trstarts[thread_id];
573:   end   = trstarts[thread_id+1];
574:   VecGetArray(xin,&xx);
575:   if (alpha == (PetscScalar)0.0) {
576:     PetscMemzero(xx+start,(end-start)*sizeof(PetscScalar));
577:   } else {
578:     for (i=start;i<end;i++) xx[i] = alpha;
579:   }
580:   VecRestoreArray(xin,&xx);
581:   return 0;
582: }

586: PetscErrorCode VecSet_Seq(Vec xin,PetscScalar alpha)
587: {
589:   PetscScalar    *scalar;

592:   PetscThreadCommGetScalars(((PetscObject)xin)->comm,&scalar,PETSC_NULL,PETSC_NULL);
593:   *scalar = alpha;
594:   PetscThreadCommRunKernel(((PetscObject)xin)->comm,(PetscThreadKernel)VecSet_kernel,2,xin,scalar);
595:   return(0);
596: }
597: #else
600: PetscErrorCode VecSet_Seq(Vec xin,PetscScalar alpha)
601: {
602:   PetscInt       i,n = xin->map->n;
603:   PetscScalar    *xx;

607:   VecGetArray(xin,&xx);
608:   if (alpha == (PetscScalar)0.0) {
609:     PetscMemzero(xx,n*sizeof(PetscScalar));
610:   } else {
611:     for (i=0; i<n; i++) xx[i] = alpha;
612:   }
613:   VecRestoreArray(xin,&xx);
614:   return(0);
615: }
616: #endif

620: PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
621: {
622:   PetscErrorCode    ierr;
623:   PetscInt          n = xin->map->n,j,j_rem;
624:   const PetscScalar *yy0,*yy1,*yy2,*yy3;
625:   PetscScalar       *xx,alpha0,alpha1,alpha2,alpha3;

627: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
628: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
629: #endif

632:   PetscLogFlops(nv*2.0*n);
633:   VecGetArray(xin,&xx);
634:   switch (j_rem=nv&0x3) {
635:   case 3:
636:     VecGetArrayRead(y[0],&yy0);
637:     VecGetArrayRead(y[1],&yy1);
638:     VecGetArrayRead(y[2],&yy2);
639:     alpha0 = alpha[0];
640:     alpha1 = alpha[1];
641:     alpha2 = alpha[2];
642:     alpha += 3;
643:     PetscAXPY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n);
644:     VecRestoreArrayRead(y[0],&yy0);
645:     VecRestoreArrayRead(y[1],&yy1);
646:     VecRestoreArrayRead(y[2],&yy2);
647:     y     += 3;
648:     break;
649:   case 2:
650:     VecGetArrayRead(y[0],&yy0);
651:     VecGetArrayRead(y[1],&yy1);
652:     alpha0 = alpha[0];
653:     alpha1 = alpha[1];
654:     alpha +=2;
655:     PetscAXPY2(xx,alpha0,alpha1,yy0,yy1,n);
656:     VecRestoreArrayRead(y[0],&yy0);
657:     VecRestoreArrayRead(y[1],&yy1);
658:     y     +=2;
659:     break;
660:   case 1:
661:     VecGetArrayRead(y[0],&yy0);
662:     alpha0 = *alpha++;
663:     PetscAXPY(xx,alpha0,yy0,n);
664:     VecRestoreArrayRead(y[0],&yy0);
665:     y     +=1;
666:     break;
667:   }
668:   for (j=j_rem; j<nv; j+=4) {
669:     VecGetArrayRead(y[0],&yy0);
670:     VecGetArrayRead(y[1],&yy1);
671:     VecGetArrayRead(y[2],&yy2);
672:     VecGetArrayRead(y[3],&yy3);
673:     alpha0 = alpha[0];
674:     alpha1 = alpha[1];
675:     alpha2 = alpha[2];
676:     alpha3 = alpha[3];
677:     alpha  += 4;

679:     PetscAXPY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n);
680:     VecRestoreArrayRead(y[0],&yy0);
681:     VecRestoreArrayRead(y[1],&yy1);
682:     VecRestoreArrayRead(y[2],&yy2);
683:     VecRestoreArrayRead(y[3],&yy3);
684:     y      += 4;
685:   }
686:   VecRestoreArray(xin,&xx);
687:   return(0);
688: }

690: #include <../src/vec/vec/impls/seq/ftn-kernels/faypx.h>
693: PetscErrorCode VecAYPX_Seq(Vec yin,PetscScalar alpha,Vec xin)
694: {
695:   PetscErrorCode    ierr;
696:   PetscInt          n = yin->map->n;
697:   PetscScalar       *yy;
698:   const PetscScalar *xx;

701:   if (alpha == (PetscScalar)0.0) {
702:     VecCopy(xin,yin);
703:   } else if (alpha == (PetscScalar)1.0) {
704:     VecAXPY_Seq(yin,alpha,xin);
705:   } else if (alpha == (PetscScalar)-1.0) {
706:     PetscInt i;
707:     VecGetArrayRead(xin,&xx);
708:     VecGetArray(yin,&yy);
709:     for (i=0; i<n; i++) {
710:       yy[i] = xx[i] - yy[i];
711:     }
712:     VecRestoreArrayRead(xin,&xx);
713:     VecRestoreArray(yin,&yy);
714:     PetscLogFlops(1.0*n);
715:   } else {
716:     VecGetArrayRead(xin,&xx);
717:     VecGetArray(yin,&yy);
718: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
719:     {
720:       PetscScalar oalpha = alpha;
721:       fortranaypx_(&n,&oalpha,xx,yy);
722:     }
723: #else
724:     {
725:       PetscInt i;
726:       for (i=0; i<n; i++) {
727:         yy[i] = xx[i] + alpha*yy[i];
728:       }
729:     }
730: #endif
731:     VecRestoreArrayRead(xin,&xx);
732:     VecRestoreArray(yin,&yy);
733:     PetscLogFlops(2.0*n);
734:   }
735:   return(0);
736: }

738: #include <../src/vec/vec/impls/seq/ftn-kernels/fwaxpy.h>
739: /*
740:    IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
741:   to be slower than a regular C loop.  Hence,we do not include it.
742:   void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
743: */

747: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha,Vec xin,Vec yin)
748: {
749:   PetscErrorCode     ierr;
750:   PetscInt           i,n = win->map->n;
751:   PetscScalar        *ww;
752:   const PetscScalar  *yy,*xx;

755:   VecGetArrayRead(xin,&xx);
756:   VecGetArrayRead(yin,&yy);
757:   VecGetArray(win,&ww);
758:   if (alpha == (PetscScalar)1.0) {
759:     PetscLogFlops(n);
760:     /* could call BLAS axpy after call to memcopy, but may be slower */
761:     for (i=0; i<n; i++) ww[i] = yy[i] + xx[i];
762:   } else if (alpha == (PetscScalar)-1.0) {
763:     PetscLogFlops(n);
764:     for (i=0; i<n; i++) ww[i] = yy[i] - xx[i];
765:   } else if (alpha == (PetscScalar)0.0) {
766:     PetscMemcpy(ww,yy,n*sizeof(PetscScalar));
767:   } else {
768:     PetscScalar oalpha = alpha;
769: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
770:     fortranwaxpy_(&n,&oalpha,xx,yy,ww);
771: #else
772:     for (i=0; i<n; i++) ww[i] = yy[i] + oalpha * xx[i];
773: #endif
774:     PetscLogFlops(2.0*n);
775:   }
776:   VecRestoreArrayRead(xin,&xx);
777:   VecRestoreArrayRead(yin,&yy);
778:   VecRestoreArray(win,&ww);
779:   return(0);
780: }


785: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin,Vec yin,PetscReal *max)
786: {
787:   PetscErrorCode    ierr;
788:   PetscInt          n = xin->map->n,i;
789:   const PetscScalar *xx,*yy;
790:   PetscReal         m = 0.0;

793:   VecGetArrayRead(xin,&xx);
794:   VecGetArrayRead(yin,&yy);
795:   for(i = 0; i < n; i++) {
796:     if (yy[i] != (PetscScalar)0.0) {
797:       m = PetscMax(PetscAbsScalar(xx[i]/yy[i]), m);
798:     } else {
799:       m = PetscMax(PetscAbsScalar(xx[i]), m);
800:     }
801:   }
802:   VecRestoreArrayRead(xin,&xx);
803:   VecRestoreArrayRead(yin,&yy);
804:   MPI_Allreduce(&m,max,1,MPIU_REAL,MPIU_MAX,((PetscObject)xin)->comm);
805:   PetscLogFlops(n);
806:   return(0);
807: }

811: PetscErrorCode VecPlaceArray_Seq(Vec vin,const PetscScalar *a)
812: {
813:   Vec_Seq *v = (Vec_Seq *)vin->data;

816:   if (v->unplacedarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
817:   v->unplacedarray = v->array;  /* save previous array so reset can bring it back */
818:   v->array = (PetscScalar *)a;
819:   return(0);
820: }

824: PetscErrorCode VecReplaceArray_Seq(Vec vin,const PetscScalar *a)
825: {
826:   Vec_Seq        *v = (Vec_Seq *)vin->data;

830:   PetscFree(v->array_allocated);
831:   v->array_allocated = v->array = (PetscScalar *)a;
832:   return(0);
833: }

835: