Actual source code: bvec1.c

petsc-3.3-p5 2012-12-01
/*
   Defines the BLAS-based vector operations. Code shared by parallel
   and sequential vectors.
*/

#include <petsc-private/vecimpl.h>
#include <../src/vec/vec/impls/dvecimpl.h>
#include <petscblaslapack.h>
#include <petscthreadcomm.h>

#if defined(PETSC_THREADCOMM_ACTIVE)
PetscErrorCode VecDot_kernel(PetscInt thread_id,Vec xin,Vec yin,PetscThreadCommRedCtx red)
{
  PetscErrorCode    ierr;
  PetscInt          *trstarts=xin->map->trstarts;
  PetscInt          start,end,n;
  PetscBLASInt      one = 1,bn;
  const PetscScalar *xa,*ya;
  PetscScalar       z_loc;

  ierr  = VecGetArrayRead(xin,&xa);CHKERRQ(ierr);
  ierr  = VecGetArrayRead(yin,&ya);CHKERRQ(ierr);
  start = trstarts[thread_id];
  end   = trstarts[thread_id+1];
  n     = end-start;
  bn    = PetscBLASIntCast(n);
  /* arguments ya, xa are reversed because BLAS complex conjugates the first argument, PETSc the second */
  z_loc = BLASdot_(&bn,ya+start,&one,xa+start,&one);

  /* deposit this thread's partial dot product into the reduction context */
  ierr = PetscThreadReductionKernelBegin(thread_id,red,(void*)&z_loc);CHKERRQ(ierr);

  ierr = VecRestoreArrayRead(xin,&xa);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(yin,&ya);CHKERRQ(ierr);
  return 0;
}

PetscErrorCode VecDot_Seq(Vec xin,Vec yin,PetscScalar *z)
{
  PetscErrorCode        ierr;
  PetscThreadCommRedCtx red;

  PetscFunctionBegin;
  ierr = PetscThreadReductionBegin(((PetscObject)xin)->comm,THREADCOMM_SUM,PETSC_SCALAR,&red);CHKERRQ(ierr);
  ierr = PetscThreadCommRunKernel(((PetscObject)xin)->comm,(PetscThreadKernel)VecDot_kernel,3,xin,yin,red);CHKERRQ(ierr);
  ierr = PetscThreadReductionEnd(red,z);CHKERRQ(ierr);
  if (xin->map->n > 0) {
    ierr = PetscLogFlops(2.0*xin->map->n-1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#else
PetscErrorCode VecDot_Seq(Vec xin,Vec yin,PetscScalar *z)
{
  const PetscScalar *ya,*xa;
  PetscBLASInt      one = 1,bn = PetscBLASIntCast(xin->map->n);
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = VecGetArrayRead(xin,&xa);CHKERRQ(ierr);
  ierr = VecGetArrayRead(yin,&ya);CHKERRQ(ierr);
  /* arguments ya, xa are reversed because BLAS complex conjugates the first argument, PETSc the second */
  *z = BLASdot_(&bn,ya,&one,xa,&one);
  ierr = VecRestoreArrayRead(xin,&xa);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(yin,&ya);CHKERRQ(ierr);
  if (xin->map->n > 0) {
    ierr = PetscLogFlops(2.0*xin->map->n-1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif

PetscErrorCode VecTDot_Seq(Vec xin,Vec yin,PetscScalar *z)
{
  const PetscScalar *ya,*xa;
  PetscBLASInt      one = 1,bn = PetscBLASIntCast(xin->map->n);
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = VecGetArrayRead(xin,&xa);CHKERRQ(ierr);
  ierr = VecGetArrayRead(yin,&ya);CHKERRQ(ierr);
  /* BLASdotu_ conjugates neither argument, so no reversal is needed here */
  *z = BLASdotu_(&bn,xa,&one,ya,&one);
  ierr = VecRestoreArrayRead(xin,&xa);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(yin,&ya);CHKERRQ(ierr);
  if (xin->map->n > 0) {
    ierr = PetscLogFlops(2.0*xin->map->n-1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
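
The conjugation convention noted in the comments above is visible at the user level: VecDot conjugates its second argument (z = sum x[i]*conj(y[i])), while VecTDot takes the plain, unconjugated product. A minimal usage sketch, not part of bvec1.c, assuming a PETSc 3.3 installation; with real scalars (as here) the two results coincide, and they differ only in a --with-scalar-type=complex build:

#include <petscvec.h>

int main(int argc,char **argv)
{
  Vec            x,y;
  PetscScalar    dot,tdot;
  PetscErrorCode ierr;

  ierr = PetscInitialize(&argc,&argv,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,4,&x);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&y);CHKERRQ(ierr);
  ierr = VecSet(x,2.0);CHKERRQ(ierr);
  ierr = VecSet(y,3.0);CHKERRQ(ierr);
  ierr = VecDot(x,y,&dot);CHKERRQ(ierr);    /* sum x[i]*conj(y[i]) = 24 */
  ierr = VecTDot(x,y,&tdot);CHKERRQ(ierr);  /* sum x[i]*y[i]       = 24 */
  ierr = PetscPrintf(PETSC_COMM_SELF,"dot = %G  tdot = %G\n",PetscRealPart(dot),PetscRealPart(tdot));CHKERRQ(ierr);
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return 0;
}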

PetscErrorCode VecScale_Seq(Vec xin,PetscScalar alpha)
{
  PetscErrorCode ierr;
  PetscBLASInt   one = 1,bn = PetscBLASIntCast(xin->map->n);

  PetscFunctionBegin;
  if (alpha == (PetscScalar)0.0) {
    ierr = VecSet_Seq(xin,alpha);CHKERRQ(ierr);
  } else if (alpha != (PetscScalar)1.0) {
    PetscScalar a = alpha,*xarray;
    ierr = VecGetArray(xin,&xarray);CHKERRQ(ierr);
    BLASscal_(&bn,&a,xarray,&one);
    ierr = VecRestoreArray(xin,&xarray);CHKERRQ(ierr);
  }
  ierr = PetscLogFlops(xin->map->n);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
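
VecScale_Seq short-circuits the two trivial cases: alpha == 0 is delegated to VecSet_Seq and alpha == 1 touches no data at all. A minimal usage sketch, not part of bvec1.c, assuming PETSc 3.3 (values are illustrative):

#include <petscvec.h>

int main(int argc,char **argv)
{
  Vec            x;
  PetscReal      nrm;
  PetscErrorCode ierr;

  ierr = PetscInitialize(&argc,&argv,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,5,&x);CHKERRQ(ierr);
  ierr = VecSet(x,3.0);CHKERRQ(ierr);
  ierr = VecScale(x,2.0);CHKERRQ(ierr);          /* general case: calls BLASscal_, entries become 6 */
  ierr = VecNorm(x,NORM_1,&nrm);CHKERRQ(ierr);   /* 5 entries of 6 -> 30 */
  ierr = PetscPrintf(PETSC_COMM_SELF,"|x|_1 = %G\n",nrm);CHKERRQ(ierr);
  ierr = VecScale(x,0.0);CHKERRQ(ierr);          /* fast path: vector is simply set to zero */
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return 0;
}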

#if defined(PETSC_THREADCOMM_ACTIVE)
PetscErrorCode VecAXPY_kernel(PetscInt thread_id,Vec yin,PetscScalar *alpha_p,Vec xin)
{
  PetscErrorCode    ierr;
  const PetscScalar *xarray;
  PetscScalar       *yarray;
  PetscBLASInt      one = 1,bn;
  PetscInt          *trstarts=yin->map->trstarts,start,end,n;
  PetscScalar       alpha = *alpha_p;

  start = trstarts[thread_id];
  end   = trstarts[thread_id+1];
  n     = end - start;
  bn    = PetscBLASIntCast(n);
  ierr = VecGetArrayRead(xin,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(yin,&yarray);CHKERRQ(ierr);
  BLASaxpy_(&bn,&alpha,xarray+start,&one,yarray+start,&one);
  ierr = VecRestoreArrayRead(xin,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(yin,&yarray);CHKERRQ(ierr);

  return 0;
}

PetscErrorCode VecAXPY_Seq(Vec yin,PetscScalar alpha,Vec xin)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* assume that the BLAS handles alpha == 1.0 efficiently since we have no fast code for it */
  if (alpha != (PetscScalar)0.0) {
    PetscScalar *scalar;
    ierr = PetscThreadCommGetScalars(((PetscObject)yin)->comm,&scalar,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
    *scalar = alpha;
    ierr = PetscThreadCommRunKernel(((PetscObject)yin)->comm,(PetscThreadKernel)VecAXPY_kernel,3,yin,scalar,xin);CHKERRQ(ierr);
    ierr = PetscLogFlops(2.0*yin->map->n);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#else
PetscErrorCode VecAXPY_Seq(Vec yin,PetscScalar alpha,Vec xin)
{
  PetscErrorCode    ierr;
  const PetscScalar *xarray;
  PetscScalar       *yarray;
  PetscBLASInt      one = 1,bn = PetscBLASIntCast(yin->map->n);

  PetscFunctionBegin;
  /* assume that the BLAS handles alpha == 1.0 efficiently since we have no fast code for it */
  if (alpha != (PetscScalar)0.0) {
    ierr = VecGetArrayRead(xin,&xarray);CHKERRQ(ierr);
    ierr = VecGetArray(yin,&yarray);CHKERRQ(ierr);
    BLASaxpy_(&bn,&alpha,xarray,&one,yarray,&one);
    ierr = VecRestoreArrayRead(xin,&xarray);CHKERRQ(ierr);
    ierr = VecRestoreArray(yin,&yarray);CHKERRQ(ierr);
    ierr = PetscLogFlops(2.0*yin->map->n);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif
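
Both variants compute y <- alpha*x + y and skip all work when alpha == 0. A minimal usage sketch, not part of bvec1.c, assuming PETSc 3.3:

#include <petscvec.h>

int main(int argc,char **argv)
{
  Vec            x,y;
  PetscErrorCode ierr;

  ierr = PetscInitialize(&argc,&argv,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,4,&x);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&y);CHKERRQ(ierr);
  ierr = VecSet(x,1.0);CHKERRQ(ierr);
  ierr = VecSet(y,2.0);CHKERRQ(ierr);
  ierr = VecAXPY(y,3.0,x);CHKERRQ(ierr);  /* y = 3*x + y, so every entry becomes 5 */
  ierr = VecView(y,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return 0;
}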

PetscErrorCode VecAXPBY_Seq(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
{
  PetscErrorCode    ierr;
  PetscInt          n = yin->map->n,i;
  const PetscScalar *xx;
  PetscScalar       *yy,a = alpha,b = beta;

  PetscFunctionBegin;
  if (a == (PetscScalar)0.0) {          /* y = beta*y */
    ierr = VecScale_Seq(yin,beta);CHKERRQ(ierr);
  } else if (b == (PetscScalar)1.0) {   /* y = alpha*x + y */
    ierr = VecAXPY_Seq(yin,alpha,xin);CHKERRQ(ierr);
  } else if (a == (PetscScalar)1.0) {   /* y = x + beta*y */
    ierr = VecAYPX_Seq(yin,beta,xin);CHKERRQ(ierr);
  } else if (b == (PetscScalar)0.0) {   /* y = alpha*x */
    ierr = VecGetArrayRead(xin,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(yin,&yy);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      yy[i] = a*xx[i];
    }
    ierr = VecRestoreArrayRead(xin,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(yin,&yy);CHKERRQ(ierr);
    ierr = PetscLogFlops(xin->map->n);CHKERRQ(ierr);
  } else {                              /* general case: y = alpha*x + beta*y */
    ierr = VecGetArrayRead(xin,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(yin,&yy);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      yy[i] = a*xx[i] + b*yy[i];
    }
    ierr = VecRestoreArrayRead(xin,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(yin,&yy);CHKERRQ(ierr);
    ierr = PetscLogFlops(3.0*xin->map->n);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
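
The special cases above route to cheaper routines (VecScale_Seq, VecAXPY_Seq, VecAYPX_Seq) before falling back to the general 3n-flop loop. A minimal usage sketch, not part of bvec1.c, assuming PETSc 3.3:

#include <petscvec.h>

int main(int argc,char **argv)
{
  Vec            x,y;
  PetscErrorCode ierr;

  ierr = PetscInitialize(&argc,&argv,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,4,&x);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&y);CHKERRQ(ierr);
  ierr = VecSet(x,1.0);CHKERRQ(ierr);
  ierr = VecSet(y,2.0);CHKERRQ(ierr);
  ierr = VecAXPBY(y,2.0,3.0,x);CHKERRQ(ierr);  /* general branch: y = 2*x + 3*y = 8 in every entry */
  ierr = VecAXPBY(y,0.0,0.5,x);CHKERRQ(ierr);  /* alpha == 0 branch: reduces to scaling y by 0.5 -> 4 */
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return 0;
}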

PetscErrorCode VecAXPBYPCZ_Seq(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
{
  PetscErrorCode    ierr;
  PetscInt          n = zin->map->n,i;
  const PetscScalar *yy,*xx;
  PetscScalar       *zz;

  PetscFunctionBegin;
  ierr = VecGetArrayRead(xin,&xx);CHKERRQ(ierr);
  ierr = VecGetArrayRead(yin,&yy);CHKERRQ(ierr);
  ierr = VecGetArray(zin,&zz);CHKERRQ(ierr);
  if (alpha == (PetscScalar)1.0) {
    for (i=0; i<n; i++) {
      zz[i] = xx[i] + beta*yy[i] + gamma*zz[i];
    }
    ierr = PetscLogFlops(4.0*n);CHKERRQ(ierr);
  } else if (gamma == (PetscScalar)1.0) {
    for (i=0; i<n; i++) {
      zz[i] = alpha*xx[i] + beta*yy[i] + zz[i];
    }
    ierr = PetscLogFlops(4.0*n);CHKERRQ(ierr);
  } else if (gamma == (PetscScalar)0.0) {
    for (i=0; i<n; i++) {
      zz[i] = alpha*xx[i] + beta*yy[i];
    }
    ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
  } else {
    for (i=0; i<n; i++) {
      zz[i] = alpha*xx[i] + beta*yy[i] + gamma*zz[i];
    }
    ierr = PetscLogFlops(5.0*n);CHKERRQ(ierr);
  }
  ierr = VecRestoreArrayRead(xin,&xx);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(yin,&yy);CHKERRQ(ierr);
  ierr = VecRestoreArray(zin,&zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
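
The branches above avoid multiplies by 1 (and all reads of z when gamma is 0) and log the corresponding flop counts; in every case the result is z = alpha*x + beta*y + gamma*z. A minimal usage sketch, not part of bvec1.c, assuming PETSc 3.3:

#include <petscvec.h>

int main(int argc,char **argv)
{
  Vec            x,y,z;
  PetscErrorCode ierr;

  ierr = PetscInitialize(&argc,&argv,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,4,&x);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&y);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&z);CHKERRQ(ierr);
  ierr = VecSet(x,1.0);CHKERRQ(ierr);
  ierr = VecSet(y,2.0);CHKERRQ(ierr);
  ierr = VecSet(z,3.0);CHKERRQ(ierr);
  /* alpha == 1 branch: z = x + 2*y + 3*z = 1 + 4 + 9 = 14 in every entry */
  ierr = VecAXPBYPCZ(z,1.0,2.0,3.0,x,y);CHKERRQ(ierr);
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
  ierr = VecDestroy(&z);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return 0;
}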