Actual source code: bvec1.c
petsc-3.3-p5 2012-12-01
2: /*
3: Defines the BLAS based vector operations. Code shared by parallel
4: and sequential vectors.
5: */
7: #include <petsc-private/vecimpl.h>
8: #include <../src/vec/vec/impls/dvecimpl.h>
9: #include <petscblaslapack.h>
10: #include <petscthreadcomm.h>
12: #if defined(PETSC_THREADCOMM_ACTIVE)
13: PetscErrorCode VecDot_kernel(PetscInt thread_id,Vec xin,Vec yin,PetscThreadCommRedCtx red)
14: {
15: PetscErrorCode ierr;
16: PetscInt *trstarts=xin->map->trstarts;
17: PetscInt start,end,n;
18: PetscBLASInt one = 1,bn;
19: const PetscScalar *xa,*ya;
20: PetscScalar z_loc;
22: VecGetArrayRead(xin,&xa);
23: VecGetArrayRead(yin,&ya);
24: start = trstarts[thread_id];
25: end = trstarts[thread_id+1];
26: n = end-start;
27: bn = PetscBLASIntCast(n);
28: /* arguments ya, xa are reversed because BLAS complex conjugates the first argument, PETSc the second */
29: z_loc = BLASdot_(&bn,ya+start,&one,xa+start,&one);
31: PetscThreadReductionKernelBegin(thread_id,red,(void*)&z_loc);
33: VecRestoreArrayRead(xin,&xa);
34: VecRestoreArrayRead(yin,&ya);
35: return 0;
36: }
40: PetscErrorCode VecDot_Seq(Vec xin,Vec yin,PetscScalar *z)
41: {
42: PetscErrorCode ierr;
43: PetscThreadCommRedCtx red;
46: PetscThreadReductionBegin(((PetscObject)xin)->comm,THREADCOMM_SUM,PETSC_SCALAR,&red);
47: PetscThreadCommRunKernel(((PetscObject)xin)->comm,(PetscThreadKernel)VecDot_kernel,3,xin,yin,red);
48: PetscThreadReductionEnd(red,z);
49: if (xin->map->n > 0) {
50: PetscLogFlops(2.0*xin->map->n-1);
51: }
52: return(0);
53: }
54: #else
57: PetscErrorCode VecDot_Seq(Vec xin,Vec yin,PetscScalar *z)
58: {
59: const PetscScalar *ya,*xa;
60: PetscBLASInt one = 1,bn = PetscBLASIntCast(xin->map->n);
61: PetscErrorCode ierr;
64: VecGetArrayRead(xin,&xa);
65: VecGetArrayRead(yin,&ya);
66: /* arguments ya, xa are reversed because BLAS complex conjugates the first argument, PETSc the second */
67: *z = BLASdot_(&bn,ya,&one,xa,&one);
68: VecRestoreArrayRead(xin,&xa);
69: VecRestoreArrayRead(yin,&ya);
70: if (xin->map->n > 0) {
71: PetscLogFlops(2.0*xin->map->n-1);
72: }
73: return(0);
74: }
75: #endif
79: PetscErrorCode VecTDot_Seq(Vec xin,Vec yin,PetscScalar *z)
80: {
81: const PetscScalar *ya,*xa;
82: PetscBLASInt one = 1, bn = PetscBLASIntCast(xin->map->n);
83: PetscErrorCode ierr;
86: VecGetArrayRead(xin,&xa);
87: VecGetArrayRead(yin,&ya);
88: *z = BLASdotu_(&bn,xa,&one,ya,&one);
89: VecRestoreArrayRead(xin,&xa);
90: VecRestoreArrayRead(yin,&ya);
91: if (xin->map->n > 0) {
92: PetscLogFlops(2.0*xin->map->n-1);
93: }
94: return(0);
95: }
99: PetscErrorCode VecScale_Seq(Vec xin, PetscScalar alpha)
100: {
102: PetscBLASInt one = 1,bn = PetscBLASIntCast(xin->map->n);
105: if (alpha == (PetscScalar)0.0) {
106: VecSet_Seq(xin,alpha);
107: } else if (alpha != (PetscScalar)1.0) {
108: PetscScalar a = alpha,*xarray;
109: VecGetArray(xin,&xarray);
110: BLASscal_(&bn,&a,xarray,&one);
111: VecRestoreArray(xin,&xarray);
112: }
113: PetscLogFlops(xin->map->n);
114: return(0);
115: }
117: #if defined(PETSC_THREADCOMM_ACTIVE)
118: PetscErrorCode VecAXPY_kernel(PetscInt thread_id,Vec yin,PetscScalar *alpha_p,Vec xin)
119: {
120: PetscErrorCode ierr;
121: const PetscScalar *xarray;
122: PetscScalar *yarray;
123: PetscBLASInt one=1,bn;
124: PetscInt *trstarts=yin->map->trstarts,start,end,n;
125: PetscScalar alpha = *alpha_p;
127: start = trstarts[thread_id];
128: end = trstarts[thread_id+1];
129: n = end - start;
130: bn = PetscBLASIntCast(n);
131: VecGetArrayRead(xin,&xarray);
132: VecGetArray(yin,&yarray);
133: BLASaxpy_(&bn,&alpha,xarray+start,&one,yarray+start,&one);
134: VecRestoreArrayRead(xin,&xarray);
135: VecRestoreArray(yin,&yarray);
137: return 0;
138: }
141: PetscErrorCode VecAXPY_Seq(Vec yin,PetscScalar alpha,Vec xin)
142: {
143: PetscErrorCode ierr;
146: /* assume that the BLAS handles alpha == 1.0 efficiently since we have no fast code for it */
147: if (alpha != (PetscScalar)0.0) {
148: PetscScalar *scalar;
149: PetscThreadCommGetScalars(((PetscObject)yin)->comm,&scalar,PETSC_NULL,PETSC_NULL);
150: *scalar = alpha;
151: PetscThreadCommRunKernel(((PetscObject)yin)->comm,(PetscThreadKernel)VecAXPY_kernel,3,yin,scalar,xin);
152: PetscLogFlops(2.0*yin->map->n);
153: }
154: return(0);
155: }
156: #else
159: PetscErrorCode VecAXPY_Seq(Vec yin,PetscScalar alpha,Vec xin)
160: {
161: PetscErrorCode ierr;
162: const PetscScalar *xarray;
163: PetscScalar *yarray;
164: PetscBLASInt one = 1,bn = PetscBLASIntCast(yin->map->n);
167: /* assume that the BLAS handles alpha == 1.0 efficiently since we have no fast code for it */
168: if (alpha != (PetscScalar)0.0) {
169: VecGetArrayRead(xin,&xarray);
170: VecGetArray(yin,&yarray);
171: BLASaxpy_(&bn,&alpha,xarray,&one,yarray,&one);
172: VecRestoreArrayRead(xin,&xarray);
173: VecRestoreArray(yin,&yarray);
174: PetscLogFlops(2.0*yin->map->n);
175: }
176: return(0);
177: }
178: #endif
182: PetscErrorCode VecAXPBY_Seq(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
183: {
184: PetscErrorCode ierr;
185: PetscInt n = yin->map->n,i;
186: const PetscScalar *xx;
187: PetscScalar *yy,a = alpha,b = beta;
190: if (a == (PetscScalar)0.0) {
191: VecScale_Seq(yin,beta);
192: } else if (b == (PetscScalar)1.0) {
193: VecAXPY_Seq(yin,alpha,xin);
194: } else if (a == (PetscScalar)1.0) {
195: VecAYPX_Seq(yin,beta,xin);
196: } else if (b == (PetscScalar)0.0) {
197: VecGetArrayRead(xin,&xx);
198: VecGetArray(yin,(PetscScalar**)&yy);
199: for (i=0; i<n; i++) {
200: yy[i] = a*xx[i];
201: }
202: VecRestoreArrayRead(xin,&xx);
203: VecRestoreArray(yin,(PetscScalar**)&yy);
204: PetscLogFlops(xin->map->n);
205: } else {
206: VecGetArrayRead(xin,&xx);
207: VecGetArray(yin,(PetscScalar**)&yy);
208: for (i=0; i<n; i++) {
209: yy[i] = a*xx[i] + b*yy[i];
210: }
211: VecRestoreArrayRead(xin,&xx);
212: VecRestoreArray(yin,(PetscScalar**)&yy);
213: PetscLogFlops(3.0*xin->map->n);
214: }
215: return(0);
216: }
220: PetscErrorCode VecAXPBYPCZ_Seq(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
221: {
222: PetscErrorCode ierr;
223: PetscInt n = zin->map->n,i;
224: const PetscScalar *yy,*xx;
225: PetscScalar *zz;
228: VecGetArrayRead(xin,&xx);
229: VecGetArrayRead(yin,&yy);
230: VecGetArray(zin,&zz);
231: if (alpha == (PetscScalar)1.0) {
232: for (i=0; i<n; i++) {
233: zz[i] = xx[i] + beta*yy[i] + gamma*zz[i];
234: }
235: PetscLogFlops(4.0*n);
236: } else if (gamma == (PetscScalar)1.0) {
237: for (i=0; i<n; i++) {
238: zz[i] = alpha*xx[i] + beta*yy[i] + zz[i];
239: }
240: PetscLogFlops(4.0*n);
241: } else if (gamma == (PetscScalar)0.0) {
242: for (i=0; i<n; i++) {
243: zz[i] = alpha*xx[i] + beta*yy[i];
244: }
245: PetscLogFlops(3.0*n);
246: } else {
247: for (i=0; i<n; i++) {
248: zz[i] = alpha*xx[i] + beta*yy[i] + gamma*zz[i];
249: }
250: PetscLogFlops(5.0*n);
251: }
252: VecRestoreArrayRead(xin,&xx);
253: VecRestoreArrayRead(yin,&yy);
254: VecRestoreArray(zin,&zz);
255: return(0);
256: }