Actual source code: bvec1.c
petsc-3.3-p6 2013-02-11
2: /*
3: Defines the BLAS based vector operations. Code shared by parallel
4: and sequential vectors.
5: */
7: #include <petsc-private/vecimpl.h>
8: #include <../src/vec/vec/impls/dvecimpl.h>
9: #include <petscblaslapack.h>
10: #include <petscthreadcomm.h>
12: #if defined(PETSC_THREADCOMM_ACTIVE)
13: PetscErrorCode VecDot_kernel(PetscInt thread_id,Vec xin,Vec yin,PetscThreadCommRedCtx red)
14: {
15: PetscErrorCode ierr;
16: PetscInt *trstarts=xin->map->trstarts;
17: PetscInt start,end,n;
18: PetscBLASInt one = 1,bn;
19: const PetscScalar *xa,*ya;
20: PetscScalar z_loc;
22: VecGetArrayRead(xin,&xa);
23: VecGetArrayRead(yin,&ya);
24: start = trstarts[thread_id];
25: end = trstarts[thread_id+1];
26: n = end-start;
27: bn = PetscBLASIntCast(n);
28: /* arguments ya, xa are reversed because BLAS complex conjugates the first argument, PETSc the second */
29: z_loc = BLASdot_(&bn,ya+start,&one,xa+start,&one);
31: PetscThreadReductionKernelBegin(thread_id,red,(void*)&z_loc);
33: VecRestoreArrayRead(xin,&xa);
34: VecRestoreArrayRead(yin,&ya);
35: return 0;
36: }
40: PetscErrorCode VecDot_Seq(Vec xin,Vec yin,PetscScalar *z)
41: {
42: PetscErrorCode ierr;
43: PetscThreadCommRedCtx red;
46: PetscThreadReductionBegin(((PetscObject)xin)->comm,THREADCOMM_SUM,PETSC_SCALAR,&red);
47: PetscThreadCommRunKernel(((PetscObject)xin)->comm,(PetscThreadKernel)VecDot_kernel,3,xin,yin,red);
48: PetscThreadReductionEnd(red,z);
49: if (xin->map->n > 0) {
50: PetscLogFlops(2.0*xin->map->n-1);
51: }
52: return(0);
53: }
54: #else
57: PetscErrorCode VecDot_Seq(Vec xin,Vec yin,PetscScalar *z)
58: {
59: const PetscScalar *ya,*xa;
60: PetscBLASInt one = 1,bn;
61: PetscErrorCode ierr;
64: bn = PetscBLASIntCast(xin->map->n);
65: VecGetArrayRead(xin,&xa);
66: VecGetArrayRead(yin,&ya);
67: /* arguments ya, xa are reversed because BLAS complex conjugates the first argument, PETSc the second */
68: *z = BLASdot_(&bn,ya,&one,xa,&one);
69: VecRestoreArrayRead(xin,&xa);
70: VecRestoreArrayRead(yin,&ya);
71: if (xin->map->n > 0) {
72: PetscLogFlops(2.0*xin->map->n-1);
73: }
74: return(0);
75: }
76: #endif
80: PetscErrorCode VecTDot_Seq(Vec xin,Vec yin,PetscScalar *z)
81: {
82: const PetscScalar *ya,*xa;
83: PetscBLASInt one = 1,bn;
84: PetscErrorCode ierr;
87: bn = PetscBLASIntCast(xin->map->n);
88: VecGetArrayRead(xin,&xa);
89: VecGetArrayRead(yin,&ya);
90: *z = BLASdotu_(&bn,xa,&one,ya,&one);
91: VecRestoreArrayRead(xin,&xa);
92: VecRestoreArrayRead(yin,&ya);
93: if (xin->map->n > 0) {
94: PetscLogFlops(2.0*xin->map->n-1);
95: }
96: return(0);
97: }
101: PetscErrorCode VecScale_Seq(Vec xin, PetscScalar alpha)
102: {
104: PetscBLASInt one = 1,bn = PetscBLASIntCast(xin->map->n);
107: if (alpha == (PetscScalar)0.0) {
108: VecSet_Seq(xin,alpha);
109: } else if (alpha != (PetscScalar)1.0) {
110: PetscScalar a = alpha,*xarray;
111: VecGetArray(xin,&xarray);
112: BLASscal_(&bn,&a,xarray,&one);
113: VecRestoreArray(xin,&xarray);
114: }
115: PetscLogFlops(xin->map->n);
116: return(0);
117: }
119: #if defined(PETSC_THREADCOMM_ACTIVE)
120: PetscErrorCode VecAXPY_kernel(PetscInt thread_id,Vec yin,PetscScalar *alpha_p,Vec xin)
121: {
122: PetscErrorCode ierr;
123: const PetscScalar *xarray;
124: PetscScalar *yarray;
125: PetscBLASInt one=1,bn;
126: PetscInt *trstarts=yin->map->trstarts,start,end,n;
127: PetscScalar alpha = *alpha_p;
129: start = trstarts[thread_id];
130: end = trstarts[thread_id+1];
131: n = end - start;
132: bn = PetscBLASIntCast(n);
133: VecGetArrayRead(xin,&xarray);
134: VecGetArray(yin,&yarray);
135: BLASaxpy_(&bn,&alpha,xarray+start,&one,yarray+start,&one);
136: VecRestoreArrayRead(xin,&xarray);
137: VecRestoreArray(yin,&yarray);
139: return 0;
140: }
143: PetscErrorCode VecAXPY_Seq(Vec yin,PetscScalar alpha,Vec xin)
144: {
145: PetscErrorCode ierr;
148: /* assume that the BLAS handles alpha == 1.0 efficiently since we have no fast code for it */
149: if (alpha != (PetscScalar)0.0) {
150: PetscScalar *scalar;
151: PetscThreadCommGetScalars(((PetscObject)yin)->comm,&scalar,PETSC_NULL,PETSC_NULL);
152: *scalar = alpha;
153: PetscThreadCommRunKernel(((PetscObject)yin)->comm,(PetscThreadKernel)VecAXPY_kernel,3,yin,scalar,xin);
154: PetscLogFlops(2.0*yin->map->n);
155: }
156: return(0);
157: }
158: #else
161: PetscErrorCode VecAXPY_Seq(Vec yin,PetscScalar alpha,Vec xin)
162: {
163: PetscErrorCode ierr;
164: const PetscScalar *xarray;
165: PetscScalar *yarray;
166: PetscBLASInt one = 1,bn = PetscBLASIntCast(yin->map->n);
169: /* assume that the BLAS handles alpha == 1.0 efficiently since we have no fast code for it */
170: if (alpha != (PetscScalar)0.0) {
171: VecGetArrayRead(xin,&xarray);
172: VecGetArray(yin,&yarray);
173: BLASaxpy_(&bn,&alpha,xarray,&one,yarray,&one);
174: VecRestoreArrayRead(xin,&xarray);
175: VecRestoreArray(yin,&yarray);
176: PetscLogFlops(2.0*yin->map->n);
177: }
178: return(0);
179: }
180: #endif
184: PetscErrorCode VecAXPBY_Seq(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
185: {
186: PetscErrorCode ierr;
187: PetscInt n = yin->map->n,i;
188: const PetscScalar *xx;
189: PetscScalar *yy,a = alpha,b = beta;
192: if (a == (PetscScalar)0.0) {
193: VecScale_Seq(yin,beta);
194: } else if (b == (PetscScalar)1.0) {
195: VecAXPY_Seq(yin,alpha,xin);
196: } else if (a == (PetscScalar)1.0) {
197: VecAYPX_Seq(yin,beta,xin);
198: } else if (b == (PetscScalar)0.0) {
199: VecGetArrayRead(xin,&xx);
200: VecGetArray(yin,(PetscScalar**)&yy);
201: for (i=0; i<n; i++) {
202: yy[i] = a*xx[i];
203: }
204: VecRestoreArrayRead(xin,&xx);
205: VecRestoreArray(yin,(PetscScalar**)&yy);
206: PetscLogFlops(xin->map->n);
207: } else {
208: VecGetArrayRead(xin,&xx);
209: VecGetArray(yin,(PetscScalar**)&yy);
210: for (i=0; i<n; i++) {
211: yy[i] = a*xx[i] + b*yy[i];
212: }
213: VecRestoreArrayRead(xin,&xx);
214: VecRestoreArray(yin,(PetscScalar**)&yy);
215: PetscLogFlops(3.0*xin->map->n);
216: }
217: return(0);
218: }
222: PetscErrorCode VecAXPBYPCZ_Seq(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
223: {
224: PetscErrorCode ierr;
225: PetscInt n = zin->map->n,i;
226: const PetscScalar *yy,*xx;
227: PetscScalar *zz;
230: VecGetArrayRead(xin,&xx);
231: VecGetArrayRead(yin,&yy);
232: VecGetArray(zin,&zz);
233: if (alpha == (PetscScalar)1.0) {
234: for (i=0; i<n; i++) {
235: zz[i] = xx[i] + beta*yy[i] + gamma*zz[i];
236: }
237: PetscLogFlops(4.0*n);
238: } else if (gamma == (PetscScalar)1.0) {
239: for (i=0; i<n; i++) {
240: zz[i] = alpha*xx[i] + beta*yy[i] + zz[i];
241: }
242: PetscLogFlops(4.0*n);
243: } else if (gamma == (PetscScalar)0.0) {
244: for (i=0; i<n; i++) {
245: zz[i] = alpha*xx[i] + beta*yy[i];
246: }
247: PetscLogFlops(3.0*n);
248: } else {
249: for (i=0; i<n; i++) {
250: zz[i] = alpha*xx[i] + beta*yy[i] + gamma*zz[i];
251: }
252: PetscLogFlops(5.0*n);
253: }
254: VecRestoreArrayRead(xin,&xx);
255: VecRestoreArrayRead(yin,&yy);
256: VecRestoreArray(zin,&zz);
257: return(0);
258: }