Actual source code: cuspvecimpl.h
petsc-3.3-p5 2012-12-01
1: #ifndef __CUSPVECIMPL
4: #include <petsc-private/vecimpl.h>
6: #include <algorithm>
7: #include <vector>
8: #include <string>
10: #include <cublas.h>
11: #include <cusp/blas.h>
12: #include <thrust/host_vector.h>
13: #include <thrust/device_vector.h>
14: #include <thrust/iterator/constant_iterator.h>
15: #include <thrust/transform.h>
16: #include <thrust/iterator/permutation_iterator.h>
/* Shorthand names for the cusp::array1d instantiations used in this header.
   The scalar arrays store PetscScalar and the index arrays store PetscInt;
   the second template argument picks cusp::device_memory or cusp::host_memory. */
#define CUSPARRAY       cusp::array1d<PetscScalar,cusp::device_memory>
#define CUSPARRAYCPU    cusp::array1d<PetscScalar,cusp::host_memory>
#define CUSPINTARRAYGPU cusp::array1d<PetscInt,cusp::device_memory>
#define CUSPINTARRAYCPU cusp::array1d<PetscInt,cusp::host_memory>
23: extern PetscErrorCode VecDotNorm2_SeqCUSP(Vec,Vec,PetscScalar *, PetscScalar *);
24: extern PetscErrorCode VecPointwiseDivide_SeqCUSP(Vec,Vec,Vec);
25: extern PetscErrorCode VecWAXPY_SeqCUSP(Vec,PetscScalar,Vec,Vec);
26: extern PetscErrorCode VecMDot_SeqCUSP(Vec,PetscInt,const Vec[],PetscScalar *);
27: extern PetscErrorCode VecSet_SeqCUSP(Vec,PetscScalar);
28: extern PetscErrorCode VecMAXPY_SeqCUSP(Vec,PetscInt,const PetscScalar *,Vec *);
29: extern PetscErrorCode VecAXPBYPCZ_SeqCUSP(Vec,PetscScalar,PetscScalar,PetscScalar,Vec,Vec);
30: extern PetscErrorCode VecPointwiseMult_SeqCUSP(Vec,Vec,Vec);
31: extern PetscErrorCode VecPlaceArray_SeqCUSP(Vec,const PetscScalar *);
32: extern PetscErrorCode VecResetArray_SeqCUSP(Vec);
33: extern PetscErrorCode VecReplaceArray_SeqCUSP(Vec,const PetscScalar *);
34: extern PetscErrorCode VecDot_SeqCUSP(Vec,Vec,PetscScalar *);
35: extern PetscErrorCode VecTDot_SeqCUSP(Vec,Vec,PetscScalar *);
36: extern PetscErrorCode VecScale_SeqCUSP(Vec,PetscScalar);
37: extern PetscErrorCode VecCopy_SeqCUSP(Vec,Vec);
38: extern PetscErrorCode VecSwap_SeqCUSP(Vec,Vec);
39: extern PetscErrorCode VecAXPY_SeqCUSP(Vec,PetscScalar,Vec);
40: extern PetscErrorCode VecAXPBY_SeqCUSP(Vec,PetscScalar,PetscScalar,Vec);
41: extern PetscErrorCode VecDuplicate_SeqCUSP(Vec,Vec *);
42: extern PetscErrorCode VecNorm_SeqCUSP(Vec,NormType,PetscReal*);
43: extern PetscErrorCode VecCUSPCopyToGPU(Vec);
44: extern PetscErrorCode VecCUSPAllocateCheck(Vec);
45: extern PetscErrorCode VecCUSPAllocateCheckHost(Vec);
46: EXTERN_C_BEGIN
47: extern PetscErrorCode VecCreate_SeqCUSP(Vec);
48: EXTERN_C_END
49: extern PetscErrorCode VecView_Seq(Vec,PetscViewer);
50: extern PetscErrorCode VecDestroy_SeqCUSP(Vec);
51: extern PetscErrorCode VecAYPX_SeqCUSP(Vec,PetscScalar,Vec);
52: extern PetscErrorCode VecSetRandom_SeqCUSP(Vec,PetscRandom);
54: extern PetscErrorCode VecCUSPCopyToGPU_Public(Vec);
55: extern PetscErrorCode VecCUSPAllocateCheck_Public(Vec);
57: #ifdef PETSC_HAVE_TXPETSCGPU
58: #include "tx_vector_interface.h"
59: #endif
61: struct _p_PetscCUSPIndices {
62: #ifdef PETSC_HAVE_TXPETSCGPU
63: GPU_Indices<PetscInt, PetscScalar> * sendIndices;
64: GPU_Indices<PetscInt, PetscScalar> * recvIndices;
65: #else
66: CUSPINTARRAYCPU sendIndicesCPU;
67: CUSPINTARRAYGPU sendIndicesGPU;
69: CUSPINTARRAYCPU recvIndicesCPU;
70: CUSPINTARRAYGPU recvIndicesGPU;
71: #endif
72: };
/* Prototypes that exist only when PETSC_HAVE_TXPETSCGPU is defined.  Each
   takes a vector and a PetscCUSPIndices handle; the definitions are not part
   of this header. */
#if defined(PETSC_HAVE_TXPETSCGPU)
extern PetscErrorCode VecCUSPCopySomeToContiguousBufferGPU(Vec, PetscCUSPIndices);
extern PetscErrorCode VecCUSPCopySomeFromContiguousBufferGPU(Vec, PetscCUSPIndices);
#endif
/* CHKERRCUSP(err) - raises a PETSC_ERR_LIB error on PETSC_COMM_SELF (through
   SETERRQ1) when err, converted to int, differs from CUBLAS_STATUS_SUCCESS.

   The body is wrapped in do { ... } while (0) so that the macro behaves as a
   single statement: a bare `if` would capture the `else` of an enclosing
   `if (cond) CHKERRCUSP(e); else ...`.  The argument is parenthesized and
   evaluated exactly once, and the value printed with %d is the int copy, so
   the format always matches the argument type. */
#define CHKERRCUSP(err) \
  do { \
    const int chkerrcusp_status_ = (int)(err); \
    if (chkerrcusp_status_ != (int)CUBLAS_STATUS_SUCCESS) { \
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error %d",chkerrcusp_status_); \
    } \
  } while (0)
/* VecCUSPCastToRawPtr(x) - applies thrust::raw_pointer_cast to the address of
   element 0 of x.  The argument is parenthesized, so any expression may be
   passed. */
#define VecCUSPCastToRawPtr(x) thrust::raw_pointer_cast(&((x)[0]))
/* WaitForGPU() - evaluates to the result of cudaThreadSynchronize() when
   PetscCUSPSynchronize is true, and to 0 otherwise (no call is made).

   The whole conditional is parenthesized so the macro can be used inside a
   larger expression; unparenthesized, something like `WaitForGPU() + 1` would
   attach the `+ 1` to the `: 0` branch only.
   NOTE(review): the CUDA runtime documents cudaThreadSynchronize() as
   deprecated in favour of cudaDeviceSynchronize(); left unchanged here because
   the minimum supported CUDA version is not visible from this header. */
#define WaitForGPU() (PetscCUSPSynchronize ? cudaThreadSynchronize() : 0)
85: struct Vec_CUSP {
86: CUSPARRAY* GPUarray; /* this always holds the GPU data */
87: #ifdef PETSC_HAVE_TXPETSCGPU
88: GPU_Vector<PetscInt, PetscScalar> * GPUvector; /* this always holds the GPU data */
89: #endif
90: };
95: PETSC_STATIC_INLINE PetscErrorCode VecCUSPGetArrayReadWrite(Vec v, CUSPARRAY** a)
96: {
100: *a = 0;
101: VecCUSPCopyToGPU(v);
102: *a = ((Vec_CUSP *)v->spptr)->GPUarray;
103: return(0);
104: }
108: PETSC_STATIC_INLINE PetscErrorCode VecCUSPRestoreArrayReadWrite(Vec v, CUSPARRAY** a)
109: {
113: v->valid_GPU_array = PETSC_CUSP_GPU;
114: PetscObjectStateIncrease((PetscObject)v);
115: return(0);
116: }
120: PETSC_STATIC_INLINE PetscErrorCode VecCUSPGetArrayRead(Vec v, CUSPARRAY** a)
121: {
125: *a = 0;
126: VecCUSPCopyToGPU(v);
127: *a = ((Vec_CUSP *)v->spptr)->GPUarray;
128: return(0);
129: }
133: PETSC_STATIC_INLINE PetscErrorCode VecCUSPRestoreArrayRead(Vec v, CUSPARRAY** a)
134: {
136: return(0);
137: }
141: PETSC_STATIC_INLINE PetscErrorCode VecCUSPGetArrayWrite(Vec v, CUSPARRAY** a)
142: {
146: VecCUSPAllocateCheck(v);
147: *a = ((Vec_CUSP *)v->spptr)->GPUarray;
148: return(0);
149: }
153: PETSC_STATIC_INLINE PetscErrorCode VecCUSPRestoreArrayWrite(Vec v, CUSPARRAY** a)
154: {
158: v->valid_GPU_array = PETSC_CUSP_GPU;
159: PetscObjectStateIncrease((PetscObject)v);
160: return(0);
161: }
162: #endif