Actual source code: cuspvecimpl.h

petsc-3.3-p5 2012-12-01
  1: #ifndef __CUSPVECIMPL

  4: #include <petsc-private/vecimpl.h>

  6: #include <algorithm>
  7: #include <vector>
  8: #include <string>

 10: #include <cublas.h>
 11: #include <cusp/blas.h>
 12: #include <thrust/host_vector.h>
 13: #include <thrust/device_vector.h>
 14: #include <thrust/iterator/constant_iterator.h>
 15: #include <thrust/transform.h>
 16: #include <thrust/iterator/permutation_iterator.h>

 /* Shorthand names for the cusp::array1d instantiations used in this header:
    PetscScalar or PetscInt elements, stored in cusp::device_memory or
    cusp::host_memory. These are plain text substitutions, not typedefs. */
 18: #define CUSPARRAY cusp::array1d<PetscScalar,cusp::device_memory>
 19: #define CUSPARRAYCPU cusp::array1d<PetscScalar,cusp::host_memory>
 20: #define CUSPINTARRAYGPU cusp::array1d<PetscInt,cusp::device_memory>
 21: #define CUSPINTARRAYCPU cusp::array1d<PetscInt,cusp::host_memory>

 /* Declarations of the SeqCUSP vector routines. Only the prototypes appear
    here; no definitions are visible in this header. Every routine reports
    its status through a PetscErrorCode return value. */
 23: extern PetscErrorCode VecDotNorm2_SeqCUSP(Vec,Vec,PetscScalar *, PetscScalar *);
 24: extern PetscErrorCode VecPointwiseDivide_SeqCUSP(Vec,Vec,Vec);
 25: extern PetscErrorCode VecWAXPY_SeqCUSP(Vec,PetscScalar,Vec,Vec);
 26: extern PetscErrorCode VecMDot_SeqCUSP(Vec,PetscInt,const Vec[],PetscScalar *);
 27: extern PetscErrorCode VecSet_SeqCUSP(Vec,PetscScalar);
 28: extern PetscErrorCode VecMAXPY_SeqCUSP(Vec,PetscInt,const PetscScalar *,Vec *);
 29: extern PetscErrorCode VecAXPBYPCZ_SeqCUSP(Vec,PetscScalar,PetscScalar,PetscScalar,Vec,Vec);
 30: extern PetscErrorCode VecPointwiseMult_SeqCUSP(Vec,Vec,Vec);
 31: extern PetscErrorCode VecPlaceArray_SeqCUSP(Vec,const PetscScalar *);
 32: extern PetscErrorCode VecResetArray_SeqCUSP(Vec);
 33: extern PetscErrorCode VecReplaceArray_SeqCUSP(Vec,const PetscScalar *);
 34: extern PetscErrorCode VecDot_SeqCUSP(Vec,Vec,PetscScalar *);
 35: extern PetscErrorCode VecTDot_SeqCUSP(Vec,Vec,PetscScalar *);
 36: extern PetscErrorCode VecScale_SeqCUSP(Vec,PetscScalar);
 37: extern PetscErrorCode VecCopy_SeqCUSP(Vec,Vec);
 38: extern PetscErrorCode VecSwap_SeqCUSP(Vec,Vec);
 39: extern PetscErrorCode VecAXPY_SeqCUSP(Vec,PetscScalar,Vec);
 40: extern PetscErrorCode VecAXPBY_SeqCUSP(Vec,PetscScalar,PetscScalar,Vec);
 41: extern PetscErrorCode VecDuplicate_SeqCUSP(Vec,Vec *);
 42: extern PetscErrorCode VecNorm_SeqCUSP(Vec,NormType,PetscReal*);
 /* VecCUSPCopyToGPU and VecCUSPAllocateCheck are called by the inline
    accessors defined later in this header. */
 43: extern PetscErrorCode VecCUSPCopyToGPU(Vec);
 44: extern PetscErrorCode VecCUSPAllocateCheck(Vec);
 45: extern PetscErrorCode VecCUSPAllocateCheckHost(Vec);
 /* VecCreate_SeqCUSP is the only declaration wrapped in
    EXTERN_C_BEGIN/EXTERN_C_END. */
 46: EXTERN_C_BEGIN
 47: extern PetscErrorCode  VecCreate_SeqCUSP(Vec);
 48: EXTERN_C_END
 /* NOTE(review): VecView_Seq carries no CUSP suffix -- presumably the plain
    sequential viewer is reused for CUSP vectors; confirm against the callers. */
 49: extern PetscErrorCode VecView_Seq(Vec,PetscViewer);
 50: extern PetscErrorCode VecDestroy_SeqCUSP(Vec);
 51: extern PetscErrorCode VecAYPX_SeqCUSP(Vec,PetscScalar,Vec);
 52: extern PetscErrorCode VecSetRandom_SeqCUSP(Vec,PetscRandom);

 /* "_Public" variants of the two helpers declared above.
    NOTE(review): how they differ from the non-public versions is not
    visible from this header. */
 54: extern PetscErrorCode VecCUSPCopyToGPU_Public(Vec);
 55: extern PetscErrorCode VecCUSPAllocateCheck_Public(Vec);

 57: #ifdef PETSC_HAVE_TXPETSCGPU
 58: #include "tx_vector_interface.h"
 59: #endif

 /* Send/receive index storage. The member layout is chosen at compile time
    by PETSC_HAVE_TXPETSCGPU. */
 61: struct  _p_PetscCUSPIndices {
 62: #ifdef PETSC_HAVE_TXPETSCGPU
 /* With PETSC_HAVE_TXPETSCGPU: two pointers to GPU_Indices objects.
    NOTE(review): GPU_Indices presumably comes from tx_vector_interface.h,
    and ownership of the pointees is not visible here -- confirm. */
 63:   GPU_Indices<PetscInt, PetscScalar> * sendIndices;
 64:   GPU_Indices<PetscInt, PetscScalar> * recvIndices;
 65: #else
 /* Otherwise: each index list is held by value twice, once as a
    cusp::host_memory array (CPU) and once as a cusp::device_memory array (GPU). */
 66:   CUSPINTARRAYCPU sendIndicesCPU;
 67:   CUSPINTARRAYGPU sendIndicesGPU;

 69:   CUSPINTARRAYCPU recvIndicesCPU;
 70:   CUSPINTARRAYGPU recvIndicesGPU;
 71: #endif
 72: };

 /* Declared only when PETSC_HAVE_TXPETSCGPU is defined. Both routines take a
    vector and a PetscCUSPIndices handle.
    NOTE(review): the PetscCUSPIndices typedef is not visible in this header;
    presumably it is a pointer to struct _p_PetscCUSPIndices -- confirm. */
 74: #ifdef PETSC_HAVE_TXPETSCGPU
 75: extern PetscErrorCode VecCUSPCopySomeToContiguousBufferGPU(Vec, PetscCUSPIndices);
 76: extern PetscErrorCode VecCUSPCopySomeFromContiguousBufferGPU(Vec, PetscCUSPIndices);
 77: #endif

 /*
    CHKERRCUSP(err) - if err, converted to int, differs from
    CUBLAS_STATUS_SUCCESS, raise PETSC_ERR_LIB on PETSC_COMM_SELF through
    SETERRQ1 with the message "CUSP error %d".

    The body is wrapped in do { ... } while (0) so the macro behaves as a
    single statement: a bare `if` here would capture the `else` of a
    surrounding `if (...) CHKERRCUSP(x); else ...`. The argument is
    evaluated exactly once and stored as an int, so an expression with side
    effects (for example a library call) is not repeated on the error path
    and the value passed for %d really is an int.
 */
#define CHKERRCUSP(err) do { \
    const int cusp_status_ = (int)(err); \
    if (cusp_status_ != (int)CUBLAS_STATUS_SUCCESS) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error %d",cusp_status_); \
  } while (0)

 /* VecCUSPCastToRawPtr(x) - takes the address of element 0 of x and passes
    it through thrust::raw_pointer_cast.
    NOTE(review): indexing element 0 presumably requires x to be non-empty --
    confirm at the call sites. */
 81: #define VecCUSPCastToRawPtr(x) thrust::raw_pointer_cast(&(x)[0])

 /*
    WaitForGPU() - evaluates to the result of cudaThreadSynchronize() when
    PetscCUSPSynchronize is true, and to 0 otherwise (no call is made).

    The whole conditional is parenthesised so the macro can be used inside a
    larger expression: without the parentheses, `WaitForGPU() != 0` or
    `WaitForGPU() + x` would bind to the `: 0` branch only.
 */
#define WaitForGPU() (PetscCUSPSynchronize ? cudaThreadSynchronize() : 0)

 /* GPU-side data of a CUSP vector. The inline accessors in this header reach
    it by casting v->spptr to Vec_CUSP*. */
 85: struct Vec_CUSP {
 86:   CUSPARRAY*       GPUarray;  /* this always holds the GPU data */
 87: #ifdef PETSC_HAVE_TXPETSCGPU
 /* NOTE(review): the comment on GPUvector repeats the GPUarray one word for
    word; how the two members relate when both exist is not visible here --
    confirm which one is authoritative. */
 88:   GPU_Vector<PetscInt, PetscScalar> * GPUvector; /* this always holds the GPU data */
 89: #endif
 90: };


 95: PETSC_STATIC_INLINE PetscErrorCode VecCUSPGetArrayReadWrite(Vec v, CUSPARRAY** a)
 96: {

100:   *a   = 0;
101:   VecCUSPCopyToGPU(v);
102:   *a   = ((Vec_CUSP *)v->spptr)->GPUarray;
103:   return(0);
104: }

108: PETSC_STATIC_INLINE PetscErrorCode VecCUSPRestoreArrayReadWrite(Vec v, CUSPARRAY** a)
109: {

113:   v->valid_GPU_array = PETSC_CUSP_GPU;
114:   PetscObjectStateIncrease((PetscObject)v);
115:   return(0);
116: }

120: PETSC_STATIC_INLINE PetscErrorCode VecCUSPGetArrayRead(Vec v, CUSPARRAY** a)
121: {

125:   *a   = 0;
126:   VecCUSPCopyToGPU(v);
127:   *a   = ((Vec_CUSP *)v->spptr)->GPUarray;
128:   return(0);
129: }

133: PETSC_STATIC_INLINE PetscErrorCode VecCUSPRestoreArrayRead(Vec v, CUSPARRAY** a)
134: {
136:   return(0);
137: }

141: PETSC_STATIC_INLINE PetscErrorCode VecCUSPGetArrayWrite(Vec v, CUSPARRAY** a)
142: {

146:   VecCUSPAllocateCheck(v);
147:   *a   = ((Vec_CUSP *)v->spptr)->GPUarray;
148:   return(0);
149: }

153: PETSC_STATIC_INLINE PetscErrorCode VecCUSPRestoreArrayWrite(Vec v, CUSPARRAY** a)
154: {

158:   v->valid_GPU_array = PETSC_CUSP_GPU;
159:   PetscObjectStateIncrease((PetscObject)v);
160:   return(0);
161: }
162: #endif