Actual source code: mpiaijcusp.cu

petsc-3.3-p5 2012-12-01
#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/

EXTERN_C_BEGIN
PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJCUSP(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscInt       i;

  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  PetscLayoutSetUp(B->rmap);
  PetscLayoutSetUp(B->cmap);
  if (d_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  if (!B->preallocated) {
    /* Explicitly create the two MATSEQAIJCUSP matrices: the diagonal block b->A and the off-diagonal block b->B. */
    MatCreate(PETSC_COMM_SELF,&b->A);
    MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);
    MatSetType(b->A,MATSEQAIJCUSP);
    PetscLogObjectParent(B,b->A);
    MatCreate(PETSC_COMM_SELF,&b->B);
    MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);
    MatSetType(b->B,MATSEQAIJCUSP);
    PetscLogObjectParent(B,b->B);
  }

  MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);
  MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);
  B->preallocated = PETSC_TRUE;
  return(0);
}
EXTERN_C_END
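
/* Illustrative usage sketch (not part of the original mpiaijcusp.cu, added for
   exposition): calling MatMPIAIJSetPreallocation() on a matrix of type
   MATMPIAIJCUSP dispatches, through the composed "MatMPIAIJSetPreallocation_C"
   function, to the routine above, which preallocates the diagonal and
   off-diagonal MATSEQAIJCUSP blocks.  The function name, local size, and nonzero
   estimates below are hypothetical; a CUSP-enabled PETSc build is assumed. */
#include <petscmat.h>
static PetscErrorCode ExamplePreallocateMPIAIJCUSP(MPI_Comm comm,PetscInt mlocal,Mat *A)
{
  PetscErrorCode ierr;

  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJCUSP);CHKERRQ(ierr);
  /* estimate ~5 nonzeros per row in the diagonal block and ~2 in the off-diagonal block */
  ierr = MatMPIAIJSetPreallocation(*A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  return(0);
}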
PetscErrorCode  MatGetVecs_MPIAIJCUSP(Mat mat,Vec *right,Vec *left)
{

  if (right) {
    VecCreate(((PetscObject)mat)->comm,right);
    VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);
    VecSetBlockSize(*right,mat->rmap->bs);
    VecSetType(*right,VECCUSP);
    PetscLayoutReference(mat->cmap,&(*right)->map);
  }
  if (left) {
    VecCreate(((PetscObject)mat)->comm,left);
    VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);
    VecSetBlockSize(*left,mat->rmap->bs);
    VecSetType(*left,VECCUSP);
    PetscLayoutReference(mat->rmap,&(*left)->map);
  }
  return(0);
}
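
/* Illustrative usage sketch (not part of the original file, added for exposition):
   because MatGetVecs_MPIAIJCUSP() is installed as the getvecs operation further
   below, MatGetVecs() on a MATMPIAIJCUSP matrix returns VECCUSP vectors whose
   layouts are shared with the matrix's column and row maps.  Error handling is
   abbreviated and the matrix A is assumed to be set up already. */
static PetscErrorCode ExampleGetCUSPWorkVectors(Mat A)
{
  Vec            x,y;   /* x matches A's column layout, y matches A's row layout */
  PetscErrorCode ierr;

  ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);  /* both come back with type VECCUSP */
  ierr = VecSet(x,1.0);CHKERRQ(ierr);
  ierr = VecSet(y,0.0);CHKERRQ(ierr);
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
  return(0);
}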


#ifdef PETSC_HAVE_TXPETSCGPU
PetscErrorCode MatMult_MPIAIJCUSP(Mat A,Vec xx,Vec yy)
{
  // This multiplication sequence differs from the CPU version.
  // In particular, the diagonal-block multiplication kernel is
  // launched in one stream.  Then, in a separate stream, the
  // DeviceToHost transfer, the MPI messaging, and the HostToDevice
  // transfer are launched.  Once the data-transfer stream is
  // synchronized, to ensure the messaging is complete, the
  // MatMultAdd kernel for the off-diagonal block is launched in
  // the original (MatMult) stream to protect against race conditions.
  //
  // This sequence should only be called for GPU computation.
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscInt       nt;

  VecGetLocalSize(xx,&nt);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  VecScatterInitializeForGPU(a->Mvctx,xx,SCATTER_FORWARD);
  (*a->A->ops->mult)(a->A,xx,yy);
  VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
  VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
  (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
  VecScatterFinalizeForGPU(a->Mvctx);
  return(0);
}
#endif
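
/* Illustrative usage sketch (not part of the original file, added for exposition):
   when PETSc is configured with txpetscgpu support, a MatMult() on a MATMPIAIJCUSP
   matrix reaches MatMult_MPIAIJCUSP() above, which computes the diagonal-block
   product while the needed off-process entries of xx are scattered into a->lvec,
   then applies the off-diagonal block with a MatMultAdd.  The assembly pattern and
   names below are hypothetical; error handling is abbreviated. */
static PetscErrorCode ExampleGpuMatVec(Mat A,Vec x,Vec y)
{
  PetscInt       i,rstart,rend;
  PetscErrorCode ierr;

  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {      /* a simple diagonal matrix, just to have values */
    ierr = MatSetValue(A,i,i,2.0,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = VecSet(x,1.0);CHKERRQ(ierr);
  ierr = MatMult(A,x,y);CHKERRQ(ierr); /* dispatches to MatMult_MPIAIJCUSP */
  return(0);
}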

EXTERN_C_BEGIN
PetscErrorCode  MatCreate_MPIAIJ(Mat);
EXTERN_C_END
PetscErrorCode MatSetValuesBatch_MPIAIJCUSP(Mat J, PetscInt Ne, PetscInt Nl, PetscInt *elemRows, const PetscScalar *elemMats);

EXTERN_C_BEGIN
PetscErrorCode  MatCreate_MPIAIJCUSP(Mat B)
{

  MatCreate_MPIAIJ(B);
  PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                     "MatMPIAIJSetPreallocation_MPIAIJCUSP",
                                      MatMPIAIJSetPreallocation_MPIAIJCUSP);
  B->ops->getvecs        = MatGetVecs_MPIAIJCUSP;
  B->ops->setvaluesbatch = MatSetValuesBatch_MPIAIJCUSP;
#ifdef PETSC_HAVE_TXPETSCGPU
  B->ops->mult           = MatMult_MPIAIJCUSP;
#endif
  PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJCUSP);
  return(0);
}
EXTERN_C_END


PetscErrorCode  MatCreateAIJCUSP(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscMPIInt    size;

  MatCreate(comm,A);
  MatSetSizes(*A,m,n,M,N);
  MPI_Comm_size(comm,&size);
  if (size > 1) {
    MatSetType(*A,MATMPIAIJCUSP);
    MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
  } else {
    MatSetType(*A,MATSEQAIJCUSP);
    MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
  }
  return(0);
}
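
/* Illustrative usage sketch (not part of the original file, added for exposition):
   MatCreateAIJCUSP() above is a convenience constructor that picks MATMPIAIJCUSP or
   MATSEQAIJCUSP based on the communicator size and preallocates in one call.  The
   local size and nonzero estimates below are hypothetical; error handling is
   abbreviated and a CUSP-enabled PETSc build is assumed. */
static PetscErrorCode ExampleCreateAIJCUSP(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;

  /* 100 local rows and columns, ~5 nonzeros per row in the diagonal block,
     ~2 per row in the off-diagonal block (the latter is ignored on one process) */
  ierr = MatCreateAIJCUSP(comm,100,100,PETSC_DETERMINE,PETSC_DETERMINE,5,PETSC_NULL,2,PETSC_NULL,A);CHKERRQ(ierr);
  return(0);
}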

/*MC
   MATAIJCUSP - MATAIJCUSP = "aijcusp" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCUSP when constructed with a single-process communicator,
   and to MATMPIAIJCUSP otherwise.  As a result, MatSeqAIJSetPreallocation() is supported for
   single-process communicators, and MatMPIAIJSetPreallocation() is supported for communicators
   controlling multiple processes.  It is recommended that you call both of the above preallocation
   routines for simplicity.

   Options Database Keys:
. -mat_type mpiaijcusp - sets the matrix type to "mpiaijcusp" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJ(), MATSEQAIJ, MATMPIAIJ, MATMPIAIJCUSP, MATSEQAIJCUSP
M*/
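
/* Illustrative usage sketch (not part of the original file, added for exposition):
   the pattern recommended by the manual page above, namely letting the type be
   chosen at runtime (e.g. with -mat_type mpiaijcusp, or -mat_type seqaijcusp on one
   process) and calling both preallocation routines; whichever routine does not
   match the final type is simply ignored.  Names and sizes are hypothetical; error
   handling is abbreviated and a CUSP-enabled PETSc build is assumed. */
static PetscErrorCode ExampleCreateFromOptions(MPI_Comm comm,PetscInt mlocal,Mat *A)
{
  PetscErrorCode ierr;

  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr);                       /* e.g. -mat_type mpiaijcusp */
  ierr = MatSeqAIJSetPreallocation(*A,5,PETSC_NULL);CHKERRQ(ierr);  /* used when the type is seqaijcusp */
  ierr = MatMPIAIJSetPreallocation(*A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr); /* used when mpiaijcusp */
  return(0);
}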