Actual source code: mpiaijcusp.cu
petsc-3.3-p5 2012-12-01
#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/

EXTERN_C_BEGIN
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJCUSP(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b;
  PetscInt   i;

  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  PetscLayoutSetUp(B->rmap);
  PetscLayoutSetUp(B->cmap);
  if (d_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  if (!B->preallocated) {
    /* Explicitly create the 2 sequential blocks (here of type MATSEQAIJCUSP). */
    MatCreate(PETSC_COMM_SELF,&b->A);
    MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);
    MatSetType(b->A,MATSEQAIJCUSP);
    PetscLogObjectParent(B,b->A);
    MatCreate(PETSC_COMM_SELF,&b->B);
    MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);
    MatSetType(b->B,MATSEQAIJCUSP);
    PetscLogObjectParent(B,b->B);
  }

  MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);
  MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);
  B->preallocated = PETSC_TRUE;
  return(0);
}
EXTERN_C_END
PetscErrorCode MatGetVecs_MPIAIJCUSP(Mat mat,Vec *right,Vec *left)
{
  if (right) {
    VecCreate(((PetscObject)mat)->comm,right);
    VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);
    VecSetBlockSize(*right,mat->rmap->bs);
    VecSetType(*right,VECCUSP);
    PetscLayoutReference(mat->cmap,&(*right)->map);
  }
  if (left) {
    VecCreate(((PetscObject)mat)->comm,left);
    VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);
    VecSetBlockSize(*left,mat->rmap->bs);
    VecSetType(*left,VECCUSP);
    PetscLayoutReference(mat->rmap,&(*left)->map);
  }
  return(0);
}
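/*
   A minimal usage sketch (not part of the original file): MatGetVecs() is the
   public entry point that dispatches to MatGetVecs_MPIAIJCUSP above, returning
   work vectors of type VECCUSP whose layouts match the matrix.  The function
   name and the matrix A are illustrative; error checking is omitted to match
   the style of this listing.
*/
static PetscErrorCode MatGetVecsUsageSketch(Mat A)
{
  Vec x,y;

  MatGetVecs(A,&x,&y);   /* x is compatible with A's columns, y with its rows */
  VecSet(x,1.0);
  MatMult(A,x,y);        /* both vectors live on the GPU as VECCUSP */
  VecDestroy(&x);
  VecDestroy(&y);
  return(0);
}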
#ifdef PETSC_HAVE_TXPETSCGPU

PetscErrorCode MatMult_MPIAIJCUSP(Mat A,Vec xx,Vec yy)
{
  // This multiplication sequence differs from the CPU version (a sketch of the
  // usual CPU ordering follows this #ifdef block for comparison).  In
  // particular, the diagonal block multiplication kernel is launched in one
  // stream.  Then, in a separate stream, the DeviceToHost data transfers (with
  // MPI messaging in between) and the HostToDevice transfers are launched.
  // Once the data-transfer stream is synchronized, to ensure messaging is
  // complete, the MatMultAdd kernel is launched in the original (MatMult)
  // stream to protect against race conditions.
  //
  // This sequence should only be called for GPU computation.
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscInt   nt;

  VecGetLocalSize(xx,&nt);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  VecScatterInitializeForGPU(a->Mvctx,xx,SCATTER_FORWARD);
  (*a->A->ops->mult)(a->A,xx,yy);
  VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
  VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
  (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
  VecScatterFinalizeForGPU(a->Mvctx);
  return(0);
}
#endif
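/*
   For comparison with the comment above: a minimal sketch (not the actual
   PETSc implementation) of the usual CPU-side ordering for MATMPIAIJ, in which
   the diagonal-block multiply is overlapped with the scatter of off-process
   vector entries, and the off-diagonal contribution is added afterwards.  The
   function name is hypothetical and error checking is omitted to match the
   style of this listing.
*/
static PetscErrorCode MatMult_MPIAIJ_CPUSketch(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);  /* start communication          */
  (*a->A->ops->mult)(a->A,xx,yy);                                      /* overlap: diagonal block part */
  VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);    /* finish communication         */
  (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);                           /* add off-diagonal block part  */
  return(0);
}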
EXTERN_C_BEGIN
PetscErrorCode MatCreate_MPIAIJ(Mat);
EXTERN_C_END

PetscErrorCode MatSetValuesBatch_MPIAIJCUSP(Mat J,PetscInt Ne,PetscInt Nl,PetscInt *elemRows,const PetscScalar *elemMats);

EXTERN_C_BEGIN
PetscErrorCode MatCreate_MPIAIJCUSP(Mat B)
{
  MatCreate_MPIAIJ(B);
  PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                    "MatMPIAIJSetPreallocation_MPIAIJCUSP",
                                    MatMPIAIJSetPreallocation_MPIAIJCUSP);
  B->ops->getvecs        = MatGetVecs_MPIAIJCUSP;
  B->ops->setvaluesbatch = MatSetValuesBatch_MPIAIJCUSP;
#ifdef PETSC_HAVE_TXPETSCGPU
  B->ops->mult           = MatMult_MPIAIJCUSP;
#endif
  PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJCUSP);
  return(0);
}
EXTERN_C_END
PetscErrorCode MatCreateAIJCUSP(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscMPIInt size;

  MatCreate(comm,A);
  MatSetSizes(*A,m,n,M,N);
  MPI_Comm_size(comm,&size);
  if (size > 1) {
    MatSetType(*A,MATMPIAIJCUSP);
    MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
  } else {
    MatSetType(*A,MATSEQAIJCUSP);
    MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
  }
  return(0);
}
/*MC
   MATAIJCUSP - MATAIJCUSP = "aijcusp" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCUSP when constructed with a single-process communicator,
   and to MATMPIAIJCUSP otherwise.  As a result, MatSeqAIJSetPreallocation() is supported for
   single-process communicators, and MatMPIAIJSetPreallocation() is supported for communicators
   controlling multiple processes.  It is recommended that you call both of the above preallocation
   routines for simplicity (a construction sketch follows this manual page).

   Options Database Keys:
.  -mat_type mpiaijcusp - sets the matrix type to "mpiaijcusp" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJ, MATSEQAIJ, MATMPIAIJ, MATMPIAIJCUSP, MATSEQAIJCUSP
M*/
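/*
   A minimal construction sketch (not part of the original file) of the
   recommendation above: set the type to MATAIJCUSP and call both preallocation
   routines so the same code works on one or many processes.  The function name,
   the deferred global sizes, and the preallocation values (5 diagonal / 2
   off-diagonal nonzeros per row, matching the defaults used earlier in this
   file) are illustrative choices; error checking is omitted for brevity.
*/
static PetscErrorCode MatCreateAIJCUSPSketch(MPI_Comm comm,PetscInt m,PetscInt n,Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
  MatSetType(*A,MATAIJCUSP);                                /* or -mat_type aijcusp with MatSetFromOptions() */
  MatSeqAIJSetPreallocation(*A,5,PETSC_NULL);               /* used when the communicator has one process    */
  MatMPIAIJSetPreallocation(*A,5,PETSC_NULL,2,PETSC_NULL);  /* used when it has more than one                */
  return(0);
}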