Actual source code: axpy.c

petsc-3.8.3 2017-12-09
Report Typos and Errors

  2:  #include <petsc/private/matimpl.h>

  4: /*@
  5:    MatAXPY - Computes Y = a*X + Y.

  7:    Logically  Collective on Mat

  9:    Input Parameters:
 10: +  a - the scalar multiplier
 11: .  X - the first matrix
 12: .  Y - the second matrix
 13: -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
 14:          or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)

 16:    Level: intermediate

 18: .keywords: matrix, add

 20: .seealso: MatAYPX()
 21:  @*/
 22: PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
 23: {
 25:   PetscInt       m1,m2,n1,n2;

 31:   MatGetSize(X,&m1,&n1);
 32:   MatGetSize(Y,&m2,&n2);
 33:   if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: %D %D %D %D",m1,m2,n1,n2);

 35:   PetscLogEventBegin(MAT_AXPY,Y,0,0,0);
 36:   if (Y->ops->axpy) {
 37:     (*Y->ops->axpy)(Y,a,X,str);
 38:   } else {
 39:     MatAXPY_Basic(Y,a,X,str);
 40:   }
 41:   PetscLogEventEnd(MAT_AXPY,Y,0,0,0);
 42: #if defined(PETSC_HAVE_CUSP)
 43:   if (Y->valid_GPU_matrix != PETSC_CUSP_UNALLOCATED) {
 44:     Y->valid_GPU_matrix = PETSC_CUSP_CPU;
 45:   }
 46: #elif defined(PETSC_HAVE_VIENNACL)
 47:   if (Y->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) {
 48:     Y->valid_GPU_matrix = PETSC_VIENNACL_CPU;
 49:   }
 50: #elif defined(PETSC_HAVE_VECCUDA)
 51:   if (Y->valid_GPU_matrix != PETSC_CUDA_UNALLOCATED) {
 52:     Y->valid_GPU_matrix = PETSC_CUDA_CPU;
 53:   }
 54: #endif
 55:   return(0);
 56: }

 58: PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
 59: {
 60:   PetscInt          i,start,end,j,ncols,m,n;
 61:   PetscErrorCode    ierr;
 62:   const PetscInt    *row;
 63:   PetscScalar       *val;
 64:   const PetscScalar *vals;

 67:   MatGetSize(X,&m,&n);
 68:   MatGetOwnershipRange(X,&start,&end);
 69:   if (a == 1.0) {
 70:     for (i = start; i < end; i++) {
 71:       MatGetRow(X,i,&ncols,&row,&vals);
 72:       MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);
 73:       MatRestoreRow(X,i,&ncols,&row,&vals);
 74:     }
 75:   } else {
 76:     PetscMalloc1(n+1,&val);
 77:     for (i=start; i<end; i++) {
 78:       MatGetRow(X,i,&ncols,&row,&vals);
 79:       for (j=0; j<ncols; j++) {
 80:         val[j] = a*vals[j];
 81:       }
 82:       MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);
 83:       MatRestoreRow(X,i,&ncols,&row,&vals);
 84:     }
 85:     PetscFree(val);
 86:   }
 87:   MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
 88:   MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
 89:   return(0);
 90: }

 92: PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
 93: {
 94:   PetscInt          i,start,end,j,ncols,m,n;
 95:   PetscErrorCode    ierr;
 96:   const PetscInt    *row;
 97:   PetscScalar       *val;
 98:   const PetscScalar *vals;

101:   MatGetSize(X,&m,&n);
102:   MatGetOwnershipRange(X,&start,&end);
103:   if (a == 1.0) {
104:     for (i = start; i < end; i++) {
105:       MatGetRow(Y,i,&ncols,&row,&vals);
106:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
107:       MatRestoreRow(Y,i,&ncols,&row,&vals);

109:       MatGetRow(X,i,&ncols,&row,&vals);
110:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
111:       MatRestoreRow(X,i,&ncols,&row,&vals);
112:     }
113:   } else {
114:     PetscMalloc1(n+1,&val);
115:     for (i=start; i<end; i++) {
116:       MatGetRow(Y,i,&ncols,&row,&vals);
117:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
118:       MatRestoreRow(Y,i,&ncols,&row,&vals);

120:       MatGetRow(X,i,&ncols,&row,&vals);
121:       for (j=0; j<ncols; j++) {
122:         val[j] = a*vals[j];
123:       }
124:       MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);
125:       MatRestoreRow(X,i,&ncols,&row,&vals);
126:     }
127:     PetscFree(val);
128:   }
129:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
130:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
131:   return(0);
132: }

134: /*@
135:    MatShift - Computes Y =  Y + a I, where a is a PetscScalar and I is the identity matrix.

137:    Neighbor-wise Collective on Mat

139:    Input Parameters:
140: +  Y - the matrices
141: -  a - the PetscScalar

143:    Level: intermediate

145:    Notes: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
146:    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
147:    entry).

149:    Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is
150:     preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix.

152: .keywords: matrix, add, shift

154: .seealso: MatDiagonalSet()
155:  @*/
156: PetscErrorCode  MatShift(Mat Y,PetscScalar a)
157: {

162:   if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
163:   if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
164:   MatCheckPreallocated(Y,1);

166:   if (Y->ops->shift) {
167:     (*Y->ops->shift)(Y,a);
168:   } else {
169:     MatShift_Basic(Y,a);
170:   }

172: #if defined(PETSC_HAVE_CUSP)
173:   if (Y->valid_GPU_matrix != PETSC_CUSP_UNALLOCATED) {
174:     Y->valid_GPU_matrix = PETSC_CUSP_CPU;
175:   }
176: #elif defined(PETSC_HAVE_VIENNACL)
177:   if (Y->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) {
178:     Y->valid_GPU_matrix = PETSC_VIENNACL_CPU;
179:   }
180: #elif defined(PETSC_HAVE_VECCUDA)
181:   if (Y->valid_GPU_matrix != PETSC_CUDA_UNALLOCATED) {
182:     Y->valid_GPU_matrix = PETSC_CUDA_CPU;
183:   }
184: #endif
185:   return(0);
186: }

188: PetscErrorCode  MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
189: {
191:   PetscInt       i,start,end;
192:   PetscScalar    *v;

195:   MatGetOwnershipRange(Y,&start,&end);
196:   VecGetArray(D,&v);
197:   for (i=start; i<end; i++) {
198:     MatSetValues(Y,1,&i,1,&i,v+i-start,is);
199:   }
200:   VecRestoreArray(D,&v);
201:   MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
202:   MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
203:   return(0);
204: }

206: /*@
207:    MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
208:    that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
209:    INSERT_VALUES.

211:    Input Parameters:
212: +  Y - the input matrix
213: .  D - the diagonal matrix, represented as a vector
214: -  i - INSERT_VALUES or ADD_VALUES

216:    Neighbor-wise Collective on Mat and Vec

218:    Notes: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
219:    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
220:    entry).

222:    Level: intermediate

224: .keywords: matrix, add, shift, diagonal

226: .seealso: MatShift()
227: @*/
228: PetscErrorCode  MatDiagonalSet(Mat Y,Vec D,InsertMode is)
229: {
231:   PetscInt       matlocal,veclocal;

236:   MatGetLocalSize(Y,&matlocal,NULL);
237:   VecGetLocalSize(D,&veclocal);
238:   if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
239:   if (Y->ops->diagonalset) {
240:     (*Y->ops->diagonalset)(Y,D,is);
241:   } else {
242:     MatDiagonalSet_Default(Y,D,is);
243:   }
244:   return(0);
245: }

247: /*@
248:    MatAYPX - Computes Y = a*Y + X.

250:    Logically on Mat

252:    Input Parameters:
253: +  a - the PetscScalar multiplier
254: .  Y - the first matrix
255: .  X - the second matrix
256: -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN

258:    Level: intermediate

260: .keywords: matrix, add

262: .seealso: MatAXPY()
263:  @*/
264: PetscErrorCode  MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
265: {
266:   PetscScalar    one = 1.0;
268:   PetscInt       mX,mY,nX,nY;

274:   MatGetSize(X,&mX,&nX);
275:   MatGetSize(X,&mY,&nY);
276:   if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY);

278:   MatScale(Y,a);
279:   MatAXPY(Y,one,X,str);
280:   return(0);
281: }

283: /*@
284:     MatComputeExplicitOperator - Computes the explicit matrix

286:     Collective on Mat

288:     Input Parameter:
289: .   inmat - the matrix

291:     Output Parameter:
292: .   mat - the explict preconditioned operator

294:     Notes:
295:     This computation is done by applying the operators to columns of the
296:     identity matrix.

298:     Currently, this routine uses a dense matrix format when 1 processor
299:     is used and a sparse format otherwise.  This routine is costly in general,
300:     and is recommended for use only with relatively small systems.

302:     Level: advanced

304: .keywords: Mat, compute, explicit, operator
305: @*/
306: PetscErrorCode  MatComputeExplicitOperator(Mat inmat,Mat *mat)
307: {
308:   Vec            in,out;
310:   PetscInt       i,m,n,M,N,*rows,start,end;
311:   MPI_Comm       comm;
312:   PetscScalar    *array,zero = 0.0,one = 1.0;
313:   PetscMPIInt    size;


319:   PetscObjectGetComm((PetscObject)inmat,&comm);
320:   MPI_Comm_size(comm,&size);

322:   MatGetLocalSize(inmat,&m,&n);
323:   MatGetSize(inmat,&M,&N);
324:   MatCreateVecs(inmat,&in,&out);
325:   VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);
326:   VecGetOwnershipRange(out,&start,&end);
327:   PetscMalloc1(m,&rows);
328:   for (i=0; i<m; i++) rows[i] = start + i;

330:   MatCreate(comm,mat);
331:   MatSetSizes(*mat,m,n,M,N);
332:   if (size == 1) {
333:     MatSetType(*mat,MATSEQDENSE);
334:     MatSeqDenseSetPreallocation(*mat,NULL);
335:   } else {
336:     MatSetType(*mat,MATMPIAIJ);
337:     MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);
338:   }

340:   for (i=0; i<N; i++) {

342:     VecSet(in,zero);
343:     VecSetValues(in,1,&i,&one,INSERT_VALUES);
344:     VecAssemblyBegin(in);
345:     VecAssemblyEnd(in);

347:     MatMult(inmat,in,out);

349:     VecGetArray(out,&array);
350:     MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);
351:     VecRestoreArray(out,&array);

353:   }
354:   PetscFree(rows);
355:   VecDestroy(&out);
356:   VecDestroy(&in);
357:   MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
358:   MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
359:   return(0);
360: }

362: /*@
363:   MatChop - Set all values in the matrix less than the tolerance to zero

365:   Input Parameters:
366: + A   - The matrix
367: - tol - The zero tolerance

369:   Output Parameters:
370: . A - The chopped matrix

372:   Level: intermediate

374: .seealso: MatCreate(), MatZeroEntries()
375:  @*/
376: PetscErrorCode MatChop(Mat A, PetscReal tol)
377: {
378:   PetscScalar    *newVals;
379:   PetscInt       *newCols;
380:   PetscInt       rStart, rEnd, numRows, maxRows, r, colMax = 0;

384:   MatGetOwnershipRange(A, &rStart, &rEnd);
385:   for (r = rStart; r < rEnd; ++r) {
386:     PetscInt ncols;

388:     MatGetRow(A, r, &ncols, NULL, NULL);
389:     colMax = PetscMax(colMax, ncols);
390:     MatRestoreRow(A, r, &ncols, NULL, NULL);
391:   }
392:   numRows = rEnd - rStart;
393:   MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
394:   PetscMalloc2(colMax,&newCols,colMax,&newVals);
395:   for (r = rStart; r < rStart+maxRows; ++r) {
396:     const PetscScalar *vals;
397:     const PetscInt    *cols;
398:     PetscInt           ncols, newcols, c;

400:     if (r < rEnd) {
401:       MatGetRow(A, r, &ncols, &cols, &vals);
402:       for (c = 0; c < ncols; ++c) {
403:         newCols[c] = cols[c];
404:         newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
405:       }
406:       newcols = ncols;
407:       MatRestoreRow(A, r, &ncols, &cols, &vals);
408:       MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
409:     }
410:     MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
411:     MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
412:   }
413:   PetscFree2(newCols,newVals);
414:   return(0);
415: }