Actual source code: axpy.c

petsc-3.7.4 2016-10-02
Report Typos and Errors
  2: #include <petsc/private/matimpl.h>  /*I   "petscmat.h"  I*/

  6: /*@
  7:    MatAXPY - Computes Y = a*X + Y.

  9:    Logically  Collective on Mat

 11:    Input Parameters:
 12: +  a - the scalar multiplier
 13: .  X - the first matrix
 14: .  Y - the second matrix
 15: -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
 16:          or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)

 18:    Level: intermediate

 20: .keywords: matrix, add

 22: .seealso: MatAYPX()
 23:  @*/
 24: PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
 25: {
 27:   PetscInt       m1,m2,n1,n2;

 33:   MatGetSize(X,&m1,&n1);
 34:   MatGetSize(Y,&m2,&n2);
 35:   if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: %D %D %D %D",m1,m2,n1,n2);

 37:   PetscLogEventBegin(MAT_AXPY,Y,0,0,0);
 38:   if (Y->ops->axpy) {
 39:     (*Y->ops->axpy)(Y,a,X,str);
 40:   } else {
 41:     MatAXPY_Basic(Y,a,X,str);
 42:   }
 43:   PetscLogEventEnd(MAT_AXPY,Y,0,0,0);
 44: #if defined(PETSC_HAVE_CUSP)
 45:   if (Y->valid_GPU_matrix != PETSC_CUSP_UNALLOCATED) {
 46:     Y->valid_GPU_matrix = PETSC_CUSP_CPU;
 47:   }
 48: #elif defined(PETSC_HAVE_VIENNACL)
 49:   if (Y->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) {
 50:     Y->valid_GPU_matrix = PETSC_VIENNACL_CPU;
 51:   }
 52: #elif defined(PETSC_HAVE_VECCUDA)
 53:   if (Y->valid_GPU_matrix != PETSC_CUDA_UNALLOCATED) {
 54:     Y->valid_GPU_matrix = PETSC_CUDA_CPU;
 55:   }
 56: #endif
 57:   return(0);
 58: }

 62: PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
 63: {
 64:   PetscInt          i,start,end,j,ncols,m,n;
 65:   PetscErrorCode    ierr;
 66:   const PetscInt    *row;
 67:   PetscScalar       *val;
 68:   const PetscScalar *vals;

 71:   MatGetSize(X,&m,&n);
 72:   MatGetOwnershipRange(X,&start,&end);
 73:   if (a == 1.0) {
 74:     for (i = start; i < end; i++) {
 75:       MatGetRow(X,i,&ncols,&row,&vals);
 76:       MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);
 77:       MatRestoreRow(X,i,&ncols,&row,&vals);
 78:     }
 79:   } else {
 80:     PetscMalloc1(n+1,&val);
 81:     for (i=start; i<end; i++) {
 82:       MatGetRow(X,i,&ncols,&row,&vals);
 83:       for (j=0; j<ncols; j++) {
 84:         val[j] = a*vals[j];
 85:       }
 86:       MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);
 87:       MatRestoreRow(X,i,&ncols,&row,&vals);
 88:     }
 89:     PetscFree(val);
 90:   }
 91:   MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
 92:   MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
 93:   return(0);
 94: }

 98: PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
 99: {
100:   PetscInt          i,start,end,j,ncols,m,n;
101:   PetscErrorCode    ierr;
102:   const PetscInt    *row;
103:   PetscScalar       *val;
104:   const PetscScalar *vals;

107:   MatGetSize(X,&m,&n);
108:   MatGetOwnershipRange(X,&start,&end);
109:   if (a == 1.0) {
110:     for (i = start; i < end; i++) {
111:       MatGetRow(Y,i,&ncols,&row,&vals);
112:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
113:       MatRestoreRow(Y,i,&ncols,&row,&vals);

115:       MatGetRow(X,i,&ncols,&row,&vals);
116:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
117:       MatRestoreRow(X,i,&ncols,&row,&vals);
118:     }
119:   } else {
120:     PetscMalloc1(n+1,&val);
121:     for (i=start; i<end; i++) {
122:       MatGetRow(Y,i,&ncols,&row,&vals);
123:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
124:       MatRestoreRow(Y,i,&ncols,&row,&vals);

126:       MatGetRow(X,i,&ncols,&row,&vals);
127:       for (j=0; j<ncols; j++) {
128:         val[j] = a*vals[j];
129:       }
130:       MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);
131:       MatRestoreRow(X,i,&ncols,&row,&vals);
132:     }
133:     PetscFree(val);
134:   }
135:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
136:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
137:   return(0);
138: }

142: /*@
143:    MatShift - Computes Y =  Y + a I, where a is a PetscScalar and I is the identity matrix.

145:    Neighbor-wise Collective on Mat

147:    Input Parameters:
148: +  Y - the matrices
149: -  a - the PetscScalar

151:    Level: intermediate

153:    Notes: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
154:    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
155:    entry).

157:    Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is
158:     preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix.

160: .keywords: matrix, add, shift

162: .seealso: MatDiagonalSet()
163:  @*/
164: PetscErrorCode  MatShift(Mat Y,PetscScalar a)
165: {

170:   if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
171:   if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
172:   MatCheckPreallocated(Y,1);

174:   if (Y->ops->shift) {
175:     (*Y->ops->shift)(Y,a);
176:   } else {
177:     MatShift_Basic(Y,a);
178:   }

180: #if defined(PETSC_HAVE_CUSP)
181:   if (Y->valid_GPU_matrix != PETSC_CUSP_UNALLOCATED) {
182:     Y->valid_GPU_matrix = PETSC_CUSP_CPU;
183:   }
184: #elif defined(PETSC_HAVE_VIENNACL)
185:   if (Y->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) {
186:     Y->valid_GPU_matrix = PETSC_VIENNACL_CPU;
187:   }
188: #elif defined(PETSC_HAVE_VECCUDA)
189:   if (Y->valid_GPU_matrix != PETSC_CUDA_UNALLOCATED) {
190:     Y->valid_GPU_matrix = PETSC_CUDA_CPU;
191:   }
192: #endif
193:   return(0);
194: }

198: PetscErrorCode  MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
199: {
201:   PetscInt       i,start,end;
202:   PetscScalar    *v;

205:   MatGetOwnershipRange(Y,&start,&end);
206:   VecGetArray(D,&v);
207:   for (i=start; i<end; i++) {
208:     MatSetValues(Y,1,&i,1,&i,v+i-start,is);
209:   }
210:   VecRestoreArray(D,&v);
211:   MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
212:   MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
213:   return(0);
214: }

218: /*@
219:    MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
220:    that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
221:    INSERT_VALUES.

223:    Input Parameters:
224: +  Y - the input matrix
225: .  D - the diagonal matrix, represented as a vector
226: -  i - INSERT_VALUES or ADD_VALUES

228:    Neighbor-wise Collective on Mat and Vec

230:    Notes: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
231:    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
232:    entry).

234:    Level: intermediate

236: .keywords: matrix, add, shift, diagonal

238: .seealso: MatShift()
239: @*/
240: PetscErrorCode  MatDiagonalSet(Mat Y,Vec D,InsertMode is)
241: {
243:   PetscInt       matlocal,veclocal;

248:   MatGetLocalSize(Y,&matlocal,NULL);
249:   VecGetLocalSize(D,&veclocal);
250:   if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
251:   if (Y->ops->diagonalset) {
252:     (*Y->ops->diagonalset)(Y,D,is);
253:   } else {
254:     MatDiagonalSet_Default(Y,D,is);
255:   }
256:   return(0);
257: }

261: /*@
262:    MatAYPX - Computes Y = a*Y + X.

264:    Logically on Mat

266:    Input Parameters:
267: +  a - the PetscScalar multiplier
268: .  Y - the first matrix
269: .  X - the second matrix
270: -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN

272:    Level: intermediate

274: .keywords: matrix, add

276: .seealso: MatAXPY()
277:  @*/
278: PetscErrorCode  MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
279: {
280:   PetscScalar    one = 1.0;
282:   PetscInt       mX,mY,nX,nY;

288:   MatGetSize(X,&mX,&nX);
289:   MatGetSize(X,&mY,&nY);
290:   if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY);

292:   MatScale(Y,a);
293:   MatAXPY(Y,one,X,str);
294:   return(0);
295: }

299: /*@
300:     MatComputeExplicitOperator - Computes the explicit matrix

302:     Collective on Mat

304:     Input Parameter:
305: .   inmat - the matrix

307:     Output Parameter:
308: .   mat - the explict preconditioned operator

310:     Notes:
311:     This computation is done by applying the operators to columns of the
312:     identity matrix.

314:     Currently, this routine uses a dense matrix format when 1 processor
315:     is used and a sparse format otherwise.  This routine is costly in general,
316:     and is recommended for use only with relatively small systems.

318:     Level: advanced

320: .keywords: Mat, compute, explicit, operator
321: @*/
322: PetscErrorCode  MatComputeExplicitOperator(Mat inmat,Mat *mat)
323: {
324:   Vec            in,out;
326:   PetscInt       i,m,n,M,N,*rows,start,end;
327:   MPI_Comm       comm;
328:   PetscScalar    *array,zero = 0.0,one = 1.0;
329:   PetscMPIInt    size;


335:   PetscObjectGetComm((PetscObject)inmat,&comm);
336:   MPI_Comm_size(comm,&size);

338:   MatGetLocalSize(inmat,&m,&n);
339:   MatGetSize(inmat,&M,&N);
340:   MatCreateVecs(inmat,&in,&out);
341:   VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);
342:   VecGetOwnershipRange(out,&start,&end);
343:   PetscMalloc1(m,&rows);
344:   for (i=0; i<m; i++) rows[i] = start + i;

346:   MatCreate(comm,mat);
347:   MatSetSizes(*mat,m,n,M,N);
348:   if (size == 1) {
349:     MatSetType(*mat,MATSEQDENSE);
350:     MatSeqDenseSetPreallocation(*mat,NULL);
351:   } else {
352:     MatSetType(*mat,MATMPIAIJ);
353:     MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);
354:   }

356:   for (i=0; i<N; i++) {

358:     VecSet(in,zero);
359:     VecSetValues(in,1,&i,&one,INSERT_VALUES);
360:     VecAssemblyBegin(in);
361:     VecAssemblyEnd(in);

363:     MatMult(inmat,in,out);

365:     VecGetArray(out,&array);
366:     MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);
367:     VecRestoreArray(out,&array);

369:   }
370:   PetscFree(rows);
371:   VecDestroy(&out);
372:   VecDestroy(&in);
373:   MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
374:   MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
375:   return(0);
376: }

380: /*@
381:   MatChop - Set all values in the matrix less than the tolerance to zero

383:   Input Parameters:
384: + A   - The matrix
385: - tol - The zero tolerance

387:   Output Parameters:
388: . A - The chopped matrix

390:   Level: intermediate

392: .seealso: MatCreate(), MatZeroEntries()
393:  @*/
394: PetscErrorCode MatChop(Mat A, PetscReal tol)
395: {
396:   PetscScalar    *newVals;
397:   PetscInt       *newCols;
398:   PetscInt       rStart, rEnd, numRows, maxRows, r, colMax = 0;

402:   MatGetOwnershipRange(A, &rStart, &rEnd);
403:   for (r = rStart; r < rEnd; ++r) {
404:     PetscInt ncols;

406:     MatGetRow(A, r, &ncols, NULL, NULL);
407:     colMax = PetscMax(colMax, ncols);
408:     MatRestoreRow(A, r, &ncols, NULL, NULL);
409:   }
410:   numRows = rEnd - rStart;
411:   MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
412:   PetscMalloc2(colMax,&newCols,colMax,&newVals);
413:   for (r = rStart; r < rStart+maxRows; ++r) {
414:     const PetscScalar *vals;
415:     const PetscInt    *cols;
416:     PetscInt           ncols, newcols, c;

418:     if (r < rEnd) {
419:       MatGetRow(A, r, &ncols, &cols, &vals);
420:       for (c = 0; c < ncols; ++c) {
421:         newCols[c] = cols[c];
422:         newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
423:       }
424:       newcols = ncols;
425:       MatRestoreRow(A, r, &ncols, &cols, &vals);
426:       MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
427:     }
428:     MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
429:     MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
430:   }
431:   PetscFree2(newCols,newVals);
432:   return(0);
433: }