Actual source code: axpy.c

petsc-3.7.0 2016-04-25
Report Typos and Errors
  2: #include <petsc/private/matimpl.h>  /*I   "petscmat.h"  I*/

  6: /*@
  7:    MatAXPY - Computes Y = a*X + Y.

  9:    Logically  Collective on Mat

 11:    Input Parameters:
 12: +  a - the scalar multiplier
 13: .  X - the first matrix
 14: .  Y - the second matrix
 15: -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
 16:          or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)

 18:    Level: intermediate

 20: .keywords: matrix, add

 22: .seealso: MatAYPX()
 23:  @*/
 24: PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
 25: {
 27:   PetscInt       m1,m2,n1,n2;

 33:   MatGetSize(X,&m1,&n1);
 34:   MatGetSize(Y,&m2,&n2);
 35:   if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: %D %D %D %D",m1,m2,n1,n2);

 37:   PetscLogEventBegin(MAT_AXPY,Y,0,0,0);
 38:   if (Y->ops->axpy) {
 39:     (*Y->ops->axpy)(Y,a,X,str);
 40:   } else {
 41:     MatAXPY_Basic(Y,a,X,str);
 42:   }
 43:   PetscLogEventEnd(MAT_AXPY,Y,0,0,0);
 44: #if defined(PETSC_HAVE_CUSP)
 45:   if (Y->valid_GPU_matrix != PETSC_CUSP_UNALLOCATED) {
 46:     Y->valid_GPU_matrix = PETSC_CUSP_CPU;
 47:   }
 48: #elif defined(PETSC_HAVE_VIENNACL)
 49:   if (Y->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) {
 50:     Y->valid_GPU_matrix = PETSC_VIENNACL_CPU;
 51:   }
 52: #elif defined(PETSC_HAVE_VECCUDA)
 53:   if (Y->valid_GPU_matrix != PETSC_CUDA_UNALLOCATED) {
 54:     Y->valid_GPU_matrix = PETSC_CUDA_CPU;
 55:   }
 56: #endif
 57:   return(0);
 58: }

 62: PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
 63: {
 64:   PetscInt          i,start,end,j,ncols,m,n;
 65:   PetscErrorCode    ierr;
 66:   const PetscInt    *row;
 67:   PetscScalar       *val;
 68:   const PetscScalar *vals;

 71:   MatGetSize(X,&m,&n);
 72:   MatGetOwnershipRange(X,&start,&end);
 73:   if (a == 1.0) {
 74:     for (i = start; i < end; i++) {
 75:       MatGetRow(X,i,&ncols,&row,&vals);
 76:       MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);
 77:       MatRestoreRow(X,i,&ncols,&row,&vals);
 78:     }
 79:   } else {
 80:     PetscMalloc1(n+1,&val);
 81:     for (i=start; i<end; i++) {
 82:       MatGetRow(X,i,&ncols,&row,&vals);
 83:       for (j=0; j<ncols; j++) {
 84:         val[j] = a*vals[j];
 85:       }
 86:       MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);
 87:       MatRestoreRow(X,i,&ncols,&row,&vals);
 88:     }
 89:     PetscFree(val);
 90:   }
 91:   MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
 92:   MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
 93:   return(0);
 94: }

 98: PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
 99: {
100:   PetscInt          i,start,end,j,ncols,m,n;
101:   PetscErrorCode    ierr;
102:   const PetscInt    *row;
103:   PetscScalar       *val;
104:   const PetscScalar *vals;

107:   MatGetSize(X,&m,&n);
108:   MatGetOwnershipRange(X,&start,&end);
109:   if (a == 1.0) {
110:     for (i = start; i < end; i++) {
111:       MatGetRow(Y,i,&ncols,&row,&vals);
112:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
113:       MatRestoreRow(Y,i,&ncols,&row,&vals);

115:       MatGetRow(X,i,&ncols,&row,&vals);
116:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
117:       MatRestoreRow(X,i,&ncols,&row,&vals);
118:     }
119:   } else {
120:     PetscMalloc1(n+1,&val);
121:     for (i=start; i<end; i++) {
122:       MatGetRow(Y,i,&ncols,&row,&vals);
123:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
124:       MatRestoreRow(Y,i,&ncols,&row,&vals);

126:       MatGetRow(X,i,&ncols,&row,&vals);
127:       for (j=0; j<ncols; j++) {
128:         val[j] = a*vals[j];
129:       }
130:       MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);
131:       MatRestoreRow(X,i,&ncols,&row,&vals);
132:     }
133:     PetscFree(val);
134:   }
135:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
136:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
137:   return(0);
138: }

142: /*@
143:    MatShift - Computes Y =  Y + a I, where a is a PetscScalar and I is the identity matrix.

145:    Neighbor-wise Collective on Mat

147:    Input Parameters:
148: +  Y - the matrices
149: -  a - the PetscScalar

151:    Level: intermediate

153:    Notes: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
154:    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
155:    entry).

157:    Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is
158:     preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix.

160: .keywords: matrix, add, shift

162: .seealso: MatDiagonalSet()
163:  @*/
164: PetscErrorCode  MatShift(Mat Y,PetscScalar a)
165: {

170:   if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
171:   if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
172:   MatCheckPreallocated(Y,1);

174:   (*Y->ops->shift)(Y,a);

176: #if defined(PETSC_HAVE_CUSP)
177:   if (Y->valid_GPU_matrix != PETSC_CUSP_UNALLOCATED) {
178:     Y->valid_GPU_matrix = PETSC_CUSP_CPU;
179:   }
180: #elif defined(PETSC_HAVE_VIENNACL)
181:   if (Y->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) {
182:     Y->valid_GPU_matrix = PETSC_VIENNACL_CPU;
183:   }
184: #elif defined(PETSC_HAVE_VECCUDA)
185:   if (Y->valid_GPU_matrix != PETSC_CUDA_UNALLOCATED) {
186:     Y->valid_GPU_matrix = PETSC_CUDA_CPU;
187:   }
188: #endif
189:   return(0);
190: }

194: PetscErrorCode  MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
195: {
197:   PetscInt       i,start,end;
198:   PetscScalar    *v;

201:   MatGetOwnershipRange(Y,&start,&end);
202:   VecGetArray(D,&v);
203:   for (i=start; i<end; i++) {
204:     MatSetValues(Y,1,&i,1,&i,v+i-start,is);
205:   }
206:   VecRestoreArray(D,&v);
207:   MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
208:   MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
209:   return(0);
210: }

214: /*@
215:    MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
216:    that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
217:    INSERT_VALUES.

219:    Input Parameters:
220: +  Y - the input matrix
221: .  D - the diagonal matrix, represented as a vector
222: -  i - INSERT_VALUES or ADD_VALUES

224:    Neighbor-wise Collective on Mat and Vec

226:    Notes: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
227:    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
228:    entry).

230:    Level: intermediate

232: .keywords: matrix, add, shift, diagonal

234: .seealso: MatShift()
235: @*/
236: PetscErrorCode  MatDiagonalSet(Mat Y,Vec D,InsertMode is)
237: {
239:   PetscInt       matlocal,veclocal;

244:   MatGetLocalSize(Y,&matlocal,NULL);
245:   VecGetLocalSize(D,&veclocal);
246:   if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
247:   if (Y->ops->diagonalset) {
248:     (*Y->ops->diagonalset)(Y,D,is);
249:   } else {
250:     MatDiagonalSet_Default(Y,D,is);
251:   }
252:   return(0);
253: }

257: /*@
258:    MatAYPX - Computes Y = a*Y + X.

260:    Logically on Mat

262:    Input Parameters:
263: +  a - the PetscScalar multiplier
264: .  Y - the first matrix
265: .  X - the second matrix
266: -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN

268:    Level: intermediate

270: .keywords: matrix, add

272: .seealso: MatAXPY()
273:  @*/
274: PetscErrorCode  MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
275: {
276:   PetscScalar    one = 1.0;
278:   PetscInt       mX,mY,nX,nY;

284:   MatGetSize(X,&mX,&nX);
285:   MatGetSize(X,&mY,&nY);
286:   if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY);

288:   MatScale(Y,a);
289:   MatAXPY(Y,one,X,str);
290:   return(0);
291: }

295: /*@
296:     MatComputeExplicitOperator - Computes the explicit matrix

298:     Collective on Mat

300:     Input Parameter:
301: .   inmat - the matrix

303:     Output Parameter:
304: .   mat - the explict preconditioned operator

306:     Notes:
307:     This computation is done by applying the operators to columns of the
308:     identity matrix.

310:     Currently, this routine uses a dense matrix format when 1 processor
311:     is used and a sparse format otherwise.  This routine is costly in general,
312:     and is recommended for use only with relatively small systems.

314:     Level: advanced

316: .keywords: Mat, compute, explicit, operator
317: @*/
318: PetscErrorCode  MatComputeExplicitOperator(Mat inmat,Mat *mat)
319: {
320:   Vec            in,out;
322:   PetscInt       i,m,n,M,N,*rows,start,end;
323:   MPI_Comm       comm;
324:   PetscScalar    *array,zero = 0.0,one = 1.0;
325:   PetscMPIInt    size;


331:   PetscObjectGetComm((PetscObject)inmat,&comm);
332:   MPI_Comm_size(comm,&size);

334:   MatGetLocalSize(inmat,&m,&n);
335:   MatGetSize(inmat,&M,&N);
336:   MatCreateVecs(inmat,&in,&out);
337:   VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);
338:   VecGetOwnershipRange(out,&start,&end);
339:   PetscMalloc1(m,&rows);
340:   for (i=0; i<m; i++) rows[i] = start + i;

342:   MatCreate(comm,mat);
343:   MatSetSizes(*mat,m,n,M,N);
344:   if (size == 1) {
345:     MatSetType(*mat,MATSEQDENSE);
346:     MatSeqDenseSetPreallocation(*mat,NULL);
347:   } else {
348:     MatSetType(*mat,MATMPIAIJ);
349:     MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);
350:   }

352:   for (i=0; i<N; i++) {

354:     VecSet(in,zero);
355:     VecSetValues(in,1,&i,&one,INSERT_VALUES);
356:     VecAssemblyBegin(in);
357:     VecAssemblyEnd(in);

359:     MatMult(inmat,in,out);

361:     VecGetArray(out,&array);
362:     MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);
363:     VecRestoreArray(out,&array);

365:   }
366:   PetscFree(rows);
367:   VecDestroy(&out);
368:   VecDestroy(&in);
369:   MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
370:   MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
371:   return(0);
372: }

376: /*@
377:   MatChop - Set all values in the matrix less than the tolerance to zero

379:   Input Parameters:
380: + A   - The matrix
381: - tol - The zero tolerance

383:   Output Parameters:
384: . A - The chopped matrix

386:   Level: intermediate

388: .seealso: MatCreate(), MatZeroEntries()
389:  @*/
390: PetscErrorCode MatChop(Mat A, PetscReal tol)
391: {
392:   PetscScalar    *newVals;
393:   PetscInt       *newCols;
394:   PetscInt       rStart, rEnd, numRows, maxRows, r, colMax = 0;

398:   MatGetOwnershipRange(A, &rStart, &rEnd);
399:   for (r = rStart; r < rEnd; ++r) {
400:     PetscInt ncols;

402:     MatGetRow(A, r, &ncols, NULL, NULL);
403:     colMax = PetscMax(colMax, ncols);
404:     MatRestoreRow(A, r, &ncols, NULL, NULL);
405:   }
406:   numRows = rEnd - rStart;
407:   MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
408:   PetscMalloc2(colMax,&newCols,colMax,&newVals);
409:   for (r = rStart; r < rStart+maxRows; ++r) {
410:     const PetscScalar *vals;
411:     const PetscInt    *cols;
412:     PetscInt           ncols, newcols, c;

414:     if (r < rEnd) {
415:       MatGetRow(A, r, &ncols, &cols, &vals);
416:       for (c = 0; c < ncols; ++c) {
417:         newCols[c] = cols[c];
418:         newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
419:       }
420:       newcols = ncols;
421:       MatRestoreRow(A, r, &ncols, &cols, &vals);
422:       MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
423:     }
424:     MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
425:     MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
426:   }
427:   PetscFree2(newCols,newVals);
428:   return(0);
429: }