Actual source code: axpy.c

petsc-3.10.3 2018-12-18
Report Typos and Errors

  2:  #include <petsc/private/matimpl.h>

  4: /*@
  5:    MatAXPY - Computes Y = a*X + Y.

  7:    Logically  Collective on Mat

  9:    Input Parameters:
 10: +  a - the scalar multiplier
 11: .  X - the first matrix
 12: .  Y - the second matrix
 13: -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
 14:          or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)

 16:    Level: intermediate

 18: .keywords: matrix, add

 20: .seealso: MatAYPX()
 21:  @*/
 22: PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
 23: {
 25:   PetscInt       m1,m2,n1,n2;
 26:   PetscBool      sametype;

 32:   MatGetSize(X,&m1,&n1);
 33:   MatGetSize(Y,&m2,&n2);
 34:   if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: %D %D %D %D",m1,m2,n1,n2);

 36:   PetscStrcmp(((PetscObject)X)->type_name,((PetscObject)Y)->type_name,&sametype);
 37:   PetscLogEventBegin(MAT_AXPY,Y,0,0,0);
 38:   if (Y->ops->axpy && sametype) {
 39:     (*Y->ops->axpy)(Y,a,X,str);
 40:   } else {
 41:     MatAXPY_Basic(Y,a,X,str);
 42:   }
 43:   PetscLogEventEnd(MAT_AXPY,Y,0,0,0);
 44: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_VECCUDA)
 45:   if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
 46:     Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
 47:   }
 48: #endif
 49:   return(0);
 50: }

 52: PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
 53: {
 54:   PetscInt          i,start,end,j,ncols,m,n;
 55:   PetscErrorCode    ierr;
 56:   const PetscInt    *row;
 57:   PetscScalar       *val;
 58:   const PetscScalar *vals;

 61:   MatGetSize(X,&m,&n);
 62:   MatGetOwnershipRange(X,&start,&end);
 63:   if (a == 1.0) {
 64:     for (i = start; i < end; i++) {
 65:       MatGetRow(X,i,&ncols,&row,&vals);
 66:       MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);
 67:       MatRestoreRow(X,i,&ncols,&row,&vals);
 68:     }
 69:   } else {
 70:     PetscMalloc1(n+1,&val);
 71:     for (i=start; i<end; i++) {
 72:       MatGetRow(X,i,&ncols,&row,&vals);
 73:       for (j=0; j<ncols; j++) {
 74:         val[j] = a*vals[j];
 75:       }
 76:       MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);
 77:       MatRestoreRow(X,i,&ncols,&row,&vals);
 78:     }
 79:     PetscFree(val);
 80:   }
 81:   MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
 82:   MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
 83:   return(0);
 84: }

 86: PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
 87: {
 88:   PetscInt          i,start,end,j,ncols,m,n;
 89:   PetscErrorCode    ierr;
 90:   const PetscInt    *row;
 91:   PetscScalar       *val;
 92:   const PetscScalar *vals;

 95:   MatGetSize(X,&m,&n);
 96:   MatGetOwnershipRange(X,&start,&end);
 97:   if (a == 1.0) {
 98:     for (i = start; i < end; i++) {
 99:       MatGetRow(Y,i,&ncols,&row,&vals);
100:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
101:       MatRestoreRow(Y,i,&ncols,&row,&vals);

103:       MatGetRow(X,i,&ncols,&row,&vals);
104:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
105:       MatRestoreRow(X,i,&ncols,&row,&vals);
106:     }
107:   } else {
108:     PetscMalloc1(n+1,&val);
109:     for (i=start; i<end; i++) {
110:       MatGetRow(Y,i,&ncols,&row,&vals);
111:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
112:       MatRestoreRow(Y,i,&ncols,&row,&vals);

114:       MatGetRow(X,i,&ncols,&row,&vals);
115:       for (j=0; j<ncols; j++) {
116:         val[j] = a*vals[j];
117:       }
118:       MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);
119:       MatRestoreRow(X,i,&ncols,&row,&vals);
120:     }
121:     PetscFree(val);
122:   }
123:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
124:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
125:   return(0);
126: }

128: /*@
129:    MatShift - Computes Y =  Y + a I, where a is a PetscScalar and I is the identity matrix.

131:    Neighbor-wise Collective on Mat

133:    Input Parameters:
134: +  Y - the matrices
135: -  a - the PetscScalar

137:    Level: intermediate

139:    Notes:
140:     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
141:    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
142:    entry).

144:    To form Y = Y + diag(V) use MatDiagonalSet()

146:    Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is
147:     preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix.

149: .keywords: matrix, add, shift

151: .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
152:  @*/
153: PetscErrorCode  MatShift(Mat Y,PetscScalar a)
154: {

159:   if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
160:   if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
161:   MatCheckPreallocated(Y,1);

163:   if (Y->ops->shift) {
164:     (*Y->ops->shift)(Y,a);
165:   } else {
166:     MatShift_Basic(Y,a);
167:   }

169:   PetscObjectStateIncrease((PetscObject)Y);
170: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_VECCUDA)
171:   if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
172:     Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
173:   }
174: #endif
175:   return(0);
176: }

178: PetscErrorCode  MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
179: {
181:   PetscInt       i,start,end;
182:   PetscScalar    *v;

185:   MatGetOwnershipRange(Y,&start,&end);
186:   VecGetArray(D,&v);
187:   for (i=start; i<end; i++) {
188:     MatSetValues(Y,1,&i,1,&i,v+i-start,is);
189:   }
190:   VecRestoreArray(D,&v);
191:   MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
192:   MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
193:   return(0);
194: }

196: /*@
197:    MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
198:    that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
199:    INSERT_VALUES.

201:    Input Parameters:
202: +  Y - the input matrix
203: .  D - the diagonal matrix, represented as a vector
204: -  i - INSERT_VALUES or ADD_VALUES

206:    Neighbor-wise Collective on Mat and Vec

208:    Notes:
209:     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
210:    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
211:    entry).

213:    Level: intermediate

215: .keywords: matrix, add, shift, diagonal

217: .seealso: MatShift(), MatScale(), MatDiagonalScale()
218: @*/
219: PetscErrorCode  MatDiagonalSet(Mat Y,Vec D,InsertMode is)
220: {
222:   PetscInt       matlocal,veclocal;

227:   MatGetLocalSize(Y,&matlocal,NULL);
228:   VecGetLocalSize(D,&veclocal);
229:   if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
230:   if (Y->ops->diagonalset) {
231:     (*Y->ops->diagonalset)(Y,D,is);
232:   } else {
233:     MatDiagonalSet_Default(Y,D,is);
234:   }
235:   PetscObjectStateIncrease((PetscObject)Y);
236:   return(0);
237: }

239: /*@
240:    MatAYPX - Computes Y = a*Y + X.

242:    Logically on Mat

244:    Input Parameters:
245: +  a - the PetscScalar multiplier
246: .  Y - the first matrix
247: .  X - the second matrix
248: -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN

250:    Level: intermediate

252: .keywords: matrix, add

254: .seealso: MatAXPY()
255:  @*/
256: PetscErrorCode  MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
257: {
258:   PetscScalar    one = 1.0;
260:   PetscInt       mX,mY,nX,nY;

266:   MatGetSize(X,&mX,&nX);
267:   MatGetSize(X,&mY,&nY);
268:   if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY);

270:   MatScale(Y,a);
271:   MatAXPY(Y,one,X,str);
272:   return(0);
273: }

275: /*@
276:     MatComputeExplicitOperator - Computes the explicit matrix

278:     Collective on Mat

280:     Input Parameter:
281: .   inmat - the matrix

283:     Output Parameter:
284: .   mat - the explict  operator

286:     Notes:
287:     This computation is done by applying the operators to columns of the
288:     identity matrix.

290:     Currently, this routine uses a dense matrix format when 1 processor
291:     is used and a sparse format otherwise.  This routine is costly in general,
292:     and is recommended for use only with relatively small systems.

294:     Level: advanced

296: .keywords: Mat, compute, explicit, operator
297: @*/
298: PetscErrorCode  MatComputeExplicitOperator(Mat inmat,Mat *mat)
299: {
300:   Vec            in,out;
302:   PetscInt       i,m,n,M,N,*rows,start,end;
303:   MPI_Comm       comm;
304:   PetscScalar    *array,zero = 0.0,one = 1.0;
305:   PetscMPIInt    size;


311:   PetscObjectGetComm((PetscObject)inmat,&comm);
312:   MPI_Comm_size(comm,&size);

314:   MatGetLocalSize(inmat,&m,&n);
315:   MatGetSize(inmat,&M,&N);
316:   MatCreateVecs(inmat,&in,&out);
317:   VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);
318:   VecGetOwnershipRange(out,&start,&end);
319:   PetscMalloc1(m,&rows);
320:   for (i=0; i<m; i++) rows[i] = start + i;

322:   MatCreate(comm,mat);
323:   MatSetSizes(*mat,m,n,M,N);
324:   if (size == 1) {
325:     MatSetType(*mat,MATSEQDENSE);
326:     MatSeqDenseSetPreallocation(*mat,NULL);
327:   } else {
328:     MatSetType(*mat,MATMPIAIJ);
329:     MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);
330:   }

332:   for (i=0; i<N; i++) {

334:     VecSet(in,zero);
335:     VecSetValues(in,1,&i,&one,INSERT_VALUES);
336:     VecAssemblyBegin(in);
337:     VecAssemblyEnd(in);

339:     MatMult(inmat,in,out);

341:     VecGetArray(out,&array);
342:     MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);
343:     VecRestoreArray(out,&array);

345:   }
346:   PetscFree(rows);
347:   VecDestroy(&out);
348:   VecDestroy(&in);
349:   MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
350:   MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
351:   return(0);
352: }

354: /*@
355:     MatComputeExplicitOperatorTranspose - Computes the explicit matrix representation of
356:         a give matrix that can apply MatMultTranspose()

358:     Collective on Mat

360:     Input Parameter:
361: .   inmat - the matrix

363:     Output Parameter:
364: .   mat - the explict  operator transposed

366:     Notes:
367:     This computation is done by applying the transpose of the operator to columns of the
368:     identity matrix.

370:     Currently, this routine uses a dense matrix format when 1 processor
371:     is used and a sparse format otherwise.  This routine is costly in general,
372:     and is recommended for use only with relatively small systems.

374:     Level: advanced

376: .keywords: Mat, compute, explicit, operator
377: @*/
378: PetscErrorCode  MatComputeExplicitOperatorTranspose(Mat inmat,Mat *mat)
379: {
380:   Vec            in,out;
382:   PetscInt       i,m,n,M,N,*rows,start,end;
383:   MPI_Comm       comm;
384:   PetscScalar    *array,zero = 0.0,one = 1.0;
385:   PetscMPIInt    size;


391:   PetscObjectGetComm((PetscObject)inmat,&comm);
392:   MPI_Comm_size(comm,&size);

394:   MatGetLocalSize(inmat,&m,&n);
395:   MatGetSize(inmat,&M,&N);
396:   MatCreateVecs(inmat,&in,&out);
397:   VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);
398:   VecGetOwnershipRange(out,&start,&end);
399:   PetscMalloc1(m,&rows);
400:   for (i=0; i<m; i++) rows[i] = start + i;

402:   MatCreate(comm,mat);
403:   MatSetSizes(*mat,m,n,M,N);
404:   if (size == 1) {
405:     MatSetType(*mat,MATSEQDENSE);
406:     MatSeqDenseSetPreallocation(*mat,NULL);
407:   } else {
408:     MatSetType(*mat,MATMPIAIJ);
409:     MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);
410:   }

412:   for (i=0; i<N; i++) {

414:     VecSet(in,zero);
415:     VecSetValues(in,1,&i,&one,INSERT_VALUES);
416:     VecAssemblyBegin(in);
417:     VecAssemblyEnd(in);

419:     MatMultTranspose(inmat,in,out);

421:     VecGetArray(out,&array);
422:     MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);
423:     VecRestoreArray(out,&array);

425:   }
426:   PetscFree(rows);
427:   VecDestroy(&out);
428:   VecDestroy(&in);
429:   MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
430:   MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
431:   return(0);
432: }

434: /*@
435:   MatChop - Set all values in the matrix less than the tolerance to zero

437:   Input Parameters:
438: + A   - The matrix
439: - tol - The zero tolerance

441:   Output Parameters:
442: . A - The chopped matrix

444:   Level: intermediate

446: .seealso: MatCreate(), MatZeroEntries()
447:  @*/
448: PetscErrorCode MatChop(Mat A, PetscReal tol)
449: {
450:   PetscScalar    *newVals;
451:   PetscInt       *newCols;
452:   PetscInt       rStart, rEnd, numRows, maxRows, r, colMax = 0;

456:   MatGetOwnershipRange(A, &rStart, &rEnd);
457:   for (r = rStart; r < rEnd; ++r) {
458:     PetscInt ncols;

460:     MatGetRow(A, r, &ncols, NULL, NULL);
461:     colMax = PetscMax(colMax, ncols);
462:     MatRestoreRow(A, r, &ncols, NULL, NULL);
463:   }
464:   numRows = rEnd - rStart;
465:   MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
466:   PetscMalloc2(colMax,&newCols,colMax,&newVals);
467:   for (r = rStart; r < rStart+maxRows; ++r) {
468:     const PetscScalar *vals;
469:     const PetscInt    *cols;
470:     PetscInt           ncols, newcols, c;

472:     if (r < rEnd) {
473:       MatGetRow(A, r, &ncols, &cols, &vals);
474:       for (c = 0; c < ncols; ++c) {
475:         newCols[c] = cols[c];
476:         newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
477:       }
478:       newcols = ncols;
479:       MatRestoreRow(A, r, &ncols, &cols, &vals);
480:       MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
481:     }
482:     MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
483:     MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
484:   }
485:   PetscFree2(newCols,newVals);
486:   return(0);
487: }