Actual source code: axpy.c

petsc-3.11.1 2019-04-17
Report Typos and Errors

  2:  #include <petsc/private/matimpl.h>

  4: /*@
  5:    MatAXPY - Computes Y = a*X + Y.

  7:    Logically Collective on Mat

  9:    Input Parameters:
 10: +  a - the scalar multiplier
 11: .  X - the first matrix
 12: .  Y - the second matrix (overwritten with the result)
 13: -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
 14:          or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)

 16:    Level: intermediate

 18: .keywords: matrix, add

 20: .seealso: MatAYPX()
 21:  @*/
 22: PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
 23: {
 25:   PetscInt       M1,M2,N1,N2;
 26:   PetscInt       m1,m2,n1,n2;
 27:   PetscBool      sametype;

      /* X and Y must conform in both global and local (per-process) sizes */
 34:   MatGetSize(X,&M1,&N1);
 35:   MatGetSize(Y,&M2,&N2);
 36:   MatGetLocalSize(X,&m1,&n1);
 37:   MatGetLocalSize(Y,&m2,&n2);
 38:   if (M1 != M2 || N1 != N2) SETERRQ4(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_SIZ,"Non conforming matrix add: global sizes %D x %D, %D x %D",M1,M2,N1,N2);
 39:   if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: local sizes %D x %D, %D x %D",m1,m2,n1,n2);

      /* the type-specific fast path is only usable when X and Y share the same matrix type */
 41:   PetscStrcmp(((PetscObject)X)->type_name,((PetscObject)Y)->type_name,&sametype);
 42:   PetscLogEventBegin(MAT_AXPY,Y,0,0,0);
 43:   if (Y->ops->axpy && sametype) {
 44:     (*Y->ops->axpy)(Y,a,X,str);
 45:   } else {
 46:     if (str != DIFFERENT_NONZERO_PATTERN) {
 47:       MatAXPY_Basic(Y,a,X,str);
 48:     } else {
          /* patterns differ: preallocate a new matrix B for the union pattern,
             compute B = Y + a*X into it, then steal B's data into Y's header */
 49:       Mat B;

 51:       MatAXPY_Basic_Preallocate(Y,X,&B);
 52:       MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
 53:       MatHeaderReplace(Y,&B);
 54:     }
 55:   }
 56:   PetscLogEventEnd(MAT_AXPY,Y,0,0,0);
 57: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
      /* host values changed: mark any existing device copy of Y as stale */
 58:   if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
 59:     Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
 60:   }
 61: #endif
 62:   return(0);
 63: }

/* MatAXPY_Basic_Preallocate - Creates an empty matrix B, of the same type and layout as Y,
   preallocated for the union of the nonzero patterns of Y and X (so B can hold Y + a*X).
   Prefers a type-specific "MatAXPYGetPreallocation_C" composed method when Y provides one;
   otherwise runs a generic pattern-collection pass with a MATPREALLOCATOR matrix. */
 65: PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
 66: {
 68:   PetscErrorCode (*preall)(Mat,Mat,Mat*) = NULL;

 71:   /* look for any available faster alternative to the general preallocator */
 72:   PetscObjectQueryFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",&preall);
 73:   if (preall) {
 74:     (*preall)(Y,X,B);
 75:   } else { /* Use MatPreallocator; assumes X and Y have the same row-col distribution */
 76:     Mat      preallocator;
 77:     PetscInt r,rstart,rend;
 78:     PetscInt m,n,M,N;

 80:     MatGetSize(Y,&M,&N);
 81:     MatGetLocalSize(Y,&m,&n);
 82:     MatCreate(PetscObjectComm((PetscObject)Y),&preallocator);
 83:     MatSetType(preallocator,MATPREALLOCATOR);
 84:     MatSetSizes(preallocator,m,n,M,N);
 85:     MatSetUp(preallocator);
 86:     MatGetOwnershipRange(preallocator,&rstart,&rend);
        /* feed each locally owned row of Y then X into the preallocator;
           only the column positions matter, the values are ignored */
 87:     for (r = rstart; r < rend; ++r) {
 88:       PetscInt          ncols;
 89:       const PetscInt    *row;
 90:       const PetscScalar *vals;

 92:       MatGetRow(Y,r,&ncols,&row,&vals);
 93:       MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
 94:       MatRestoreRow(Y,r,&ncols,&row,&vals);
 95:       MatGetRow(X,r,&ncols,&row,&vals);
 96:       MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
 97:       MatRestoreRow(X,r,&ncols,&row,&vals);
 98:     }
 99:     MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);
100:     MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);

        /* create B with Y's type and sizes and transfer the collected pattern to it */
102:     MatCreate(PetscObjectComm((PetscObject)Y),B);
103:     MatSetType(*B,((PetscObject)Y)->type_name);
104:     MatSetSizes(*B,m,n,M,N);
105:     MatPreallocatorPreallocate(preallocator,PETSC_FALSE,*B);
106:     MatDestroy(&preallocator);
107:   }
108:   return(0);
109: }

/* MatAXPY_Basic - Generic fallback for Y = a*X + Y that works for any pair of matrix
   types: walks the locally owned rows of X and adds them into Y with MatSetValues.
   Assumes Y is already preallocated to hold the entries of X (str is SAME or SUBSET). */
111: PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
112: {
113:   PetscInt          i,start,end,j,ncols,m,n;
114:   PetscErrorCode    ierr;
115:   const PetscInt    *row;
116:   PetscScalar       *val;
117:   const PetscScalar *vals;

120:   MatGetSize(X,&m,&n);
121:   MatGetOwnershipRange(X,&start,&end);
122:   if (a == 1.0) {
      /* fast path: no scaling buffer needed, X's row values are added directly */
123:     for (i = start; i < end; i++) {
124:       MatGetRow(X,i,&ncols,&row,&vals);
125:       MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);
126:       MatRestoreRow(X,i,&ncols,&row,&vals);
127:     }
128:   } else {
129:     PetscInt vs = 100;
130:     /* realloc if needed, as this function may be used in parallel */
131:     PetscMalloc1(vs,&val);
132:     for (i=start; i<end; i++) {
133:       MatGetRow(X,i,&ncols,&row,&vals);
134:       if (vs < ncols) {
          /* grow the scratch buffer; capped at n since a row has at most n columns */
135:         vs   = PetscMin(2*ncols,n);
136:         PetscRealloc(vs*sizeof(*val),&val);
137:       }
          /* scale the row into the scratch buffer before adding into Y */
138:       for (j=0; j<ncols; j++) val[j] = a*vals[j];
139:       MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);
140:       MatRestoreRow(X,i,&ncols,&row,&vals);
141:     }
142:     PetscFree(val);
143:   }
144:   MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
145:   MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
146:   return(0);
147: }

/* MatAXPY_BasicWithPreallocation - Computes B = Y + a*X into the (already preallocated)
   matrix B, leaving Y and X untouched. Used by MatAXPY for DIFFERENT_NONZERO_PATTERN,
   where B was preallocated for the union pattern by MatAXPY_Basic_Preallocate. */
149: PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
150: {
151:   PetscInt          i,start,end,j,ncols,m,n;
152:   PetscErrorCode    ierr;
153:   const PetscInt    *row;
154:   PetscScalar       *val;
155:   const PetscScalar *vals;

158:   MatGetSize(X,&m,&n);
159:   MatGetOwnershipRange(X,&start,&end);
160:   if (a == 1.0) {
      /* fast path: copy each row of Y, then add the matching row of X unscaled */
161:     for (i = start; i < end; i++) {
162:       MatGetRow(Y,i,&ncols,&row,&vals);
163:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
164:       MatRestoreRow(Y,i,&ncols,&row,&vals);

166:       MatGetRow(X,i,&ncols,&row,&vals);
167:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
168:       MatRestoreRow(X,i,&ncols,&row,&vals);
169:     }
170:   } else {
171:     PetscInt vs = 100;
172:     /* realloc if needed, as this function may be used in parallel */
173:     PetscMalloc1(vs,&val);
174:     for (i=start; i<end; i++) {
        /* rows of Y go in unscaled ... */
175:       MatGetRow(Y,i,&ncols,&row,&vals);
176:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
177:       MatRestoreRow(Y,i,&ncols,&row,&vals);

        /* ... rows of X are scaled by a into a scratch buffer first */
179:       MatGetRow(X,i,&ncols,&row,&vals);
180:       if (vs < ncols) {
181:         vs   = PetscMin(2*ncols,n);
182:         PetscRealloc(vs*sizeof(*val),&val);
183:       }
184:       for (j=0; j<ncols; j++) val[j] = a*vals[j];
185:       MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);
186:       MatRestoreRow(X,i,&ncols,&row,&vals);
187:     }
188:     PetscFree(val);
189:   }
190:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
191:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
192:   return(0);
193: }

195: /*@
196:    MatShift - Computes Y =  Y + a I, where a is a PetscScalar and I is the identity matrix.

198:    Neighbor-wise Collective on Mat

200:    Input Parameters:
201: +  Y - the matrix (modified in place)
202: -  a - the PetscScalar added to each diagonal entry

204:    Level: intermediate

206:    Notes:
207:     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
208:    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
209:    entry).

211:    To form Y = Y + diag(V) use MatDiagonalSet()

213:    Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is
214:     preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix.

216: .keywords: matrix, add, shift

218: .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
219:  @*/
220: PetscErrorCode  MatShift(Mat Y,PetscScalar a)
221: {

      /* shifting requires an assembled, non-factored matrix */
226:   if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
227:   if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
228:   MatCheckPreallocated(Y,1);

      /* use the type-specific shift when available, otherwise the generic fallback */
230:   if (Y->ops->shift) {
231:     (*Y->ops->shift)(Y,a);
232:   } else {
233:     MatShift_Basic(Y,a);
234:   }

236:   PetscObjectStateIncrease((PetscObject)Y);
237: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
      /* host values changed: mark any existing device copy of Y as stale */
238:   if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
239:     Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
240:   }
241: #endif
242:   return(0);
243: }

/* MatDiagonalSet_Default - Generic fallback for MatDiagonalSet: for each locally owned
   row i, inserts (is == INSERT_VALUES) or adds (is == ADD_VALUES) D[i] at position (i,i).
   Assumes D has the same row distribution as Y. */
245: PetscErrorCode  MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
246: {
248:   PetscInt       i,start,end;
249:   PetscScalar    *v;

252:   MatGetOwnershipRange(Y,&start,&end);
253:   VecGetArray(D,&v);
254:   for (i=start; i<end; i++) {
      /* v is 0-based local storage, so global row i maps to v[i-start] */
255:     MatSetValues(Y,1,&i,1,&i,v+i-start,is);
256:   }
257:   VecRestoreArray(D,&v);
258:   MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
259:   MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
260:   return(0);
261: }

263: /*@
264:    MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
265:    that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
266:    INSERT_VALUES.

268:    Input Parameters:
269: +  Y - the input matrix (modified in place)
270: .  D - the diagonal matrix, represented as a vector
271: -  is - INSERT_VALUES or ADD_VALUES

273:    Neighbor-wise Collective on Mat and Vec

275:    Notes:
276:     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
277:    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
278:    entry).

280:    Level: intermediate

282: .keywords: matrix, add, shift, diagonal

284: .seealso: MatShift(), MatScale(), MatDiagonalScale()
285: @*/
286: PetscErrorCode  MatDiagonalSet(Mat Y,Vec D,InsertMode is)
287: {
289:   PetscInt       matlocal,veclocal;

      /* D must be distributed the same way as the rows of Y */
294:   MatGetLocalSize(Y,&matlocal,NULL);
295:   VecGetLocalSize(D,&veclocal);
296:   if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
      /* use the type-specific implementation when available, otherwise the generic fallback */
297:   if (Y->ops->diagonalset) {
298:     (*Y->ops->diagonalset)(Y,D,is);
299:   } else {
300:     MatDiagonalSet_Default(Y,D,is);
301:   }
302:   PetscObjectStateIncrease((PetscObject)Y);
303:   return(0);
304: }

306: /*@
307:    MatAYPX - Computes Y = a*Y + X.

309:    Logically on Mat

311:    Input Parameters:
312: +  a - the PetscScalar multiplier
313: .  Y - the first matrix
314: .  X - the second matrix
315: -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN

317:    Level: intermediate

319: .keywords: matrix, add

321: .seealso: MatAXPY()
322:  @*/
323: PetscErrorCode  MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
324: {
325:   PetscScalar    one = 1.0;
327:   PetscInt       mX,mY,nX,nY;

333:   MatGetSize(X,&mX,&nX);
334:   MatGetSize(X,&mY,&nY);
335:   if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY);

337:   MatScale(Y,a);
338:   MatAXPY(Y,one,X,str);
339:   return(0);
340: }

342: /*@
343:     MatComputeExplicitOperator - Computes the explicit matrix

345:     Collective on Mat

347:     Input Parameter:
348: .   inmat - the matrix

350:     Output Parameter:
351: .   mat - the explicit operator

353:     Notes:
354:     This computation is done by applying the operators to columns of the
355:     identity matrix.

357:     Currently, this routine uses a dense matrix format when 1 processor
358:     is used and a sparse format otherwise.  This routine is costly in general,
359:     and is recommended for use only with relatively small systems.

361:     Level: advanced

363: .keywords: Mat, compute, explicit, operator
364: @*/
365: PetscErrorCode  MatComputeExplicitOperator(Mat inmat,Mat *mat)
366: {
368:   MPI_Comm       comm;
369:   PetscMPIInt    size;


375:   PetscObjectGetComm((PetscObject)inmat,&comm);
376:   MPI_Comm_size(comm,&size);
      /* delegate to the shell-conversion machinery: dense in serial, AIJ in parallel */
377:   MatConvert_Shell(inmat,size == 1 ? MATSEQDENSE : MATAIJ,MAT_INITIAL_MATRIX,mat);
378:   return(0);
379: }

381: /*@
382:     MatComputeExplicitOperatorTranspose - Computes the explicit matrix representation of
383:         a give matrix that can apply MatMultTranspose()

385:     Collective on Mat

387:     Input Parameter:
388: .   inmat - the matrix

390:     Output Parameter:
391: .   mat - the explicit operator transposed

393:     Notes:
394:     This computation is done by applying the transpose of the operator to columns of the
395:     identity matrix.

397:     Currently, this routine uses a dense matrix format when 1 processor
398:     is used and a sparse format otherwise.  This routine is costly in general,
399:     and is recommended for use only with relatively small systems.

401:     Level: advanced

403: .keywords: Mat, compute, explicit, operator
404: @*/
405: PetscErrorCode  MatComputeExplicitOperatorTranspose(Mat inmat,Mat *mat)
406: {
407:   Vec            in,out;
409:   PetscInt       i,m,n,M,N,*rows,start,end;
410:   MPI_Comm       comm;
411:   PetscScalar    *array,zero = 0.0,one = 1.0;
412:   PetscMPIInt    size;


418:   PetscObjectGetComm((PetscObject)inmat,&comm);
419:   MPI_Comm_size(comm,&size);

421:   MatGetLocalSize(inmat,&m,&n);
422:   MatGetSize(inmat,&M,&N);
423:   MatCreateVecs(inmat,&in,&out);
424:   VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);
425:   VecGetOwnershipRange(out,&start,&end);
      /* rows[] holds the global indices of the locally owned rows of the result */
426:   PetscMalloc1(m,&rows);
427:   for (i=0; i<m; i++) rows[i] = start + i;

      /* dense result in serial, MPIAIJ otherwise; the off-diagonal preallocation
         N-n per row is a worst-case bound, not a tight estimate */
429:   MatCreate(comm,mat);
430:   MatSetSizes(*mat,m,n,M,N);
431:   if (size == 1) {
432:     MatSetType(*mat,MATSEQDENSE);
433:     MatSeqDenseSetPreallocation(*mat,NULL);
434:   } else {
435:     MatSetType(*mat,MATMPIAIJ);
436:     MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);
437:   }

      /* column i of the result is inmat^T applied to the i-th identity column e_i */
439:   for (i=0; i<N; i++) {

441:     VecSet(in,zero);
442:     VecSetValues(in,1,&i,&one,INSERT_VALUES);
443:     VecAssemblyBegin(in);
444:     VecAssemblyEnd(in);

446:     MatMultTranspose(inmat,in,out);

      /* scatter the local part of the result vector into column i of *mat */
448:     VecGetArray(out,&array);
449:     MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);
450:     VecRestoreArray(out,&array);

452:   }
453:   PetscFree(rows);
454:   VecDestroy(&out);
455:   VecDestroy(&in);
456:   MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
457:   MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
458:   return(0);
459: }

461: /*@
462:   MatChop - Set all values in the matrix less than the tolerance to zero

464:   Input Parameters:
465: + A   - The matrix
466: - tol - The zero tolerance

468:   Output Parameters:
469: . A - The chopped matrix

471:   Level: intermediate

473: .seealso: MatCreate(), MatZeroEntries()
474:  @*/
475: PetscErrorCode MatChop(Mat A, PetscReal tol)
476: {
477:   PetscScalar    *newVals;
478:   PetscInt       *newCols;
479:   PetscInt       rStart, rEnd, numRows, maxRows, r, colMax = 0;

      /* first pass: widest local row, to size the scratch buffers below */
483:   MatGetOwnershipRange(A, &rStart, &rEnd);
484:   for (r = rStart; r < rEnd; ++r) {
485:     PetscInt ncols;

487:     MatGetRow(A, r, &ncols, NULL, NULL);
488:     colMax = PetscMax(colMax, ncols);
489:     MatRestoreRow(A, r, &ncols, NULL, NULL);
490:   }
      /* maxRows is the largest local row count over all ranks: every rank must loop
         the same number of times because the per-iteration assembly is collective */
491:   numRows = rEnd - rStart;
492:   MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
493:   PetscMalloc2(colMax,&newCols,colMax,&newVals);
494:   for (r = rStart; r < rStart+maxRows; ++r) {
495:     const PetscScalar *vals;
496:     const PetscInt    *cols;
497:     PetscInt           ncols, newcols, c;

      /* ranks whose rows are exhausted (r >= rEnd) skip the row work but still
         participate in the assembly calls below */
499:     if (r < rEnd) {
500:       MatGetRow(A, r, &ncols, &cols, &vals);
        /* zero sub-tolerance entries; positions are kept so the pattern is unchanged */
501:       for (c = 0; c < ncols; ++c) {
502:         newCols[c] = cols[c];
503:         newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
504:       }
505:       newcols = ncols;
506:       MatRestoreRow(A, r, &ncols, &cols, &vals);
507:       MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
508:     }
      /* intentionally inside the loop: MatSetValues between assemblies requires a
         (collective) flush, and all ranks reach here maxRows times */
509:     MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
510:     MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
511:   }
512:   PetscFree2(newCols,newVals);
513:   return(0);
514: }