Actual source code: axpy.c

petsc-3.14.4 2021-02-03
Report Typos and Errors

  2: #include <petsc/private/matimpl.h>

  4: static PetscErrorCode MatTransposeAXPY_Private(Mat Y,PetscScalar a,Mat X,MatStructure str,Mat T)
  5: {
  6:   PetscErrorCode ierr,(*f)(Mat,Mat*);
  7:   Mat            A,F;

 10:   PetscObjectQueryFunction((PetscObject)T,"MatTransposeGetMat_C",&f);
 11:   if (f) {
 12:     MatTransposeGetMat(T,&A);
 13:     if (T == X) {
 14:       PetscInfo(NULL,"Explicitly transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");
 15:       MatTranspose(A,MAT_INITIAL_MATRIX,&F);
 16:       A = Y;
 17:     } else {
 18:       PetscInfo(NULL,"Transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n");
 19:       MatTranspose(X,MAT_INITIAL_MATRIX,&F);
 20:     }
 21:   } else {
 22:     MatHermitianTransposeGetMat(T,&A);
 23:     if (T == X) {
 24:       PetscInfo(NULL,"Explicitly Hermitian transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");
 25:       MatHermitianTranspose(A,MAT_INITIAL_MATRIX,&F);
 26:       A = Y;
 27:     } else {
 28:       PetscInfo(NULL,"Hermitian transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n");
 29:       MatHermitianTranspose(X,MAT_INITIAL_MATRIX,&F);
 30:     }
 31:   }
 32:   MatAXPY(A,a,F,str);
 33:   MatDestroy(&F);
 34:   return(0);
 35: }

 37: /*@
 38:    MatAXPY - Computes Y = a*X + Y.

 40:    Logically  Collective on Mat

 42:    Input Parameters:
 43: +  a - the scalar multiplier
 44: .  X - the first matrix
 45: .  Y - the second matrix
 46: -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
 47:          or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)

 49:    Notes: No operation is performed when a is zero.

 51:    Level: intermediate

 53: .seealso: MatAYPX()
 54:  @*/
 55: PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
 56: {
 58:   PetscInt       M1,M2,N1,N2;
 59:   PetscInt       m1,m2,n1,n2;
 60:   MatType        t1,t2;
 61:   PetscBool      sametype,transpose;

 68:   MatGetSize(X,&M1,&N1);
 69:   MatGetSize(Y,&M2,&N2);
 70:   MatGetLocalSize(X,&m1,&n1);
 71:   MatGetLocalSize(Y,&m2,&n2);
 72:   if (M1 != M2 || N1 != N2) SETERRQ4(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_SIZ,"Non conforming matrix add: global sizes %D x %D, %D x %D",M1,M2,N1,N2);
 73:   if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: local sizes %D x %D, %D x %D",m1,m2,n1,n2);
 74:   if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix (Y)");
 75:   if (!X->assembled) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix (X)");
 76:   if (a == (PetscScalar)0.0) return(0);
 77:   if (Y == X) {
 78:     MatScale(Y,1.0 + a);
 79:     return(0);
 80:   }
 81:   MatGetType(X,&t1);
 82:   MatGetType(Y,&t2);
 83:   PetscStrcmp(t1,t2,&sametype);
 84:   PetscLogEventBegin(MAT_AXPY,Y,0,0,0);
 85:   if (Y->ops->axpy && (sametype || X->ops->axpy == Y->ops->axpy)) {
 86:     (*Y->ops->axpy)(Y,a,X,str);
 87:   } else {
 88:     PetscStrcmp(t1,MATTRANSPOSEMAT,&transpose);
 89:     if (transpose) {
 90:       MatTransposeAXPY_Private(Y,a,X,str,X);
 91:     } else {
 92:       PetscStrcmp(t2,MATTRANSPOSEMAT,&transpose);
 93:       if (transpose) {
 94:         MatTransposeAXPY_Private(Y,a,X,str,Y);
 95:       } else {
 96:         MatAXPY_Basic(Y,a,X,str);
 97:       }
 98:     }
 99:   }
100:   PetscLogEventEnd(MAT_AXPY,Y,0,0,0);
101:   return(0);
102: }

104: PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
105: {
107:   PetscErrorCode (*preall)(Mat,Mat,Mat*) = NULL;

110:   /* look for any available faster alternative to the general preallocator */
111:   PetscObjectQueryFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",&preall);
112:   if (preall) {
113:     (*preall)(Y,X,B);
114:   } else { /* Use MatPrellocator, assumes same row-col distribution */
115:     Mat      preallocator;
116:     PetscInt r,rstart,rend;
117:     PetscInt m,n,M,N;

119:     MatGetRowUpperTriangular(Y);
120:     MatGetRowUpperTriangular(X);
121:     MatGetSize(Y,&M,&N);
122:     MatGetLocalSize(Y,&m,&n);
123:     MatCreate(PetscObjectComm((PetscObject)Y),&preallocator);
124:     MatSetType(preallocator,MATPREALLOCATOR);
125:     MatSetSizes(preallocator,m,n,M,N);
126:     MatSetUp(preallocator);
127:     MatGetOwnershipRange(preallocator,&rstart,&rend);
128:     for (r = rstart; r < rend; ++r) {
129:       PetscInt          ncols;
130:       const PetscInt    *row;
131:       const PetscScalar *vals;

133:       MatGetRow(Y,r,&ncols,&row,&vals);
134:       MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
135:       MatRestoreRow(Y,r,&ncols,&row,&vals);
136:       MatGetRow(X,r,&ncols,&row,&vals);
137:       MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
138:       MatRestoreRow(X,r,&ncols,&row,&vals);
139:     }
140:     MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);
141:     MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);
142:     MatRestoreRowUpperTriangular(Y);
143:     MatRestoreRowUpperTriangular(X);

145:     MatCreate(PetscObjectComm((PetscObject)Y),B);
146:     MatSetType(*B,((PetscObject)Y)->type_name);
147:     MatSetSizes(*B,m,n,M,N);
148:     MatPreallocatorPreallocate(preallocator,PETSC_FALSE,*B);
149:     MatDestroy(&preallocator);
150:   }
151:   return(0);
152: }

154: PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
155: {
157:   PetscBool      isshell,isdense;

160:   PetscObjectTypeCompare((PetscObject)Y,MATSHELL,&isshell);
161:   if (isshell) { /* MatShell has special support for AXPY */
162:     PetscErrorCode (*f)(Mat,PetscScalar,Mat,MatStructure);

164:     MatGetOperation(Y,MATOP_AXPY,(void (**)(void))&f);
165:     if (f) {
166:       (*f)(Y,a,X,str);
167:       return(0);
168:     }
169:   }
170:   /* no need to preallocate if Y is dense */
171:   PetscObjectBaseTypeCompareAny((PetscObject)Y,&isdense,MATSEQDENSE,MATMPIDENSE,"");
172:   if (isdense && str == DIFFERENT_NONZERO_PATTERN) str = SUBSET_NONZERO_PATTERN;
173:   if (str != DIFFERENT_NONZERO_PATTERN) {
174:     PetscInt          i,start,end,j,ncols,m,n;
175:     const PetscInt    *row;
176:     PetscScalar       *val;
177:     const PetscScalar *vals;

179:     MatGetSize(X,&m,&n);
180:     MatGetOwnershipRange(X,&start,&end);
181:     MatGetRowUpperTriangular(X);
182:     if (a == 1.0) {
183:       for (i = start; i < end; i++) {
184:         MatGetRow(X,i,&ncols,&row,&vals);
185:         MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);
186:         MatRestoreRow(X,i,&ncols,&row,&vals);
187:       }
188:     } else {
189:       PetscInt vs = 100;
190:       /* realloc if needed, as this function may be used in parallel */
191:       PetscMalloc1(vs,&val);
192:       for (i=start; i<end; i++) {
193:         MatGetRow(X,i,&ncols,&row,&vals);
194:         if (vs < ncols) {
195:           vs   = PetscMin(2*ncols,n);
196:           PetscRealloc(vs*sizeof(*val),&val);
197:         }
198:         for (j=0; j<ncols; j++) val[j] = a*vals[j];
199:         MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);
200:         MatRestoreRow(X,i,&ncols,&row,&vals);
201:       }
202:       PetscFree(val);
203:     }
204:     MatRestoreRowUpperTriangular(X);
205:     MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
206:     MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
207:   } else {
208:     Mat B;

210:     MatAXPY_Basic_Preallocate(Y,X,&B);
211:     MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
212:     /* TODO mat_tests-ex37_nsize-1_mat_type-baij_mat_block_size-2 fails with MatHeaderMerge */
213:     MatHeaderReplace(Y,&B);
214:   }
215:   return(0);
216: }

218: PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
219: {
220:   PetscInt          i,start,end,j,ncols,m,n;
221:   PetscErrorCode    ierr;
222:   const PetscInt    *row;
223:   PetscScalar       *val;
224:   const PetscScalar *vals;

227:   MatGetSize(X,&m,&n);
228:   MatGetOwnershipRange(X,&start,&end);
229:   MatGetRowUpperTriangular(Y);
230:   MatGetRowUpperTriangular(X);
231:   if (a == 1.0) {
232:     for (i = start; i < end; i++) {
233:       MatGetRow(Y,i,&ncols,&row,&vals);
234:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
235:       MatRestoreRow(Y,i,&ncols,&row,&vals);

237:       MatGetRow(X,i,&ncols,&row,&vals);
238:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
239:       MatRestoreRow(X,i,&ncols,&row,&vals);
240:     }
241:   } else {
242:     PetscInt vs = 100;
243:     /* realloc if needed, as this function may be used in parallel */
244:     PetscMalloc1(vs,&val);
245:     for (i=start; i<end; i++) {
246:       MatGetRow(Y,i,&ncols,&row,&vals);
247:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
248:       MatRestoreRow(Y,i,&ncols,&row,&vals);

250:       MatGetRow(X,i,&ncols,&row,&vals);
251:       if (vs < ncols) {
252:         vs   = PetscMin(2*ncols,n);
253:         PetscRealloc(vs*sizeof(*val),&val);
254:       }
255:       for (j=0; j<ncols; j++) val[j] = a*vals[j];
256:       MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);
257:       MatRestoreRow(X,i,&ncols,&row,&vals);
258:     }
259:     PetscFree(val);
260:   }
261:   MatRestoreRowUpperTriangular(Y);
262:   MatRestoreRowUpperTriangular(X);
263:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
264:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
265:   return(0);
266: }

268: /*@
269:    MatShift - Computes Y =  Y + a I, where a is a PetscScalar and I is the identity matrix.

271:    Neighbor-wise Collective on Mat

273:    Input Parameters:
274: +  Y - the matrices
275: -  a - the PetscScalar

277:    Level: intermediate

279:    Notes:
280:     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
281:    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
282:    entry). No operation is performed when a is zero.

284:    To form Y = Y + diag(V) use MatDiagonalSet()

286: .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
287:  @*/
288: PetscErrorCode  MatShift(Mat Y,PetscScalar a)
289: {

294:   if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
295:   if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
296:   MatCheckPreallocated(Y,1);
297:   if (a == 0.0) return(0);

299:   if (Y->ops->shift) {
300:     (*Y->ops->shift)(Y,a);
301:   } else {
302:     MatShift_Basic(Y,a);
303:   }

305:   PetscObjectStateIncrease((PetscObject)Y);
306:   return(0);
307: }

309: PetscErrorCode  MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
310: {
312:   PetscInt       i,start,end;
313:   PetscScalar    *v;

316:   MatGetOwnershipRange(Y,&start,&end);
317:   VecGetArray(D,&v);
318:   for (i=start; i<end; i++) {
319:     MatSetValues(Y,1,&i,1,&i,v+i-start,is);
320:   }
321:   VecRestoreArray(D,&v);
322:   MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
323:   MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
324:   return(0);
325: }

327: /*@
328:    MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
329:    that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
330:    INSERT_VALUES.

332:    Neighbor-wise Collective on Mat

334:    Input Parameters:
335: +  Y - the input matrix
336: .  D - the diagonal matrix, represented as a vector
337: -  i - INSERT_VALUES or ADD_VALUES

339:    Notes:
340:     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
341:    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
342:    entry).

344:    Level: intermediate

346: .seealso: MatShift(), MatScale(), MatDiagonalScale()
347: @*/
348: PetscErrorCode  MatDiagonalSet(Mat Y,Vec D,InsertMode is)
349: {
351:   PetscInt       matlocal,veclocal;

356:   MatGetLocalSize(Y,&matlocal,NULL);
357:   VecGetLocalSize(D,&veclocal);
358:   if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
359:   if (Y->ops->diagonalset) {
360:     (*Y->ops->diagonalset)(Y,D,is);
361:   } else {
362:     MatDiagonalSet_Default(Y,D,is);
363:   }
364:   PetscObjectStateIncrease((PetscObject)Y);
365:   return(0);
366: }

368: /*@
369:    MatAYPX - Computes Y = a*Y + X.

371:    Logically on Mat

373:    Input Parameters:
374: +  a - the PetscScalar multiplier
375: .  Y - the first matrix
376: .  X - the second matrix
377: -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN

379:    Level: intermediate

381: .seealso: MatAXPY()
382:  @*/
383: PetscErrorCode  MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
384: {
385:   PetscScalar    one = 1.0;
387:   PetscInt       mX,mY,nX,nY;

393:   MatGetSize(X,&mX,&nX);
394:   MatGetSize(X,&mY,&nY);
395:   if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY);
396:   MatScale(Y,a);
397:   MatAXPY(Y,one,X,str);
398:   return(0);
399: }

401: /*@
402:     MatComputeOperator - Computes the explicit matrix

404:     Collective on Mat

406:     Input Parameter:
407: +   inmat - the matrix
408: -   mattype - the matrix type for the explicit operator

410:     Output Parameter:
411: .   mat - the explict  operator

413:     Notes:
414:     This computation is done by applying the operators to columns of the identity matrix.
415:     This routine is costly in general, and is recommended for use only with relatively small systems.
416:     Currently, this routine uses a dense matrix format if mattype == NULL.

418:     Level: advanced

420: @*/
421: PetscErrorCode  MatComputeOperator(Mat inmat,MatType mattype,Mat *mat)
422: {

428:   MatConvert_Shell(inmat,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat);
429:   return(0);
430: }

432: /*@
433:     MatComputeOperatorTranspose - Computes the explicit matrix representation of
434:         a give matrix that can apply MatMultTranspose()

436:     Collective on Mat

438:     Input Parameter:
439: .   inmat - the matrix

441:     Output Parameter:
442: .   mat - the explict  operator transposed

444:     Notes:
445:     This computation is done by applying the transpose of the operator to columns of the identity matrix.
446:     This routine is costly in general, and is recommended for use only with relatively small systems.
447:     Currently, this routine uses a dense matrix format if mattype == NULL.

449:     Level: advanced

451: @*/
452: PetscErrorCode  MatComputeOperatorTranspose(Mat inmat,MatType mattype,Mat *mat)
453: {
454:   Mat            A;

460:   MatCreateTranspose(inmat,&A);
461:   MatConvert_Shell(A,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat);
462:   MatDestroy(&A);
463:   return(0);
464: }

466: /*@
467:   MatChop - Set all values in the matrix less than the tolerance to zero

469:   Input Parameters:
470: + A   - The matrix
471: - tol - The zero tolerance

473:   Output Parameters:
474: . A - The chopped matrix

476:   Level: intermediate

478: .seealso: MatCreate(), MatZeroEntries()
479:  @*/
480: PetscErrorCode MatChop(Mat A, PetscReal tol)
481: {
482:   PetscScalar    *newVals;
483:   PetscInt       *newCols;
484:   PetscInt       rStart, rEnd, numRows, maxRows, r, colMax = 0;

488:   MatGetOwnershipRange(A, &rStart, &rEnd);
489:   MatGetRowUpperTriangular(A);
490:   for (r = rStart; r < rEnd; ++r) {
491:     PetscInt ncols;

493:     MatGetRow(A, r, &ncols, NULL, NULL);
494:     colMax = PetscMax(colMax, ncols);
495:     MatRestoreRow(A, r, &ncols, NULL, NULL);
496:   }
497:   numRows = rEnd - rStart;
498:   MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
499:   PetscMalloc2(colMax,&newCols,colMax,&newVals);
500:   for (r = rStart; r < rStart+maxRows; ++r) {
501:     const PetscScalar *vals;
502:     const PetscInt    *cols;
503:     PetscInt           ncols, newcols, c;

505:     if (r < rEnd) {
506:       MatGetRow(A, r, &ncols, &cols, &vals);
507:       for (c = 0; c < ncols; ++c) {
508:         newCols[c] = cols[c];
509:         newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
510:       }
511:       newcols = ncols;
512:       MatRestoreRow(A, r, &ncols, &cols, &vals);
513:       MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
514:     }
515:     MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
516:     MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
517:   }
518:   MatRestoreRowUpperTriangular(A);
519:   PetscFree2(newCols,newVals);
520:   return(0);
521: }