#include <../src/mat/impls/baij/seq/baij.h> #include <../src/mat/impls/dense/seq/dense.h> #include #include #include #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) #include #endif PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscErrorCode ierr; PetscInt row,i,j,k,l,m,n,*nidx,isz,val,ival; const PetscInt *idx; PetscInt start,end,*ai,*aj,bs,*nidx2; PetscBT table; PetscFunctionBegin; m = a->mbs; ai = a->i; aj = a->j; bs = A->rmap->bs; if (ov < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified"); ierr = PetscBTCreate(m,&table);CHKERRQ(ierr); ierr = PetscMalloc1(m+1,&nidx);CHKERRQ(ierr); ierr = PetscMalloc1(A->rmap->N+1,&nidx2);CHKERRQ(ierr); for (i=0; i=m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim"); if (!PetscBTLookupSet(table,ival)) nidx[isz++] = ival; } ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr); ierr = ISDestroy(&is[i]);CHKERRQ(ierr); k = 0; for (j=0; jdata,*c; PetscErrorCode ierr; PetscInt *smap,i,k,kstart,kend,oldcols = a->nbs,*lens; PetscInt row,mat_i,*mat_j,tcol,*mat_ilen; const PetscInt *irow,*icol; PetscInt nrows,ncols,*ssmap,bs=A->rmap->bs,bs2=a->bs2; PetscInt *aj = a->j,*ai = a->i; MatScalar *mat_a; Mat C; PetscBool flag; PetscFunctionBegin; ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); ierr = PetscCalloc1(1+oldcols,&smap);CHKERRQ(ierr); ssmap = smap; ierr = PetscMalloc1(1+nrows,&lens);CHKERRQ(ierr); for (i=0; iilen[irow[i]]; lens[i] = 0; for (k=kstart; kdata); if (c->mbs!=nrows || c->nbs!=ncols || (*B)->rmap->bs!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Submatrix wrong size"); ierr = PetscArraycmp(c->ilen,lens,c->mbs,&flag);CHKERRQ(ierr); if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros"); ierr = PetscArrayzero(c->ilen,c->mbs);CHKERRQ(ierr); C = *B; } else { ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); ierr = MatSetSizes(C,nrows*bs,ncols*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); ierr = MatSeqBAIJSetPreallocation(C,bs,0,lens);CHKERRQ(ierr); } c = (Mat_SeqBAIJ*)(C->data); for (i=0; iilen[row]; mat_i = c->i[i]; mat_j = c->j + mat_i; mat_a = c->a + mat_i*bs2; mat_ilen = c->ilen + i; for (k=kstart; kj[k]])) { *mat_j++ = tcol - 1; ierr = PetscArraycpy(mat_a,a->a+k*bs2,bs2);CHKERRQ(ierr); mat_a += bs2; (*mat_ilen)++; } } } /* sort */ { MatScalar *work; ierr = PetscMalloc1(bs2,&work);CHKERRQ(ierr); for (i=0; ii[i]; mat_j = c->j + mat_i; mat_a = c->a + mat_i*bs2; ilen = c->ilen[i]; ierr = PetscSortIntWithDataArray(ilen,mat_j,mat_a,bs2*sizeof(MatScalar),work);CHKERRQ(ierr); } ierr = PetscFree(work);CHKERRQ(ierr); } /* Free work space */ ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); ierr = PetscFree(smap);CHKERRQ(ierr); ierr = PetscFree(lens);CHKERRQ(ierr); ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); *B = C; PetscFunctionReturn(0); } PetscErrorCode MatCreateSubMatrix_SeqBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; IS is1,is2; PetscErrorCode ierr; PetscInt *vary,*iary,nrows,ncols,i,bs=A->rmap->bs,count,maxmnbs,j; const PetscInt *irow,*icol; PetscFunctionBegin; ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); /* Verify if the indices corespond to each element in a block and form the IS with compressed IS */ maxmnbs = PetscMax(a->mbs,a->nbs); ierr = PetscMalloc2(maxmnbs,&vary,maxmnbs,&iary);CHKERRQ(ierr); ierr = PetscArrayzero(vary,a->mbs);CHKERRQ(ierr); for (i=0; imbs; i++) { if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Index set does not match blocks"); } count = 0; for (i=0; inbs);CHKERRQ(ierr); for (i=0; inbs; i++) { if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal error in PETSc"); } count = 0; for (i=0; idata; Mat_SubSppt *submatj = c->submatis1; PetscFunctionBegin; ierr = (*submatj->destroy)(C);CHKERRQ(ierr); ierr = MatDestroySubMatrix_Private(submatj);CHKERRQ(ierr); PetscFunctionReturn(0); } PetscErrorCode MatDestroySubMatrices_SeqBAIJ(PetscInt n,Mat *mat[]) { PetscErrorCode ierr; PetscInt i; Mat C; Mat_SeqBAIJ *c; Mat_SubSppt *submatj; PetscFunctionBegin; for (i=0; idata; submatj = c->submatis1; if (submatj) { ierr = (*submatj->destroy)(C);CHKERRQ(ierr); ierr = MatDestroySubMatrix_Private(submatj);CHKERRQ(ierr); ierr = PetscFree(C->defaultvectype);CHKERRQ(ierr); ierr = PetscLayoutDestroy(&C->rmap);CHKERRQ(ierr); ierr = PetscLayoutDestroy(&C->cmap);CHKERRQ(ierr); ierr = PetscHeaderDestroy(&C);CHKERRQ(ierr); } else { ierr = MatDestroy(&C);CHKERRQ(ierr); } } ierr = PetscFree(*mat);CHKERRQ(ierr); PetscFunctionReturn(0); } PetscErrorCode MatCreateSubMatrices_SeqBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) { PetscErrorCode ierr; PetscInt i; PetscFunctionBegin; if (scall == MAT_INITIAL_MATRIX) { ierr = PetscCalloc1(n+1,B);CHKERRQ(ierr); } for (i=0; idata; PetscScalar *z,sum; const PetscScalar *x; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs,i,n; const PetscInt *idx,*ii,*ridx=NULL; PetscBool usecprow=a->compressedrow.use; PetscFunctionBegin; ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&z);CHKERRQ(ierr); if (usecprow) { mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; ierr = PetscArrayzero(z,a->mbs);CHKERRQ(ierr); } else { mbs = a->mbs; ii = a->i; } for (i=0; ia + ii[0]; idx = a->j + ii[0]; ii++; PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v+1*n,1*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ sum = 0.0; PetscSparseDensePlusDot(sum,x,v,idx,n); if (usecprow) { z[ridx[i]] = sum; } else { z[i] = sum; } } ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); PetscFunctionReturn(0); } PetscErrorCode MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscScalar *z = 0,sum1,sum2,*zarray; const PetscScalar *x,*xb; PetscScalar x1,x2; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs,i,*idx,*ii,j,n,*ridx=NULL; PetscBool usecprow=a->compressedrow.use; PetscFunctionBegin; ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); idx = a->j; v = a->a; if (usecprow) { mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; ierr = PetscArrayzero(zarray,2*a->mbs);CHKERRQ(ierr); } else { mbs = a->mbs; ii = a->i; z = zarray; } for (i=0; inz - 2.0*a->nonzerorowcnt);CHKERRQ(ierr); PetscFunctionReturn(0); } PetscErrorCode MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscScalar *z = 0,sum1,sum2,sum3,x1,x2,x3,*zarray; const PetscScalar *x,*xb; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs,i,*idx,*ii,j,n,*ridx=NULL; PetscBool usecprow=a->compressedrow.use; #if defined(PETSC_HAVE_PRAGMA_DISJOINT) #pragma disjoint(*v,*z,*xb) #endif PetscFunctionBegin; ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); idx = a->j; v = a->a; if (usecprow) { mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; ierr = PetscArrayzero(zarray,3*a->mbs);CHKERRQ(ierr); } else { mbs = a->mbs; ii = a->i; z = zarray; } for (i=0; inz - 3.0*a->nonzerorowcnt);CHKERRQ(ierr); PetscFunctionReturn(0); } PetscErrorCode MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscScalar *z = 0,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*zarray; const PetscScalar *x,*xb; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs,i,*idx,*ii,j,n,*ridx=NULL; PetscBool usecprow=a->compressedrow.use; PetscFunctionBegin; ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); idx = a->j; v = a->a; if (usecprow) { mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; ierr = PetscArrayzero(zarray,4*a->mbs);CHKERRQ(ierr); } else { mbs = a->mbs; ii = a->i; z = zarray; } for (i=0; inz - 4.0*a->nonzerorowcnt);CHKERRQ(ierr); PetscFunctionReturn(0); } PetscErrorCode MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscScalar sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*z = 0,*zarray; const PetscScalar *xb,*x; const MatScalar *v; PetscErrorCode ierr; const PetscInt *idx,*ii,*ridx=NULL; PetscInt mbs,i,j,n; PetscBool usecprow=a->compressedrow.use; PetscFunctionBegin; ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); idx = a->j; v = a->a; if (usecprow) { mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; ierr = PetscArrayzero(zarray,5*a->mbs);CHKERRQ(ierr); } else { mbs = a->mbs; ii = a->i; z = zarray; } for (i=0; inz - 5.0*a->nonzerorowcnt);CHKERRQ(ierr); PetscFunctionReturn(0); } PetscErrorCode MatMult_SeqBAIJ_6(Mat A,Vec xx,Vec zz) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6; const PetscScalar *x,*xb; PetscScalar x1,x2,x3,x4,x5,x6,*zarray; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs,i,*idx,*ii,j,n,*ridx=NULL; PetscBool usecprow=a->compressedrow.use; PetscFunctionBegin; ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); idx = a->j; v = a->a; if (usecprow) { mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; ierr = PetscArrayzero(zarray,6*a->mbs);CHKERRQ(ierr); } else { mbs = a->mbs; ii = a->i; z = zarray; } for (i=0; inz - 6.0*a->nonzerorowcnt);CHKERRQ(ierr); PetscFunctionReturn(0); } PetscErrorCode MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7; const PetscScalar *x,*xb; PetscScalar x1,x2,x3,x4,x5,x6,x7,*zarray; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs,i,*idx,*ii,j,n,*ridx=NULL; PetscBool usecprow=a->compressedrow.use; PetscFunctionBegin; ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); idx = a->j; v = a->a; if (usecprow) { mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; ierr = PetscArrayzero(zarray,7*a->mbs);CHKERRQ(ierr); } else { mbs = a->mbs; ii = a->i; z = zarray; } for (i=0; inz - 7.0*a->nonzerorowcnt);CHKERRQ(ierr); PetscFunctionReturn(0); } #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) PetscErrorCode MatMult_SeqBAIJ_9_AVX2(Mat A,Vec xx,Vec zz) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscScalar *z = 0,*work,*workt,*zarray; const PetscScalar *x,*xb; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs,i,bs=A->rmap->bs,j,n,bs2=a->bs2; const PetscInt *idx,*ii,*ridx=NULL; PetscInt k; PetscBool usecprow=a->compressedrow.use; __m256d a0,a1,a2,a3,a4,a5; __m256d w0,w1,w2,w3; __m256d z0,z1,z2; __m256i mask1 = _mm256_set_epi64x(0LL, 0LL, 0LL, 1LL<<63); PetscFunctionBegin; ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); idx = a->j; v = a->a; if (usecprow) { mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; ierr = PetscArrayzero(zarray,bs*a->mbs);CHKERRQ(ierr); } else { mbs = a->mbs; ii = a->i; z = zarray; } if (!a->mult_work) { k = PetscMax(A->rmap->n,A->cmap->n); ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); } work = a->mult_work; for (i=0; inz*bs2 - bs*a->nonzerorowcnt);CHKERRQ(ierr); PetscFunctionReturn(0); } #endif PetscErrorCode MatMult_SeqBAIJ_11(Mat A,Vec xx,Vec zz) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,sum10,sum11; const PetscScalar *x,*xb; PetscScalar *zarray,xv; const MatScalar *v; PetscErrorCode ierr; const PetscInt *ii,*ij=a->j,*idx; PetscInt mbs,i,j,k,n,*ridx=NULL; PetscBool usecprow=a->compressedrow.use; PetscFunctionBegin; ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); v = a->a; if (usecprow) { mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; ierr = PetscArrayzero(zarray,11*a->mbs);CHKERRQ(ierr); } else { mbs = a->mbs; ii = a->i; z = zarray; } for (i=0; inz - 11.0*a->nonzerorowcnt);CHKERRQ(ierr); PetscFunctionReturn(0); } /* MatMult_SeqBAIJ_15 version 1: Columns in the block are accessed one at a time */ /* Default MatMult for block size 15 */ PetscErrorCode MatMult_SeqBAIJ_15_ver1(Mat A,Vec xx,Vec zz) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,sum10,sum11,sum12,sum13,sum14,sum15; const PetscScalar *x,*xb; PetscScalar *zarray,xv; const MatScalar *v; PetscErrorCode ierr; const PetscInt *ii,*ij=a->j,*idx; PetscInt mbs,i,j,k,n,*ridx=NULL; PetscBool usecprow=a->compressedrow.use; PetscFunctionBegin; ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); v = a->a; if (usecprow) { mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; ierr = PetscArrayzero(zarray,15*a->mbs);CHKERRQ(ierr); } else { mbs = a->mbs; ii = a->i; z = zarray; } for (i=0; inz - 15.0*a->nonzerorowcnt);CHKERRQ(ierr); PetscFunctionReturn(0); } /* MatMult_SeqBAIJ_15_ver2 : Columns in the block are accessed in sets of 4,4,4,3 */ PetscErrorCode MatMult_SeqBAIJ_15_ver2(Mat A,Vec xx,Vec zz) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,sum10,sum11,sum12,sum13,sum14,sum15; const PetscScalar *x,*xb; PetscScalar x1,x2,x3,x4,*zarray; const MatScalar *v; PetscErrorCode ierr; const PetscInt *ii,*ij=a->j,*idx; PetscInt mbs,i,j,n,*ridx=NULL; PetscBool usecprow=a->compressedrow.use; PetscFunctionBegin; ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); v = a->a; if (usecprow) { mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; ierr = PetscArrayzero(zarray,15*a->mbs);CHKERRQ(ierr); } else { mbs = a->mbs; ii = a->i; z = zarray; } for (i=0; inz - 15.0*a->nonzerorowcnt);CHKERRQ(ierr); PetscFunctionReturn(0); } /* MatMult_SeqBAIJ_15_ver3 : Columns in the block are accessed in sets of 8,7 */ PetscErrorCode MatMult_SeqBAIJ_15_ver3(Mat A,Vec xx,Vec zz) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,sum10,sum11,sum12,sum13,sum14,sum15; const PetscScalar *x,*xb; PetscScalar x1,x2,x3,x4,x5,x6,x7,x8,*zarray; const MatScalar *v; PetscErrorCode ierr; const PetscInt *ii,*ij=a->j,*idx; PetscInt mbs,i,j,n,*ridx=NULL; PetscBool usecprow=a->compressedrow.use; PetscFunctionBegin; ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); v = a->a; if (usecprow) { mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; ierr = PetscArrayzero(zarray,15*a->mbs);CHKERRQ(ierr); } else { mbs = a->mbs; ii = a->i; z = zarray; } for (i=0; inz - 15.0*a->nonzerorowcnt);CHKERRQ(ierr); PetscFunctionReturn(0); } /* MatMult_SeqBAIJ_15_ver4 : All columns in the block are accessed at once */ PetscErrorCode MatMult_SeqBAIJ_15_ver4(Mat A,Vec xx,Vec zz) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,sum10,sum11,sum12,sum13,sum14,sum15; const PetscScalar *x,*xb; PetscScalar x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,*zarray; const MatScalar *v; PetscErrorCode ierr; const PetscInt *ii,*ij=a->j,*idx; PetscInt mbs,i,j,n,*ridx=NULL; PetscBool usecprow=a->compressedrow.use; PetscFunctionBegin; ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); v = a->a; if (usecprow) { mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; ierr = PetscArrayzero(zarray,15*a->mbs);CHKERRQ(ierr); } else { mbs = a->mbs; ii = a->i; z = zarray; } for (i=0; inz - 15.0*a->nonzerorowcnt);CHKERRQ(ierr); PetscFunctionReturn(0); } /* This will not work with MatScalar == float because it calls the BLAS */ PetscErrorCode MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscScalar *z = 0,*work,*workt,*zarray; const PetscScalar *x,*xb; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs,i,bs=A->rmap->bs,j,n,bs2=a->bs2; const PetscInt *idx,*ii,*ridx=NULL; PetscInt ncols,k; PetscBool usecprow=a->compressedrow.use; PetscFunctionBegin; ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); idx = a->j; v = a->a; if (usecprow) { mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; ierr = PetscArrayzero(zarray,bs*a->mbs);CHKERRQ(ierr); } else { mbs = a->mbs; ii = a->i; z = zarray; } if (!a->mult_work) { k = PetscMax(A->rmap->n,A->cmap->n); ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); } work = a->mult_work; for (i=0; inz*bs2 - bs*a->nonzerorowcnt);CHKERRQ(ierr); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; const PetscScalar *x; PetscScalar *y,*z,sum; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs=a->mbs,i,n,*ridx=NULL; const PetscInt *idx,*ii; PetscBool usecprow=a->compressedrow.use; PetscFunctionBegin; ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); idx = a->j; v = a->a; if (usecprow) { if (zz != yy) { ierr = PetscArraycpy(z,y,mbs);CHKERRQ(ierr); } mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; } else { ii = a->i; } for (i=0; inz);CHKERRQ(ierr); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscScalar *y = 0,*z = 0,sum1,sum2; const PetscScalar *x,*xb; PetscScalar x1,x2,*yarray,*zarray; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs = a->mbs,i,n,j; const PetscInt *idx,*ii,*ridx = NULL; PetscBool usecprow = a->compressedrow.use; PetscFunctionBegin; ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr); idx = a->j; v = a->a; if (usecprow) { if (zz != yy) { ierr = PetscArraycpy(zarray,yarray,2*mbs);CHKERRQ(ierr); } mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; } else { ii = a->i; y = yarray; z = zarray; } for (i=0; inz);CHKERRQ(ierr); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscScalar *y = 0,*z = 0,sum1,sum2,sum3,x1,x2,x3,*yarray,*zarray; const PetscScalar *x,*xb; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs = a->mbs,i,j,n; const PetscInt *idx,*ii,*ridx = NULL; PetscBool usecprow = a->compressedrow.use; PetscFunctionBegin; ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr); idx = a->j; v = a->a; if (usecprow) { if (zz != yy) { ierr = PetscArraycpy(zarray,yarray,3*mbs);CHKERRQ(ierr); } mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; } else { ii = a->i; y = yarray; z = zarray; } for (i=0; inz);CHKERRQ(ierr); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscScalar *y = 0,*z = 0,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*yarray,*zarray; const PetscScalar *x,*xb; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs = a->mbs,i,j,n; const PetscInt *idx,*ii,*ridx=NULL; PetscBool usecprow=a->compressedrow.use; PetscFunctionBegin; ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr); idx = a->j; v = a->a; if (usecprow) { if (zz != yy) { ierr = PetscArraycpy(zarray,yarray,4*mbs);CHKERRQ(ierr); } mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; } else { ii = a->i; y = yarray; z = zarray; } for (i=0; inz);CHKERRQ(ierr); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscScalar *y = 0,*z = 0,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5; const PetscScalar *x,*xb; PetscScalar *yarray,*zarray; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs = a->mbs,i,j,n; const PetscInt *idx,*ii,*ridx = NULL; PetscBool usecprow=a->compressedrow.use; PetscFunctionBegin; ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr); idx = a->j; v = a->a; if (usecprow) { if (zz != yy) { ierr = PetscArraycpy(zarray,yarray,5*mbs);CHKERRQ(ierr); } mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; } else { ii = a->i; y = yarray; z = zarray; } for (i=0; inz);CHKERRQ(ierr); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscScalar *y = 0,*z = 0,sum1,sum2,sum3,sum4,sum5,sum6; const PetscScalar *x,*xb; PetscScalar x1,x2,x3,x4,x5,x6,*yarray,*zarray; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs = a->mbs,i,j,n; const PetscInt *idx,*ii,*ridx=NULL; PetscBool usecprow=a->compressedrow.use; PetscFunctionBegin; ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr); idx = a->j; v = a->a; if (usecprow) { if (zz != yy) { ierr = PetscArraycpy(zarray,yarray,6*mbs);CHKERRQ(ierr); } mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; } else { ii = a->i; y = yarray; z = zarray; } for (i=0; inz);CHKERRQ(ierr); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscScalar *y = 0,*z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7; const PetscScalar *x,*xb; PetscScalar x1,x2,x3,x4,x5,x6,x7,*yarray,*zarray; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs = a->mbs,i,j,n; const PetscInt *idx,*ii,*ridx = NULL; PetscBool usecprow=a->compressedrow.use; PetscFunctionBegin; ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr); idx = a->j; v = a->a; if (usecprow) { if (zz != yy) { ierr = PetscArraycpy(zarray,yarray,7*mbs);CHKERRQ(ierr); } mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; } else { ii = a->i; y = yarray; z = zarray; } for (i=0; inz);CHKERRQ(ierr); PetscFunctionReturn(0); } #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) PetscErrorCode MatMultAdd_SeqBAIJ_9_AVX2(Mat A,Vec xx,Vec yy,Vec zz) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscScalar *z = 0,*work,*workt,*zarray; const PetscScalar *x,*xb; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs,i,bs=A->rmap->bs,j,n,bs2=a->bs2; PetscInt k; PetscBool usecprow=a->compressedrow.use; const PetscInt *idx,*ii,*ridx=NULL; __m256d a0,a1,a2,a3,a4,a5; __m256d w0,w1,w2,w3; __m256d z0,z1,z2; __m256i mask1 = _mm256_set_epi64x(0LL, 0LL, 0LL, 1LL<<63); PetscFunctionBegin; ierr = VecCopy(yy,zz);CHKERRQ(ierr); ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); idx = a->j; v = a->a; if (usecprow) { mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; } else { mbs = a->mbs; ii = a->i; z = zarray; } if (!a->mult_work) { k = PetscMax(A->rmap->n,A->cmap->n); ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); } work = a->mult_work; for (i=0; inz);CHKERRQ(ierr); PetscFunctionReturn(0); } #endif PetscErrorCode MatMultAdd_SeqBAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscScalar *y = 0,*z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,sum10,sum11; const PetscScalar *x,*xb; PetscScalar x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,*yarray,*zarray; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs = a->mbs,i,j,n; const PetscInt *idx,*ii,*ridx = NULL; PetscBool usecprow=a->compressedrow.use; PetscFunctionBegin; ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr); idx = a->j; v = a->a; if (usecprow) { if (zz != yy) { ierr = PetscArraycpy(zarray,yarray,7*mbs);CHKERRQ(ierr); } mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; } else { ii = a->i; y = yarray; z = zarray; } for (i=0; inz);CHKERRQ(ierr); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscScalar *z = 0,*work,*workt,*zarray; const PetscScalar *x,*xb; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs,i,bs=A->rmap->bs,j,n,bs2=a->bs2; PetscInt ncols,k; const PetscInt *ridx = NULL,*idx,*ii; PetscBool usecprow = a->compressedrow.use; PetscFunctionBegin; ierr = VecCopy(yy,zz);CHKERRQ(ierr); ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); idx = a->j; v = a->a; if (usecprow) { mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; } else { mbs = a->mbs; ii = a->i; z = zarray; } if (!a->mult_work) { k = PetscMax(A->rmap->n,A->cmap->n); ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); } work = a->mult_work; for (i=0; inz*bs2);CHKERRQ(ierr); PetscFunctionReturn(0); } PetscErrorCode MatMultHermitianTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz) { PetscScalar zero = 0.0; PetscErrorCode ierr; PetscFunctionBegin; ierr = VecSet(zz,zero);CHKERRQ(ierr); ierr = MatMultHermitianTransposeAdd_SeqBAIJ(A,xx,zz,zz);CHKERRQ(ierr); PetscFunctionReturn(0); } PetscErrorCode MatMultTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz) { PetscScalar zero = 0.0; PetscErrorCode ierr; PetscFunctionBegin; ierr = VecSet(zz,zero);CHKERRQ(ierr); ierr = MatMultTransposeAdd_SeqBAIJ(A,xx,zz,zz);CHKERRQ(ierr); PetscFunctionReturn(0); } PetscErrorCode MatMultHermitianTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscScalar *z,x1,x2,x3,x4,x5; const PetscScalar *x,*xb = NULL; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs,i,rval,bs=A->rmap->bs,j,n; const PetscInt *idx,*ii,*ib,*ridx = NULL; Mat_CompressedRow cprow = a->compressedrow; PetscBool usecprow = cprow.use; PetscFunctionBegin; if (yy != zz) { ierr = VecCopy(yy,zz);CHKERRQ(ierr); } ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&z);CHKERRQ(ierr); idx = a->j; v = a->a; if (usecprow) { mbs = cprow.nrows; ii = cprow.i; ridx = cprow.rindex; } else { mbs=a->mbs; ii = a->i; xb = x; } switch (bs) { case 1: for (i=0; ibs2; PetscScalar *work,*workt,zb; const PetscScalar *xtmp; if (!a->mult_work) { k = PetscMax(A->rmap->n,A->cmap->n); ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); } work = a->mult_work; xtmp = x; for (i=0; inz*a->bs2);CHKERRQ(ierr); PetscFunctionReturn(0); } PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscScalar *zb,*z,x1,x2,x3,x4,x5; const PetscScalar *x,*xb = 0; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs,i,rval,bs=A->rmap->bs,j,n,bs2=a->bs2; const PetscInt *idx,*ii,*ib,*ridx = NULL; Mat_CompressedRow cprow = a->compressedrow; PetscBool usecprow=cprow.use; PetscFunctionBegin; if (yy != zz) { ierr = VecCopy(yy,zz);CHKERRQ(ierr); } ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&z);CHKERRQ(ierr); idx = a->j; v = a->a; if (usecprow) { mbs = cprow.nrows; ii = cprow.i; ridx = cprow.rindex; } else { mbs=a->mbs; ii = a->i; xb = x; } switch (bs) { case 1: for (i=0; imult_work) { k = PetscMax(A->rmap->n,A->cmap->n); ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); } work = a->mult_work; xtmp = x; for (i=0; inz*a->bs2);CHKERRQ(ierr); PetscFunctionReturn(0); } PetscErrorCode MatScale_SeqBAIJ(Mat inA,PetscScalar alpha) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; PetscInt totalnz = a->bs2*a->nz; PetscScalar oalpha = alpha; PetscErrorCode ierr; PetscBLASInt one = 1,tnz; PetscFunctionBegin; ierr = PetscBLASIntCast(totalnz,&tnz);CHKERRQ(ierr); PetscStackCallBLAS("BLASscal",BLASscal_(&tnz,&oalpha,a->a,&one)); ierr = PetscLogFlops(totalnz);CHKERRQ(ierr); PetscFunctionReturn(0); } PetscErrorCode MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm) { PetscErrorCode ierr; Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; MatScalar *v = a->a; PetscReal sum = 0.0; PetscInt i,j,k,bs=A->rmap->bs,nz=a->nz,bs2=a->bs2,k1; PetscFunctionBegin; if (type == NORM_FROBENIUS) { #if defined(PETSC_USE_REAL___FP16) PetscBLASInt one = 1,cnt = bs2*nz; *norm = BLASnrm2_(&cnt,v,&one); #else for (i=0; ij; ierr = PetscCalloc1(A->cmap->n+1,&tmp);CHKERRQ(ierr); for (i=0; icmap->n; j++) { if (tmp[j] > *norm) *norm = tmp[j]; } ierr = PetscFree(tmp);CHKERRQ(ierr); ierr = PetscLogFlops(PetscMax(bs2*nz-1,0));CHKERRQ(ierr); } else if (type == NORM_INFINITY) { /* maximum row sum */ *norm = 0.0; for (k=0; kmbs; j++) { v = a->a + bs2*a->i[j] + k; sum = 0.0; for (i=0; ii[j+1]-a->i[j]; i++) { for (k1=0; k1 *norm) *norm = sum; } } ierr = PetscLogFlops(PetscMax(bs2*nz-1,0));CHKERRQ(ierr); } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); PetscFunctionReturn(0); } PetscErrorCode MatEqual_SeqBAIJ(Mat A,Mat B,PetscBool * flg) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)B->data; PetscErrorCode ierr; PetscFunctionBegin; /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) { *flg = PETSC_FALSE; PetscFunctionReturn(0); } /* if the a->i are the same */ ierr = PetscArraycmp(a->i,b->i,a->mbs+1,flg);CHKERRQ(ierr); if (!*flg) PetscFunctionReturn(0); /* if a->j are the same */ ierr = PetscArraycmp(a->j,b->j,a->nz,flg);CHKERRQ(ierr); if (!*flg) PetscFunctionReturn(0); /* if a->a are the same */ ierr = PetscArraycmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(B->rmap->bs),flg);CHKERRQ(ierr); PetscFunctionReturn(0); } PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat A,Vec v) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscErrorCode ierr; PetscInt i,j,k,n,row,bs,*ai,*aj,ambs,bs2; PetscScalar *x,zero = 0.0; MatScalar *aa,*aa_j; PetscFunctionBegin; if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); bs = A->rmap->bs; aa = a->a; ai = a->i; aj = a->j; ambs = a->mbs; bs2 = a->bs2; ierr = VecSet(v,zero);CHKERRQ(ierr); ierr = VecGetArray(v,&x);CHKERRQ(ierr); ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); for (i=0; idata; const PetscScalar *l,*r,*li,*ri; PetscScalar x; MatScalar *aa, *v; PetscErrorCode ierr; PetscInt i,j,k,lm,rn,M,m,n,mbs,tmp,bs,bs2,iai; const PetscInt *ai,*aj; PetscFunctionBegin; ai = a->i; aj = a->j; aa = a->a; m = A->rmap->n; n = A->cmap->n; bs = A->rmap->bs; mbs = a->mbs; bs2 = a->bs2; if (ll) { ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr); if (lm != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); for (i=0; inz);CHKERRQ(ierr); } if (rr) { ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr); ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr); if (rn != n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); for (i=0; inz);CHKERRQ(ierr); } PetscFunctionReturn(0); } PetscErrorCode MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscFunctionBegin; info->block_size = a->bs2; info->nz_allocated = a->bs2*a->maxnz; info->nz_used = a->bs2*a->nz; info->nz_unneeded = info->nz_allocated - info->nz_used; info->assemblies = A->num_ass; info->mallocs = A->info.mallocs; info->memory = ((PetscObject)A)->mem; if (A->factortype) { info->fill_ratio_given = A->info.fill_ratio_given; info->fill_ratio_needed = A->info.fill_ratio_needed; info->factor_mallocs = A->info.factor_mallocs; } else { info->fill_ratio_given = 0; info->fill_ratio_needed = 0; info->factor_mallocs = 0; } PetscFunctionReturn(0); } PetscErrorCode MatZeroEntries_SeqBAIJ(Mat A) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscErrorCode ierr; PetscFunctionBegin; ierr = PetscArrayzero(a->a,a->bs2*a->i[a->mbs]);CHKERRQ(ierr); PetscFunctionReturn(0); } PETSC_INTERN PetscErrorCode MatMatMult_SeqBAIJ_SeqDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { PetscErrorCode ierr; PetscFunctionBegin; if (scall == MAT_INITIAL_MATRIX) { ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); ierr = MatMatMultSymbolic_SeqBAIJ_SeqDense(A,B,fill,C);CHKERRQ(ierr); ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); } ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); ierr = MatMatMultNumeric_SeqBAIJ_SeqDense(A,B,*C);CHKERRQ(ierr); ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); } PetscErrorCode MatMatMultSymbolic_SeqBAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat *C) { PetscErrorCode ierr; PetscFunctionBegin; ierr = MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C);CHKERRQ(ierr); (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqBAIJ_SeqDense; PetscFunctionReturn(0); } PetscErrorCode MatMatMult_SeqBAIJ_1_Private(Mat A,PetscScalar* b,PetscInt bm,PetscScalar* c,PetscInt cm,PetscInt cn) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; PetscScalar *z = 0,sum1; const PetscScalar *xb; PetscScalar x1; const MatScalar *v,*vv; PetscInt mbs,i,*idx,*ii,j,*jj,n,k,*ridx=NULL; PetscBool usecprow=a->compressedrow.use; PetscFunctionBegin; idx = a->j; v = a->a; if (usecprow) { mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; } else { mbs = a->mbs; ii = a->i; z = c; } for (i=0; idata; PetscScalar *z = 0,sum1,sum2; const PetscScalar *xb; PetscScalar x1,x2; const MatScalar *v,*vv; PetscInt mbs,i,*idx,*ii,j,*jj,n,k,*ridx=NULL; PetscBool usecprow=a->compressedrow.use; PetscFunctionBegin; idx = a->j; v = a->a; if (usecprow) { mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; } else { mbs = a->mbs; ii = a->i; z = c; } for (i=0; idata; PetscScalar *z = 0,sum1,sum2,sum3; const PetscScalar *xb; PetscScalar x1,x2,x3; const MatScalar *v,*vv; PetscInt mbs,i,*idx,*ii,j,*jj,n,k,*ridx=NULL; PetscBool usecprow=a->compressedrow.use; PetscFunctionBegin; idx = a->j; v = a->a; if (usecprow) { mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; } else { mbs = a->mbs; ii = a->i; z = c; } for (i=0; idata; PetscScalar *z = 0,sum1,sum2,sum3,sum4; const PetscScalar *xb; PetscScalar x1,x2,x3,x4; const MatScalar *v,*vv; PetscInt mbs,i,*idx,*ii,j,*jj,n,k,*ridx=NULL; PetscBool usecprow=a->compressedrow.use; PetscFunctionBegin; idx = a->j; v = a->a; if (usecprow) { mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; } else { mbs = a->mbs; ii = a->i; z = c; } for (i=0; idata; PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5; const PetscScalar *xb; PetscScalar x1,x2,x3,x4,x5; const MatScalar *v,*vv; PetscInt mbs,i,*idx,*ii,j,*jj,n,k,*ridx=NULL; PetscBool usecprow=a->compressedrow.use; PetscFunctionBegin; idx = a->j; v = a->a; if (usecprow) { mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; } else { mbs = a->mbs; ii = a->i; z = c; } for (i=0; idata; Mat_SeqDense *bd = (Mat_SeqDense*)B->data; Mat_SeqDense *cd = (Mat_SeqDense*)B->data; PetscInt cm=cd->lda,cn=B->cmap->n,bm=bd->lda; PetscInt mbs,i,bs=A->rmap->bs,j,n,bs2=a->bs2; PetscBLASInt bbs,bcn,bbm,bcm; PetscScalar *z = 0; PetscScalar *c,*b; const MatScalar *v; const PetscInt *idx,*ii,*ridx=NULL; PetscScalar _DZero=0.0,_DOne=1.0; PetscBool usecprow=a->compressedrow.use; PetscErrorCode ierr; PetscFunctionBegin; if (!cm || !cn) PetscFunctionReturn(0); if (B->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,B->rmap->n); if (A->rmap->n != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %D not equal rows in A %D\n",C->rmap->n,A->rmap->n); if (B->cmap->n != C->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %D not equal columns in C %D\n",B->cmap->n,C->cmap->n); b = bd->v; if (a->nonzerorowcnt != A->rmap->n) { ierr = MatZeroEntries(C);CHKERRQ(ierr); } ierr = MatDenseGetArray(C,&c);CHKERRQ(ierr); switch (bs) { case 1: ierr = MatMatMult_SeqBAIJ_1_Private(A, b, bm, c, cm, cn); break; case 2: ierr = MatMatMult_SeqBAIJ_2_Private(A, b, bm, c, cm, cn); break; case 3: ierr = MatMatMult_SeqBAIJ_3_Private(A, b, bm, c, cm, cn); break; case 4: ierr = MatMatMult_SeqBAIJ_4_Private(A, b, bm, c, cm, cn); break; case 5: ierr = MatMatMult_SeqBAIJ_5_Private(A, b, bm, c, cm, cn); break; default: /* block sizes larger than 5 by 5 are handled by BLAS */ ierr = PetscBLASIntCast(bs,&bbs);CHKERRQ(ierr); ierr = PetscBLASIntCast(cn,&bcn);CHKERRQ(ierr); ierr = PetscBLASIntCast(bm,&bbm);CHKERRQ(ierr); ierr = PetscBLASIntCast(cm,&bcm);CHKERRQ(ierr); idx = a->j; v = a->a; if (usecprow) { mbs = a->compressedrow.nrows; ii = a->compressedrow.i; ridx = a->compressedrow.rindex; } else { mbs = a->mbs; ii = a->i; z = c; } for (i=0; inz*bs2 - bs*a->nonzerorowcnt)*cn);CHKERRQ(ierr); PetscFunctionReturn(0); }