Actual source code: aijmkl.c

petsc-master 2019-12-10

/*
  Defines basic operations for the MATSEQAIJMKL matrix class.
  This class is derived from the MATSEQAIJ class and retains the
  compressed row storage (aka Yale sparse matrix format) but uses
  sparse BLAS operations from the Intel Math Kernel Library (MKL)
  wherever possible.
*/

#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
#include <mkl_spblas.h>
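
/* A minimal usage sketch (illustrative only, not part of this file's API; compiled out
 * with #if 0). The type behaves like MATSEQAIJ, so a matrix can be created directly with
 * MatSetType() or selected at runtime with -mat_type aijmkl. Sizes here are arbitrary. */
#if 0
static PetscErrorCode ExampleAIJMKLSpMV(void)
{
  Mat            A;
  Vec            x,y;
  PetscErrorCode ierr;

  ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,10,10,10,10);CHKERRQ(ierr);
  ierr = MatSetType(A,MATSEQAIJMKL);CHKERRQ(ierr);  /* or -mat_type aijmkl with MatSetFromOptions() */
  ierr = MatSetUp(A);CHKERRQ(ierr);
  /* ... fill entries with MatSetValues() ... */
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatCreateVecs(A,&x,&y);CHKERRQ(ierr);
  ierr = MatMult(A,x,y);CHKERRQ(ierr);              /* dispatches to the MKL-backed kernels below */
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  return 0;
}
#endif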

typedef struct {
  PetscBool           no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  PetscObjectState    state;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  sparse_matrix_t     csrA;  /* "Handle" used by the SpMV2 inspector-executor routines. */
  struct matrix_descr descr;
#endif
} Mat_SeqAIJMKL;

extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);

PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
  /* so we will ignore 'MatType type'. */
  Mat            B = *newmat;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
#endif

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  /* Reset the original function pointers. */
  B->ops->duplicate        = MatDuplicate_SeqAIJ;
  B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
  B->ops->destroy          = MatDestroy_SeqAIJ;
  B->ops->mult             = MatMult_SeqAIJ;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
  B->ops->multadd          = MatMultAdd_SeqAIJ;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
  B->ops->matmult          = MatMatMult_SeqAIJ_SeqAIJ;
  B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->ptap             = MatPtAP_SeqAIJ_SeqAIJ;
  B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJ_SeqAIJ;
  B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ;

  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqaijmkl_C",NULL);
#if defined(PETSC_HAVE_HYPRE)
  PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_seqaijmkl_seqaijmkl_C",NULL);
#endif
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  if (!aijmkl->no_SpMV2) {
    PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
    PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);
#endif
    PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);
  }

  /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
   * simply involves destroying the MKL sparse matrix handle and then freeing
   * the spptr pointer. */
  if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;

  if (aijmkl->sparse_optimized) {
    sparse_status_t stat;
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
  }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
  PetscFree(B->spptr);

  /* Change the type of B to MATSEQAIJ. */
  PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);

  *newmat = B;
  return(0);
}

PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
{
  Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;

  /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
   * spptr pointer. */
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    if (aijmkl->sparse_optimized) {
      sparse_status_t stat = SPARSE_STATUS_SUCCESS;
      stat = mkl_sparse_destroy(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
    }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    PetscFree(A->spptr);
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);
  /* Note that I don't call MatSetType(). I believe this is because that
   * is only to be called when *building* a matrix. I could be wrong, but
   * that is how things work for the SuperLU matrix class. */
  MatDestroy_SeqAIJ(A);
  return(0);
}

/* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, it destroys the old matrix
 * handle, creates a new one, and then calls mkl_sparse_optimize().
 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
 * an unoptimized matrix handle here. */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
{
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* If the MKL library does not have mkl_sparse_optimize(), then this routine
   * does nothing. We make it callable anyway in this case because it cuts
   * down on littering the code with #ifdefs. */
  return(0);
#else
  Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  PetscInt         m,n;
  MatScalar        *aa;
  PetscInt         *aj,*ai;
  sparse_status_t  stat;
  PetscErrorCode   ierr;

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* For MKL versions that still support the old, non-inspector-executor interfaces, we simply exit here if the no_SpMV2
   * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
   * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor kernels by not calling
   * mkl_sparse_optimize() later. */
  if (aijmkl->no_SpMV2) return(0);
#endif

  if (aijmkl->sparse_optimized) {
    /* Matrix has been previously assembled and optimized. Must destroy old
     * matrix handle before running the optimization step again. */
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
  }
  aijmkl->sparse_optimized = PETSC_FALSE;

  /* Now perform the SpMV2 setup and matrix optimization. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  m  = A->rmap->n;
  n  = A->cmap->n;
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
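  /* Worked example of this CSR layout (illustrative only): the 2x3 matrix
   *   [1 0 2]
   *   [0 3 0]
   * is stored as
   *   aa = {1,2,3}   (nonzero values, row-by-row)
   *   aj = {0,2,1}   (column index of each stored value)
   *   ai = {0,2,3}   (row k occupies positions ai[k] .. ai[k+1]-1 of aa and aj)
   * which is why ai serves as the row-start array and ai+1 as the row-end array
   * in the mkl_sparse_x_create_csr() call below. */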
  if (a->nz && !A->structure_only) {
    /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
     * The MKL sparse inspector-executor routines don't like being passed an empty matrix. */
    stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle");
    stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
    stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
    if (!aijmkl->no_SpMV2) {
      stat = mkl_sparse_optimize(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
    }
    aijmkl->sparse_optimized = PETSC_TRUE;
    PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  }

  return(0);
#endif
}

/* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
 * matrix handle.
 * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as
 * there is no good alternative. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
{
  PetscErrorCode      ierr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;
  PetscInt            nrows, ncols;
  PetscInt            *aj,*ai,*dummy;
  MatScalar           *aa;
  Mat                 A;
  Mat_SeqAIJMKL       *aijmkl;

  /* Note: We must pass &dummy below because MKL does not accept NULL for this output array, even though we don't actually want it. */
  stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  if (reuse == MAT_REUSE_MATRIX) {
    MatDestroy(mat);
  }
  MatCreate(comm,&A);
  MatSetType(A,MATSEQAIJ);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);
  /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
   * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
   * they will be destroyed when the MKL handle is destroyed.
   * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
  MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);

  /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
   * Now turn it into a MATSEQAIJMKL. */
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);

  aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  aijmkl->csrA = csrA;

  /* The code below duplicates much of what is in MatSeqAIJMKL_create_mkl_handle(). I dislike this code duplication, but
   * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
   * and just need to be able to run the MKL optimization step. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
  stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
  if (!aijmkl->no_SpMV2) {
    stat = mkl_sparse_optimize(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
  }
  aijmkl->sparse_optimized = PETSC_TRUE;
  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));

  *mat = A;
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

/* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
 * MatMatMultNumeric(). */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
{
  PetscInt            i;
  PetscInt            nrows,ncols;
  PetscInt            nz;
  PetscInt            *ai,*aj,*dummy;
  PetscScalar         *aa;
  PetscErrorCode      ierr;
  Mat_SeqAIJMKL       *aijmkl;
  sparse_status_t     stat;
  sparse_index_base_t indexing;

  aijmkl = (Mat_SeqAIJMKL*)A->spptr;

  /* Note: We must pass &dummy below because MKL does not accept NULL for this output array, even though we don't actually want it. */
  stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
   * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
  for (i=0; i<nrows; i++) {
    nz = ai[i+1] - ai[i];
    MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);
  }

  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  /* We mark our matrix as having a valid, optimized MKL handle.
   * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */
  aijmkl->sparse_optimized = PETSC_TRUE;

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
{
  Mat_SeqAIJMKL  *aijmkl;
  Mat_SeqAIJMKL  *aijmkl_dest;

  MatDuplicate_SeqAIJ(A,op,M);
  aijmkl      = (Mat_SeqAIJMKL*)A->spptr;
  aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
  PetscArraycpy(aijmkl_dest,aijmkl,1);
  aijmkl_dest->sparse_optimized = PETSC_FALSE;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  return(0);
}

PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
{
  PetscErrorCode  ierr;
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL   *aijmkl;

  if (mode == MAT_FLUSH_ASSEMBLY) return(0);

  /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
   * extra information and some different methods, call the AssemblyEnd
   * routine for a MATSEQAIJ.
   * I'm not sure if this is the best way to do this, but it avoids
   * a lot of code duplication. */
  a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
  MatAssemblyEnd_SeqAIJ(A, mode);

  /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
   * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
  aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  return(0);
}

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m=A->rmap->n;
  PetscInt          n=A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMult_SeqAIJ. */
  char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call the MKL sparse BLAS routine to do the MatMult. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState  state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt m=A->rmap->n;
    VecGetArray(yy,&y);
    for (i=0; i<m; i++) {
      y[i] = 0.0;
    }
    VecRestoreArray(yy,&y);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call the MKL SpMV2 executor routine to do the MatMult. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m=A->rmap->n;
  PetscInt          n=A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMultTranspose_SeqAIJ. */
  char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call the MKL sparse BLAS routine to do the MatMultTranspose. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat;
  PetscObjectState  state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt n=A->cmap->n;
    VecGetArray(yy,&y);
    for (i=0; i<n; i++) {
      y[i] = 0.0;
    }
    VecRestoreArray(yy,&y);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call the MKL SpMV2 executor routine to do the MatMultTranspose. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m=A->rmap->n;
  PetscInt          n=A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
  PetscScalar       alpha = 1.0;
  PetscScalar       beta;
  char              matdescra[6];

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call the MKL sparse BLAS routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which computes y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0 to compute A*x in z, then add the
     * contents of yy to z; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          m=A->rmap->n;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState  state;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    VecGetArrayPair(yy,zz,&y,&z);
    for (i=0; i<m; i++) {
      z[i] = y[i];
    }
    VecRestoreArrayPair(yy,zz,&y,&z);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call the MKL SpMV2 executor routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv(), which computes y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv() with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m=A->rmap->n;
  PetscInt          n=A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  char              transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
  PetscScalar       alpha = 1.0;
  PetscScalar       beta;
  char              matdescra[6];

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call the MKL sparse BLAS routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which computes y = alpha*A^T*x + beta*y here. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0 to compute A^T*x in z, then add the
     * contents of yy to z; MKL sparse BLAS does not have a MatMultTransposeAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          n=A->cmap->n;
  PetscInt          i;
  PetscObjectState  state;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    VecGetArrayPair(yy,zz,&y,&z);
    for (i=0; i<n; i++) {
      z[i] = y[i];
    }
    VecRestoreArrayPair(yy,zz,&y,&z);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call the MKL SpMV2 executor routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv(), which computes y = alpha*A^T*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv() with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultTransposeAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because
 * the MatMatMult() interface code calls MatMatMultNumeric() in this case.
 * For releases of MKL prior to version 18, update 2:
 * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the
 * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more
 * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing
 * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */
PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL    *a, *b;
  sparse_matrix_t  csrA, csrB, csrC;
  PetscErrorCode   ierr;
  sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState state;

  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;

  stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");

  MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
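
/* Hedged usage note (illustrative only, not taken from this source): at the PETSc level the
 * kernel above is reached through the generic interface, e.g.
 *   MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);
 * with A and B of type MATSEQAIJMKL; the result C is then built by
 * MatSeqAIJMKL_create_from_mkl_handle() from the handle that mkl_sparse_spmm() returns. */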

#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C)
{
  Mat_SeqAIJMKL       *a, *b, *c;
  sparse_matrix_t     csrA, csrB, csrC;
  PetscErrorCode      ierr;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  c = (Mat_SeqAIJMKL*)C->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;
  csrC = c->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA,
                         SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB,
                         SPARSE_STAGE_FINALIZE_MULT,&csrC);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply");

  /* We have to update the PETSc AIJ representation for matrix C from the contents of the MKL handle. */
  MatSeqAIJMKL_update_from_mkl_handle(C);

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL    *a, *b;
  sparse_matrix_t  csrA, csrB, csrC;
  PetscErrorCode   ierr;
  sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState state;

  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;

  stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");

  MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C)
{
  Mat_SeqAIJMKL       *a, *p, *c;
  sparse_matrix_t     csrA, csrP, csrC;
  PetscBool           set, flag;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;
  PetscErrorCode      ierr;

  MatIsSymmetricKnown(A,&set,&flag);
  if (!set || !flag) {
    MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);
    return(0);
  }

  a = (Mat_SeqAIJMKL*)A->spptr;
  p = (Mat_SeqAIJMKL*)P->spptr;
  c = (Mat_SeqAIJMKL*)C->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) {
    MatSeqAIJMKL_create_mkl_handle(P);
  }
  csrA = a->csrA;
  csrP = p->csrA;
  csrC = c->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_LOWER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr");

  /* We have to update the PETSc AIJ representation for matrix C from the contents of the MKL handle. */
  MatSeqAIJMKL_update_from_mkl_handle(C);

  return(0);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
PetscErrorCode MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL       *a, *p;
  sparse_matrix_t     csrA, csrP, csrC;
  PetscBool           set, flag;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;
  PetscErrorCode      ierr;

  MatIsSymmetricKnown(A,&set,&flag);
  if (!set || !flag) {
    MatPtAP_SeqAIJ_SeqAIJ(A,P,scall,fill,C);
    return(0);
  }

  if (scall == MAT_REUSE_MATRIX) {
    MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(A,P,*C);
    return(0);
  }

  a = (Mat_SeqAIJMKL*)A->spptr;
  p = (Mat_SeqAIJMKL*)P->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) {
    MatSeqAIJMKL_create_mkl_handle(P);
  }
  csrA = a->csrA;
  csrP = p->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_LOWER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete full mkl_sparse_sypr");

  MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);
  MatSetOption(*C,MAT_SYMMETRIC,PETSC_TRUE);

  return(0);
}
#endif

/* These function prototypes are needed in MatConvert_SeqAIJ_SeqAIJMKL(), below. */
PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
#endif

/* MatConvert_SeqAIJ_SeqAIJMKL() converts a SeqAIJ matrix into a
 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL()
 * routine, but can also be used to convert an assembled SeqAIJ matrix
 * into a SeqAIJMKL one. */
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B = *newmat;
  Mat_SeqAIJMKL  *aijmkl;
  PetscBool      set;
  PetscBool      sametype;

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  PetscObjectTypeCompare((PetscObject)A,type,&sametype);
  if (sametype) return(0);

  PetscNewLog(B,&aijmkl);
  B->spptr = (void*)aijmkl;

  /* Set function pointers for methods that we inherit from AIJ but override.
   * We also parse some command line options below, since those determine some of the methods we point to. */
  B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
  B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
  B->ops->destroy          = MatDestroy_SeqAIJMKL;

  aijmkl->sparse_optimized = PETSC_FALSE;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
#else
  aijmkl->no_SpMV2 = PETSC_TRUE;
#endif
  aijmkl->eager_inspection = PETSC_FALSE;

  /* Parse command line options. */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");
  PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);
  PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);
  PetscOptionsEnd();
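  /* For example (illustrative command line, not from this source): running an application with
   *   ./app -mat_type aijmkl -mat_aijmkl_eager_inspection
   * selects this class and runs the MKL inspection/optimization step during MatAssemblyEnd()
   * rather than lazily at the first operation that needs the handle. */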
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  if (!aijmkl->no_SpMV2) {
    PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n");
    aijmkl->no_SpMV2 = PETSC_TRUE;
  }
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
  B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
  B->ops->matmult          = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
# if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
  B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
#   if !defined(PETSC_USE_COMPLEX)
  B->ops->ptap             = MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2;
  B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
#   endif
# endif
  B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
   * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
   * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
   * versions in which the older interface has not been deprecated, we use the old interface. */
  if (aijmkl->no_SpMV2) {
    B->ops->mult             = MatMult_SeqAIJMKL;
    B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
    B->ops->multadd          = MatMultAdd_SeqAIJMKL;
    B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
  }
#endif

  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqaijmkl_C",MatPtAP_IS_XAIJ);
#if defined(PETSC_HAVE_HYPRE)
  PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_seqaijmkl_seqaijmkl_C",MatMatMatMult_Transpose_AIJ_AIJ);
#endif
  if (!aijmkl->no_SpMV2) {
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
    PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);
#endif
    PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);
#endif
  }

  PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);
  *newmat = B;
  return(0);
}

/*@C
   MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
   This type inherits from AIJ and is largely identical, but uses sparse BLAS
   routines from Intel MKL whenever possible.
   If the installed version of MKL supports the "SpMV2" sparse
   inspector-executor routines, then those are used by default.
   MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
   symmetric A) operations are currently supported.
   Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.

   Collective

   Input Parameters:
+  comm - MPI communicator, set to PETSC_COMM_SELF
.  m - number of rows
.  n - number of columns
.  nz - number of nonzeros per row (same for all rows)
-  nnz - array containing the number of nonzeros in the various rows
         (possibly different for each row) or NULL

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
-  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied

   Notes:
   If nnz is given then nz is ignored

   Level: intermediate

.seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
@*/
PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,m,n,m,n);
  MatSetType(*A,MATSEQAIJMKL);
  MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);
  return(0);
}
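
/* A hedged usage sketch for MatCreateSeqAIJMKL() (illustrative only; compiled out with #if 0).
 * It builds a small 1-D Laplacian-style tridiagonal matrix, preallocating roughly 3 nonzeros
 * per row; the size n and the stencil values are arbitrary choices for the example. */
#if 0
static PetscErrorCode ExampleCreateTridiagonal(void)
{
  Mat            A;
  PetscInt       i,n = 8,cols[3];
  PetscScalar    vals[3] = {-1.0,2.0,-1.0};
  PetscErrorCode ierr;

  ierr = MatCreateSeqAIJMKL(PETSC_COMM_SELF,n,n,3,NULL,&A);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    cols[0] = i-1; cols[1] = i; cols[2] = i+1;
    /* Interior rows get all three stencil entries; boundary rows get two. */
    if (i == 0)        { ierr = MatSetValues(A,1,&i,2,cols+1,vals+1,INSERT_VALUES);CHKERRQ(ierr); }
    else if (i == n-1) { ierr = MatSetValues(A,1,&i,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); }
    else               { ierr = MatSetValues(A,1,&i,3,cols,vals,INSERT_VALUES);CHKERRQ(ierr); }
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  return 0;
}
#endif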

PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  MatSetType(A,MATSEQAIJ);
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);
  return(0);
}