Actual source code: csrperm.c  (petsc-3.3-p7, 2013-05-11)
/*
  Defines basic operations for the MATSEQAIJPERM matrix class.
  This class is derived from the MATSEQAIJ class and retains the
  compressed row storage (aka Yale sparse matrix format) but augments
  it with some permutation information that enables some operations
  to be more vectorizable.  A physically rearranged copy of the matrix
  may be stored if the user desires.

  Eventually a variety of permutations may be supported.
*/

#include <../src/mat/impls/aij/seq/aij.h>

#define NDIM 512
    /* NDIM specifies how many rows at a time we should work with when
     * performing the vectorized mat-vec.  This depends on various factors
     * such as vector register length, etc., and I really need to add a
     * way for the user (or the library) to tune this.  I'm setting it to
     * 512 for now since that is what Ed D'Azevedo was using in his Fortran
     * routines. */

typedef struct {
  PetscInt  ngroup;
  PetscInt *xgroup;
    /* Denotes where groups of rows with the same number of nonzeros
     * begin and end, i.e., xgroup[i] gives us the position in iperm[]
     * where the ith group begins. */
  PetscInt *nzgroup; /* nzgroup[i] gives the number of nonzeros in each row belonging to group i. */
  PetscInt *iperm;   /* The permutation vector. */

  /* Flag that indicates whether we need to clean up permutation
   * information during the MatDestroy. */
  PetscBool CleanUpAIJPERM;

  /* Some of this stuff is for Ed's recursive triangular solve.
   * I'm not sure what I need yet. */
  PetscInt blocksize;
  PetscInt nstep;
  PetscInt *jstart_list;
  PetscInt *jend_list;
  PetscInt *action_list;
  PetscInt *ngroup_list;
  PetscInt **ipointer_list;
  PetscInt **xgroup_list;
  PetscInt **nzgroup_list;
  PetscInt **iperm_list;
} Mat_SeqAIJPERM;
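/* Worked example (illustration only; the matrix below is hypothetical, not taken
 * from this file).  For a 5-row matrix whose rows contain 2, 0, 2, 1, and 2
 * nonzeros respectively, the fields above would hold:
 *
 *   ngroup  = 3                three distinct nonzero counts occur: 0, 1, 2
 *   nzgroup = {0, 1, 2}        nonzeros per row within each group
 *   xgroup  = {0, 1, 2, 5}     group g occupies iperm[xgroup[g] .. xgroup[g+1]-1]
 *   iperm   = {1, 3, 0, 2, 4}  original row indices, ordered by nonzero count
 *
 * so group 2 (rows with 2 nonzeros) consists of original rows 0, 2, and 4. */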

extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);

EXTERN_C_BEGIN
PetscErrorCode  MatConvert_SeqAIJPERM_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  /* This routine is only called to convert a MATAIJPERM to its base PETSc type, */
  /* so we will ignore 'MatType type'. */
  Mat            B = *newmat;
  Mat_SeqAIJPERM *aijperm = (Mat_SeqAIJPERM*)A->spptr;

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  /* Reset the original function pointers. */
  B->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
  B->ops->destroy     = MatDestroy_SeqAIJ;
  B->ops->duplicate   = MatDuplicate_SeqAIJ;

  /* Free everything in the Mat_SeqAIJPERM data structure.
   * We don't free the Mat_SeqAIJPERM struct itself, as this will
   * cause problems later when MatDestroy() tries to free it. */
  if (aijperm->CleanUpAIJPERM) {
    PetscFree(aijperm->xgroup);
    PetscFree(aijperm->nzgroup);
    PetscFree(aijperm->iperm);
  }

  /* Change the type of B to MATSEQAIJ. */
  PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);

  *newmat = B;
  return(0);
}
EXTERN_C_END

PetscErrorCode MatDestroy_SeqAIJPERM(Mat A)
{
  Mat_SeqAIJPERM *aijperm = (Mat_SeqAIJPERM *) A->spptr;

  /* Free everything in the Mat_SeqAIJPERM data structure.
   * Note that we don't need to free the Mat_SeqAIJPERM struct
   * itself, as MatDestroy() will do so. */
  if (aijperm && aijperm->CleanUpAIJPERM) {
    PetscFree(aijperm->xgroup);
    PetscFree(aijperm->nzgroup);
    PetscFree(aijperm->iperm);
  }
  PetscFree(A->spptr);

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);
  /* Note that I don't call MatSetType().  I believe this is because that
   * is only to be called when *building* a matrix.  I could be wrong, but
   * that is how things work for the SuperLU matrix class. */
  MatDestroy_SeqAIJ(A);
  return(0);
}

PetscErrorCode MatDuplicate_SeqAIJPERM(Mat A, MatDuplicateOption op, Mat *M)
{
  Mat_SeqAIJPERM *aijperm = (Mat_SeqAIJPERM *) A->spptr;
  Mat_SeqAIJPERM *aijperm_dest;

  MatDuplicate_SeqAIJ(A,op,M);
  /* (*M) and its spptr exist only after the duplicate call above. */
  aijperm_dest = (Mat_SeqAIJPERM *) (*M)->spptr;
  PetscMemcpy(aijperm_dest,aijperm,sizeof(Mat_SeqAIJPERM));
  /* Allocate space for, and copy, the grouping and permutation info.
   * I note that when the groups are initially determined in
   * MatSeqAIJPERM_create_perm, xgroup and nzgroup may be sized larger than
   * necessary.  But at this point, we know how large they need to be, and
   * allocate only the necessary amount of memory.  So the duplicated matrix
   * may actually use slightly less storage than the original! */
  PetscMalloc(A->rmap->n*sizeof(PetscInt), &aijperm_dest->iperm);
  PetscMalloc((aijperm->ngroup+1)*sizeof(PetscInt), &aijperm_dest->xgroup);
  PetscMalloc((aijperm->ngroup)*sizeof(PetscInt), &aijperm_dest->nzgroup);
  PetscMemcpy(aijperm_dest->iperm,aijperm->iperm,sizeof(PetscInt)*A->rmap->n);
  PetscMemcpy(aijperm_dest->xgroup,aijperm->xgroup,sizeof(PetscInt)*(aijperm->ngroup+1));
  PetscMemcpy(aijperm_dest->nzgroup,aijperm->nzgroup,sizeof(PetscInt)*aijperm->ngroup);
  return(0);
}

PetscErrorCode MatSeqAIJPERM_create_perm(Mat A)
{
  PetscInt        m;     /* Number of rows in the matrix. */
  Mat_SeqAIJ      *a = (Mat_SeqAIJ *)(A)->data;
  PetscInt        *ia;   /* From the CSR representation; points to the beginning of each row. */
  PetscInt        maxnz; /* Maximum number of nonzeros in any row. */
  PetscInt        *rows_in_bucket;
    /* To construct the permutation, we sort each row into one of maxnz
     * buckets based on how many nonzeros are in the row. */
  PetscInt        nz;
  PetscInt        *nz_in_row;  /* The number of nonzero elements in row k. */
  PetscInt        *ipnz;
    /* When constructing the iperm permutation vector,
     * ipnz[nz] is used to point to the next place in the permutation vector
     * that a row with nz nonzero elements should be placed. */
  Mat_SeqAIJPERM *aijperm = (Mat_SeqAIJPERM*) A->spptr;
    /* Points to the MATSEQAIJPERM-specific data in the matrix A. */
  PetscInt       i, ngroup, istart, ipos;

  /* I really ought to put something in here to check if A is of
   * type MATSEQAIJPERM and return an error code if it is not.
   * Come back and do this! */
  m  = A->rmap->n;
  ia = a->i;

  /* Allocate the arrays that will hold the permutation vector. */
  PetscMalloc(m*sizeof(PetscInt), &aijperm->iperm);

  /* Allocate some temporary work arrays that will be used in
   * calculating the permutation vector and groupings. */
  PetscMalloc((m+1)*sizeof(PetscInt), &rows_in_bucket);
  PetscMalloc((m+1)*sizeof(PetscInt), &ipnz);
  PetscMalloc(m*sizeof(PetscInt), &nz_in_row);

  /* Now actually figure out the permutation and grouping. */

  /* First pass: Determine number of nonzeros in each row, maximum
   * number of nonzeros in any row, and how many rows fall into each
   * "bucket" of rows with the same number of nonzeros. */
  maxnz = 0;
  for (i=0; i<m; i++) {
    nz_in_row[i] = ia[i+1]-ia[i];
    if (nz_in_row[i] > maxnz) maxnz = nz_in_row[i];
  }

  for (i=0; i<=maxnz; i++) {
    rows_in_bucket[i] = 0;
  }
  for (i=0; i<m; i++) {
    nz = nz_in_row[i];
    rows_in_bucket[nz]++;
  }

  /* Allocate space for the grouping info.  There will be at most (maxnz + 1)
   * groups.  (It is maxnz + 1 instead of simply maxnz because there may be
   * rows with no nonzero elements.)  If there are (maxnz + 1) groups,
   * then xgroup[] must consist of (maxnz + 2) elements, since the last
   * element of xgroup will tell us where the (maxnz + 1)th group ends.
   * We allocate space for the maximum number of groups;
   * that is potentially a little wasteful, but not too much so.
   * Perhaps I should fix it later. */
  PetscMalloc((maxnz+2)*sizeof(PetscInt), &aijperm->xgroup);
  PetscMalloc((maxnz+1)*sizeof(PetscInt), &aijperm->nzgroup);

  /* Second pass.  Look at what is in the buckets and create the groupings.
   * Note that it is OK to have a group of rows with no non-zero values. */
  ngroup = 0;
  istart = 0;
  for (i=0; i<=maxnz; i++) {
    if (rows_in_bucket[i] > 0) {
      aijperm->nzgroup[ngroup] = i;
      aijperm->xgroup[ngroup] = istart;
      ngroup++;
      istart += rows_in_bucket[i];
    }
  }

  aijperm->xgroup[ngroup] = istart;
  aijperm->ngroup = ngroup;

  /* Now fill in the permutation vector iperm. */
  ipnz[0] = 0;
  for (i=0; i<maxnz; i++) {
    ipnz[i+1] = ipnz[i] + rows_in_bucket[i];
  }

  for (i=0; i<m; i++) {
    nz = nz_in_row[i];
    ipos = ipnz[nz];
    aijperm->iperm[ipos] = i;
    ipnz[nz]++;
  }

  /* Clean up temporary work arrays. */
  PetscFree(rows_in_bucket);
  PetscFree(ipnz);
  PetscFree(nz_in_row);

  /* Since we've allocated some memory to hold permutation info,
   * flip the CleanUpAIJPERM flag to true. */
  aijperm->CleanUpAIJPERM = PETSC_TRUE;
  return(0);
}
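/* Illustrative trace of the two passes above (not from this file; it reuses the
 * hypothetical row nonzero counts nz_in_row = {2, 0, 2, 1, 2} from the example
 * following the Mat_SeqAIJPERM definition):
 *
 *   rows_in_bucket = {1, 1, 3}   one row with 0 nonzeros, one with 1, three with 2
 *   ipnz           = {0, 1, 2}   prefix sums: where each bucket starts in iperm
 *
 * The final loop then places each row at ipnz[nz] and increments that counter,
 * yielding iperm = {1, 3, 0, 2, 4}; in other words, the construction is a
 * counting sort of the rows by nonzero count. */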


PetscErrorCode MatAssemblyEnd_SeqAIJPERM(Mat A, MatAssemblyType mode)
{
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;

  if (mode == MAT_FLUSH_ASSEMBLY) return(0);

  /* Since a MATSEQAIJPERM matrix is really just a MATSEQAIJ with some
   * extra information, call the AssemblyEnd routine for a MATSEQAIJ.
   * I'm not sure if this is the best way to do this, but it avoids
   * a lot of code duplication.
   * I also note that currently MATSEQAIJPERM doesn't know anything about
   * the Mat_CompressedRow data structure that SeqAIJ now uses when there
   * are many zero rows.  If the SeqAIJ assembly end routine decides to use
   * this, this may break things.  (Don't know... haven't looked at it.) */
  a->inode.use = PETSC_FALSE;
  MatAssemblyEnd_SeqAIJ(A, mode);

  /* Now calculate the permutation and grouping information. */
  MatSeqAIJPERM_create_perm(A);
  return(0);
}

PetscErrorCode MatMult_SeqAIJPERM(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  const PetscInt    *aj,*ai;
#if !(defined(PETSC_USE_FORTRAN_KERNEL_MULTAIJPERM) && defined(notworking))
  PetscInt          i,j;
#endif

  /* Variables that don't appear in MatMult_SeqAIJ. */
  Mat_SeqAIJPERM *aijperm = (Mat_SeqAIJPERM *) A->spptr;
  PetscInt       *iperm;  /* Points to the permutation vector. */
  PetscInt       *xgroup;
    /* Denotes where groups of rows with same number of nonzeros
     * begin and end in iperm. */
  PetscInt       *nzgroup;
  PetscInt       ngroup;
  PetscInt       igroup;
  PetscInt       jstart,jend;
    /* jstart is used in loops to denote the position in iperm where a
     * group starts; jend denotes the position where it ends.
     * (jend + 1 is where the next group starts.) */
  PetscInt       iold,nz;
  PetscInt       istart,iend,isize;
  PetscInt       ipos;
  PetscScalar    yp[NDIM];
  PetscInt       ip[NDIM]; /* yp[] and ip[] are treated as vector "registers" for performing the mat-vec. */

#if defined(PETSC_HAVE_PRAGMA_DISJOINT)
#pragma disjoint(*x,*y,*aa)
#endif

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Get the info we need about the permutations and groupings. */
  iperm   = aijperm->iperm;
  ngroup  = aijperm->ngroup;
  xgroup  = aijperm->xgroup;
  nzgroup = aijperm->nzgroup;

#if defined(PETSC_USE_FORTRAN_KERNEL_MULTAIJPERM) && defined(notworking)
  fortranmultaijperm_(&m,x,ii,aj,aa,y);
#else

  for (igroup=0; igroup<ngroup; igroup++) {
    jstart = xgroup[igroup];
    jend   = xgroup[igroup+1] - 1;
    nz     = nzgroup[igroup];

    /* Handle the special cases where the number of nonzeros per row
     * in the group is either 0 or 1. */
    if (nz == 0) {
      for (i=jstart; i<=jend; i++) {
        y[iperm[i]] = 0.0;
      }
    } else if (nz == 1) {
      for (i=jstart; i<=jend; i++) {
        iold = iperm[i];
        ipos = ai[iold];
        y[iold] = aa[ipos] * x[aj[ipos]];
      }
    } else {

      /* We work our way through the current group in chunks of NDIM rows
       * at a time. */

      for (istart=jstart; istart<=jend; istart+=NDIM) {
        /* Figure out where the chunk of 'isize' rows ends in iperm.
         * 'isize' may of course be less than NDIM for the last chunk. */
        iend = istart + (NDIM - 1);
        if (iend > jend) { iend = jend; }
        isize = iend - istart + 1;

        /* Initialize the yp[] array that will be used to hold part of
         * the permuted results vector, and figure out where in aa each
         * row of the chunk will begin. */
        for (i=0; i<isize; i++) {
          iold = iperm[istart + i];
            /* iold is a row number from the matrix A *before* reordering. */
          ip[i] = ai[iold];
            /* ip[i] tells us where the ith row of the chunk begins in aa. */
          yp[i] = (PetscScalar) 0.0;
        }

        /* If the number of nonzeros per row exceeds the number of rows in
         * the chunk, we should vectorize along nz, that is, perform the
         * mat-vec one row at a time as in the usual CSR case. */
        if (nz > isize) {
#if defined(PETSC_HAVE_CRAY_VECTOR)
#pragma _CRI preferstream
#endif
          for (i=0; i<isize; i++) {
#if defined(PETSC_HAVE_CRAY_VECTOR)
#pragma _CRI prefervector
#endif
            for (j=0; j<nz; j++) {
              ipos = ip[i] + j;
              yp[i] += aa[ipos] * x[aj[ipos]];
            }
          }
        } else {
          /* Otherwise, there are enough rows in the chunk to make it
           * worthwhile to vectorize across the rows, that is, to do the
           * matvec by operating with "columns" of the chunk. */
          for (j=0; j<nz; j++) {
            for (i=0; i<isize; i++) {
              ipos = ip[i] + j;
              yp[i] += aa[ipos] * x[aj[ipos]];
            }
          }
        }

#if defined(PETSC_HAVE_CRAY_VECTOR)
#pragma _CRI ivdep
#endif
        /* Put results from yp[] into the non-permuted result vector y. */
        for (i=0; i<isize; i++) {
          y[iperm[istart+i]] = yp[i];
        }
      } /* End processing chunk of isize rows of a group. */
    } /* End handling matvec for chunk with nz > 1. */
  } /* End loop over igroup. */
#endif
  PetscLogFlops(2.0*a->nz - A->rmap->n);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
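/* Illustrative sketch of the chunked access pattern above (not part of the
 * original source).  Within a chunk of isize rows that all contain nz nonzeros,
 * entry j of chunk row i lives at aa[ip[i] + j], so the chunk can be pictured
 * as a dense isize-by-nz block (an ELLPACK-like view of the CSR data):
 *
 *         j = 0          j = 1          ...   j = nz-1
 *   i=0   aa[ip[0]]      aa[ip[0]+1]    ...   aa[ip[0]+nz-1]
 *   i=1   aa[ip[1]]      aa[ip[1]+1]    ...   aa[ip[1]+nz-1]
 *   ...
 *
 * When nz > isize the loops sweep each row (fixed i, varying j), as in an
 * ordinary CSR mat-vec; otherwise they sweep "columns" of the block (fixed j,
 * varying i), giving a long inner loop over i in which yp[] and ip[] are
 * accessed with unit stride while aa and x are gathered through ip[] and aj[]. */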


/* MatMultAdd_SeqAIJPERM() calculates yy = ww + A * xx.
 * Note that the names I used to designate the vectors differ from those
 * used in MatMultAdd_SeqAIJ().  I did this to keep my notation consistent
 * with the MatMult_SeqAIJPERM() routine, which is very similar to this one. */
/*
    I hate having virtually identical code for the mult and the multadd!!!
*/
PetscErrorCode MatMultAdd_SeqAIJPERM(Mat A,Vec xx,Vec ww,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*w;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  const PetscInt    *aj,*ai;
#if !defined(PETSC_USE_FORTRAN_KERNEL_MULTADDAIJPERM)
  PetscInt          i,j;
#endif

  /* Variables that don't appear in MatMultAdd_SeqAIJ. */
  Mat_SeqAIJPERM *aijperm;
  PetscInt       *iperm;  /* Points to the permutation vector. */
  PetscInt       *xgroup;
    /* Denotes where groups of rows with same number of nonzeros
     * begin and end in iperm. */
  PetscInt       *nzgroup;
  PetscInt       ngroup;
  PetscInt       igroup;
  PetscInt       jstart,jend;
    /* jstart is used in loops to denote the position in iperm where a
     * group starts; jend denotes the position where it ends.
     * (jend + 1 is where the next group starts.) */
  PetscInt       iold,nz;
  PetscInt       istart,iend,isize;
  PetscInt       ipos;
  PetscScalar    yp[NDIM];
  PetscInt       ip[NDIM];
    /* yp[] and ip[] are treated as vector "registers" for performing
     * the mat-vec. */

#if defined(PETSC_HAVE_PRAGMA_DISJOINT)
#pragma disjoint(*x,*y,*aa)
#endif

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  if (yy != ww) {
    VecGetArray(ww,&w);
  } else {
    w = y;
  }

  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Get the info we need about the permutations and groupings. */
  aijperm = (Mat_SeqAIJPERM *) A->spptr;
  iperm   = aijperm->iperm;
  ngroup  = aijperm->ngroup;
  xgroup  = aijperm->xgroup;
  nzgroup = aijperm->nzgroup;

#if defined(PETSC_USE_FORTRAN_KERNEL_MULTADDAIJPERM)
  fortranmultaddaijperm_(&m,x,ii,aj,aa,y,w);
#else

  for (igroup=0; igroup<ngroup; igroup++) {
    jstart = xgroup[igroup];
    jend   = xgroup[igroup+1] - 1;

    nz = nzgroup[igroup];

    /* Handle the special cases where the number of nonzeros per row
     * in the group is either 0 or 1. */
    if (nz == 0) {
      for (i=jstart; i<=jend; i++) {
        iold = iperm[i];
        y[iold] = w[iold];
      }
    }
    else if (nz == 1) {
      for (i=jstart; i<=jend; i++) {
        iold = iperm[i];
        ipos = ai[iold];
        y[iold] = w[iold] + aa[ipos] * x[aj[ipos]];
      }
    }
    /* For the general case: */
    else {

      /* We work our way through the current group in chunks of NDIM rows
       * at a time. */

      for (istart=jstart; istart<=jend; istart+=NDIM) {
        /* Figure out where the chunk of 'isize' rows ends in iperm.
         * 'isize' may of course be less than NDIM for the last chunk. */
        iend = istart + (NDIM - 1);
        if (iend > jend) { iend = jend; }
        isize = iend - istart + 1;

        /* Initialize the yp[] array that will be used to hold part of
         * the permuted results vector, and figure out where in aa each
         * row of the chunk will begin. */
        for (i=0; i<isize; i++) {
          iold = iperm[istart + i];
            /* iold is a row number from the matrix A *before* reordering. */
          ip[i] = ai[iold];
            /* ip[i] tells us where the ith row of the chunk begins in aa. */
          yp[i] = w[iold];
        }

        /* If the number of nonzeros per row exceeds the number of rows in
         * the chunk, we should vectorize along nz, that is, perform the
         * mat-vec one row at a time as in the usual CSR case. */
        if (nz > isize) {
#if defined(PETSC_HAVE_CRAY_VECTOR)
#pragma _CRI preferstream
#endif
          for (i=0; i<isize; i++) {
#if defined(PETSC_HAVE_CRAY_VECTOR)
#pragma _CRI prefervector
#endif
            for (j=0; j<nz; j++) {
              ipos = ip[i] + j;
              yp[i] += aa[ipos] * x[aj[ipos]];
            }
          }
        }
        /* Otherwise, there are enough rows in the chunk to make it
         * worthwhile to vectorize across the rows, that is, to do the
         * matvec by operating with "columns" of the chunk. */
        else {
          for (j=0; j<nz; j++) {
            for (i=0; i<isize; i++) {
              ipos = ip[i] + j;
              yp[i] += aa[ipos] * x[aj[ipos]];
            }
          }
        }

#if defined(PETSC_HAVE_CRAY_VECTOR)
#pragma _CRI ivdep
#endif
        /* Put results from yp[] into the non-permuted result vector y. */
        for (i=0; i<isize; i++) {
          y[iperm[istart+i]] = yp[i];
        }
      } /* End processing chunk of isize rows of a group. */

    } /* End handling matvec for chunk with nz > 1. */
  } /* End loop over igroup. */

#endif
  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  if (yy != ww) {
    VecRestoreArray(ww,&w);
  }
  return(0);
}


/* MatConvert_SeqAIJ_SeqAIJPERM converts a SeqAIJ matrix into a
 * SeqAIJPERM matrix.  This routine is called by the MatCreate_SeqAIJPERM()
 * routine, but can also be used to convert an assembled SeqAIJ matrix
 * into a SeqAIJPERM one. */
EXTERN_C_BEGIN
PetscErrorCode  MatConvert_SeqAIJ_SeqAIJPERM(Mat A,const MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B = *newmat;
  Mat_SeqAIJPERM *aijperm;

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  PetscNewLog(B,Mat_SeqAIJPERM,&aijperm);
  B->spptr = (void *) aijperm;

  /* Set function pointers for methods that we inherit from AIJ but override. */
  B->ops->duplicate   = MatDuplicate_SeqAIJPERM;
  B->ops->assemblyend = MatAssemblyEnd_SeqAIJPERM;
  B->ops->destroy     = MatDestroy_SeqAIJPERM;
  B->ops->mult        = MatMult_SeqAIJPERM;
  B->ops->multadd     = MatMultAdd_SeqAIJPERM;

  /* If A has already been assembled, compute the permutation. */
  if (A->assembled) {
    MatSeqAIJPERM_create_perm(B);
  }

  PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaijperm_seqaij_C","MatConvert_SeqAIJPERM_SeqAIJ",MatConvert_SeqAIJPERM_SeqAIJ);

  PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJPERM);
  *newmat = B;
  return(0);
}
EXTERN_C_END
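/* Illustrative usage sketch (not part of the original source).  An assembled
 * SeqAIJ matrix A could be converted into a new SeqAIJPERM matrix B with, for
 * example,
 *
 *   ierr = MatConvert(A,MATSEQAIJPERM,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
 *
 * where A and B are hypothetical Mat variables in the caller's code. */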


/*@C
   MatCreateSeqAIJPERM - Creates a sparse matrix of type SEQAIJPERM.
   This type inherits from AIJ, but calculates some additional permutation
   information that is used to allow better vectorization of some
   operations.  At the cost of increased storage, the AIJ formatted
   matrix can be copied to a format in which pieces of the matrix are
   stored in ELLPACK format, allowing the vectorized matrix multiply
   routine to use stride-1 memory accesses.  As with the AIJ type, it is
   important to preallocate matrix storage in order to get good assembly
   performance.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator, set to PETSC_COMM_SELF
.  m - number of rows
.  n - number of columns
.  nz - number of nonzeros per row (same for all rows)
-  nnz - array containing the number of nonzeros in the various rows
         (possibly different for each row) or PETSC_NULL

   Output Parameter:
.  A - the matrix

   Notes:
   If nnz is given then nz is ignored

   Level: intermediate

.keywords: matrix, cray, sparse, parallel

.seealso: MatCreate(), MatCreateMPIAIJPERM(), MatSetValues()
@*/
PetscErrorCode  MatCreateSeqAIJPERM(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,m,n,m,n);
  MatSetType(*A,MATSEQAIJPERM);
  MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);
  return(0);
}
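/* Illustrative usage sketch (not part of the original source).  Builds a
 * hypothetical n x n diagonal matrix of type SEQAIJPERM and applies it; the
 * names A, x, y, and n are placeholders, not identifiers from this file.
 *
 *   Mat         A;
 *   Vec         x, y;
 *   PetscInt    i, n = 100;
 *   PetscScalar one = 1.0;
 *
 *   MatCreateSeqAIJPERM(PETSC_COMM_SELF,n,n,1,PETSC_NULL,&A);
 *   for (i=0; i<n; i++) {
 *     MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);
 *   }
 *   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
 *   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);    <-- MatAssemblyEnd_SeqAIJPERM builds the permutation here
 *   VecCreateSeq(PETSC_COMM_SELF,n,&x);
 *   VecDuplicate(x,&y);
 *   VecSet(x,one);
 *   MatMult(A,x,y);                          <-- dispatches to MatMult_SeqAIJPERM
 *   MatDestroy(&A); VecDestroy(&x); VecDestroy(&y);
 */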

EXTERN_C_BEGIN
PetscErrorCode  MatCreate_SeqAIJPERM(Mat A)
{
  MatSetType(A,MATSEQAIJ);
  MatConvert_SeqAIJ_SeqAIJPERM(A,MATSEQAIJPERM,MAT_REUSE_MATRIX,&A);
  return(0);
}
EXTERN_C_END