Actual source code: ex5.c

petsc-3.4.4 2014-03-13
  2: static char help[] = "Micro-benchmark kernel times.\n\n";

  4: /*
  5:   Include "petscthreadcomm.h" so that we can use the PetscThreadComm interface.
  6: */
  7: #include <petscthreadcomm.h>
  8: #include <petsc-private/threadcommimpl.h>
  9: #include <petscvec.h>
 10: #include <petsctime.h>
 11: #if defined(PETSC_HAVE_OPENMP)
 12: #  include <omp.h>
 13: #endif

 15: static PetscErrorCode CounterInit_kernel(PetscInt trank,PetscInt **counters)
 16: {
 17:   counters[trank]  = malloc(sizeof(PetscInt)); /* Separate allocation per thread */
 18:   *counters[trank] = 0;                      /* Initialize memory to fault it */
 19:   return 0;
 20: }

 22: static PetscErrorCode CounterIncrement_kernel(PetscInt trank,PetscInt **counters)
 23: {
 24:   (*counters[trank])++;
 25:   return 0;
 26: }

 28: static PetscErrorCode CounterFree_kernel(PetscInt trank,PetscInt **counters)
 29: {
 30:   free(counters[trank]);
 31:   return 0;
 32: }

 36: int main(int argc,char **argv)
 37: {
 39:   PetscInt       i,j,k,N=100,**counters,tsize;

 41:   PetscInitialize(&argc,&argv,(char*)0,help);

 43:   PetscThreadCommView(PETSC_COMM_WORLD,PETSC_VIEWER_STDOUT_WORLD);
 44:   PetscOptionsGetInt(NULL,"-N",&N,NULL);

 46:   PetscThreadCommGetNThreads(PETSC_COMM_WORLD,&tsize);
 47:   PetscMalloc(tsize*sizeof(*counters),&counters);
 48:   PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)CounterInit_kernel,1,counters);

 50:   for (i=0; i<10; i++) {
 51:     PetscReal t0,t1;
 52:     PetscThreadCommBarrier(PETSC_COMM_WORLD);
 53:     PetscTime(&t0);
 54:     for (j=0; j<N; j++) {
 55:       /*      PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)CounterIncrement_kernel,1,counters); */
 56:       PetscThreadCommRunKernel1(PETSC_COMM_WORLD,(PetscThreadKernel)CounterIncrement_kernel,counters);
 57:     }
 58:     PetscThreadCommBarrier(PETSC_COMM_WORLD);
 59:     PetscTime(&t1);
 60:     PetscPrintf(PETSC_COMM_WORLD,"Time per kernel: %g us\n",1e6*(t1-t0)/N);
 61:   }

 63:   for (i=0; i<10; i++) {
 64:     PetscReal t0,t1;
 65:     PetscThreadCommBarrier(PETSC_COMM_WORLD);
 66:     PetscTime(&t0);
 67:     for (j=0; j<N; j++) {
 68: #pragma omp parallel num_threads(tsize)
 69:       {
 70:         PetscInt trank = omp_get_thread_num();
 71:         CounterIncrement_kernel(trank,counters);
 72:       }
 73:     }
 74:     PetscThreadCommBarrier(PETSC_COMM_WORLD);
 75:     PetscTime(&t1);
 76:     PetscPrintf(PETSC_COMM_WORLD,"OpenMP inline time per kernel: %g us\n",1e6*(t1-t0)/N);
 77:   }

 79:   for (i=0; i<10; i++) {
 80:     PetscReal t0,t1;
 81:     PetscTime(&t0);
 82:     for (j=0; j<N; j++) CounterIncrement_kernel(0,counters);
 83:     PetscTime(&t1);
 84:     PetscPrintf(PETSC_COMM_WORLD,"Serial inline time per single kernel: %g us\n",1e6*(t1-t0)/N);
 85:   }

 87:   for (i=0; i<10; i++) {
 88:     PetscReal t0,t1;
 89:     PetscTime(&t0);
 90:     for (j=0; j<N; j++) {
 91:       for (k=0; k<tsize; k++) CounterIncrement_kernel(k,counters);
 92:     }
 93:     PetscTime(&t1);
 94:     PetscPrintf(PETSC_COMM_WORLD,"Serial inline time per kernel: %g us\n",1e6*(t1-t0)/N);
 95:   }

 97:   PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)CounterFree_kernel,1,counters);
 98:   PetscThreadCommBarrier(PETSC_COMM_WORLD);
 99:   PetscFree(counters);
100:   PetscFinalize();
101:   return 0;
102: }