Actual source code: ex5.c

petsc-3.3-p7 2013-05-11
  1: static char help[] = "Micro-benchmark kernel times.\n\n";

  3: /*
  4:   Include "petscthreadcomm.h" so that we can use the PetscThreadComm interface.
  5: */
  6: #include <petscthreadcomm.h>
  7: #include <petscvec.h>
  8: #if defined(PETSC_HAVE_OPENMP)
  9: #  include <omp.h>
 10: #endif

 12: static PetscErrorCode CounterInit_kernel(PetscInt trank,PetscInt **counters)
 13: {
 14:   counters[trank] = malloc(sizeof(PetscInt)); /* Separate allocation per thread */
 15:   *counters[trank] = 0;                      /* Initialize memory to fault it */
 16:   return 0;
 17: }

 19: static PetscErrorCode CounterIncrement_kernel(PetscInt trank,PetscInt **counters)
 20: {
 21:   (*counters[trank])++;
 22:   return 0;
 23: }

 25: static PetscErrorCode CounterFree_kernel(PetscInt trank,PetscInt **counters)
 26: {
 27:   free(counters[trank]);
 28:   return 0;
 29: }

 33: int main(int argc,char **argv)
 34: {
 36:   PetscInt       i,j,N=100,**counters,tsize;

 38:   PetscInitialize(&argc,&argv,(char *)0,help);

 40: #if defined(PETSC_THREADCOMM_ACTIVE)
 41:   PetscThreadCommView(PETSC_COMM_WORLD,PETSC_VIEWER_STDOUT_WORLD);
 42: #endif
 43:   PetscOptionsGetInt(PETSC_NULL,"-N",&N,PETSC_NULL);

 45:   PetscThreadCommGetNThreads(PETSC_COMM_WORLD,&tsize);
 46:   PetscMalloc(tsize*sizeof *counters,&counters);
 47:   PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)CounterInit_kernel,1,counters);

 49:   for (i=0; i<10; i++) {
 50:     PetscReal t0,t1;
 51:     PetscThreadCommBarrier(PETSC_COMM_WORLD);
 52:     PetscGetTime(&t0);
 53:     for (j=0; j<N; j++) {
 54:       PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)CounterIncrement_kernel,1,counters);
 55:     }
 56:     PetscThreadCommBarrier(PETSC_COMM_WORLD);
 57:     PetscGetTime(&t1);
 58:     PetscPrintf(PETSC_COMM_WORLD,"Time per kernel: %g us\n",1e6*(t1-t0)/N);
 59:   }

 61:   for (i=0; i<10; i++) {
 62:     PetscReal t0,t1;
 63:     PetscThreadCommBarrier(PETSC_COMM_WORLD);
 64:     PetscGetTime(&t0);
 65:     for (j=0; j<N; j++) {
 66: #pragma omp parallel num_threads(tsize)
 67:       {
 68:         int trank = omp_get_thread_num();
 69:         (*counters[trank])++;
 70:       }
 71:     }
 72:     PetscThreadCommBarrier(PETSC_COMM_WORLD);
 73:     PetscGetTime(&t1);
 74:     PetscPrintf(PETSC_COMM_WORLD,"OpenMP inline time per kernel: %g us\n",1e6*(t1-t0)/N);
 75:   }

 77:     for (i=0; i<10; i++) {
 78:     PetscReal t0,t1;
 79:     PetscGetTime(&t0);
 80:     for (j=0; j<N; j++) {
 81:       (*(volatile PetscInt*)counters[0])++;
 82:     }
 83:     PetscGetTime(&t1);
 84:     PetscPrintf(PETSC_COMM_WORLD,"Serial inline time per kernel: %g us\n",1e6*(t1-t0)/N);
 85:   }

 87:   PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)CounterFree_kernel,1,counters);
 88:   PetscFree(counters);
 89:   PetscFinalize();
 90:   return 0;
 91: }