Actual source code: performance.c
petsc-3.15.0 2021-04-05
1: static char help[] = "Time vector operations on GPU\n";
2: /* This program produces the results for Argonne Technical Report ANL-19/41.
3: The technical report and resources for generating data can be found in the
4: repository: https://gitlab.com/hannah_mairs/summit-performance */
6: #include <petscvec.h>
8: int main(int argc, char **argv)
9: {
11: Vec v,w,x;
12: PetscInt n=15;
13: PetscScalar val;
14: PetscReal norm1,norm2;
15: PetscRandom rctx;
16: #if defined(PETSC_USE_LOG)
17: PetscLogStage stage;
18: #endif
20: PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
21: PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);
22: PetscRandomCreate(PETSC_COMM_WORLD,&rctx);
23: PetscRandomSetFromOptions(rctx);
24: VecCreate(PETSC_COMM_WORLD,&v);
25: VecSetSizes(v,PETSC_DECIDE,n);
26: VecSetFromOptions(v);
27: VecDuplicate(v,&w);
28: VecSetRandom(v,rctx);
29: VecSetRandom(w,rctx);
31: /* create dummy vector to clear cache */
32: VecCreate(PETSC_COMM_WORLD,&x);
33: VecSetSizes(x,PETSC_DECIDE,10000000);
34: VecSetFromOptions(x);
35: VecSetRandom(x,rctx);
37: /* send v to GPU */
38: PetscBarrier(NULL);
39: VecNorm(v,NORM_1,&norm1);
41: /* register a stage work on GPU */
42: PetscLogStageRegister("Work on GPU", &stage);
43: PetscLogStagePush(stage);
44: VecNorm(w,NORM_1,&norm1); /* send w to GPU */
45: VecNorm(x,NORM_1,&norm1); /* clear cache */
46: PetscBarrier(NULL);
47: VecAXPY(w,1.0,v);
48: VecNorm(x,NORM_INFINITY,&norm1);
49: PetscBarrier(NULL);
50: VecDot(w,v,&val);
51: VecNorm(x,NORM_1,&norm1);
52: PetscBarrier(NULL);
53: VecSet(v,0.0);
54: VecNorm(x,NORM_2,&norm2);
55: PetscBarrier(NULL);
56: VecCopy(v,w);
57: PetscLogStagePop();
59: PetscPrintf(PETSC_COMM_WORLD,"Test completed successfully!\n");
60: VecDestroy(&v);
61: VecDestroy(&w);
62: VecDestroy(&x);
63: PetscRandomDestroy(&rctx);
64: PetscFinalize();
65: return ierr;
66: }
68: /*TEST
70: testset:
71: nsize: 2
72: output_file: output/performance_cuda.out
74: test:
75: suffix: cuda
76: args: -vec_type mpicuda
77: requires: cuda
79: test:
80: suffix: hip
81: args: -vec_type mpihip
82: requires: hip
84: TEST*/