Actual source code: ex158.c

  /* Help text handed to PetscInitialize(); it is never written to, hence const. */
  static const char help[] = "Illustrate how to use mpi FFTW and PETSc-FFTW interface \n\n";

  3: /*
  4:   Compiling the code:
  5:       This code requires the real numbers version of PETSc: main() stops with
  6:       an error when PETSC_USE_COMPLEX is defined, so configure PETSc with real scalars

  8:  Usage:
  9:    mpiexec -n <np> ./ex158 -use_FFTW_interface NO
 10:    mpiexec -n <np> ./ex158 -use_FFTW_interface YES
 11: */

 13: #include <petscmat.h>
 14: #include <fftw3-mpi.h>

 18: PetscInt main(PetscInt argc,char **args)
 19: {
 20:   PetscErrorCode  ierr;
 21:   PetscMPIInt     rank,size;
 22:   PetscInt        N0=50,N1=20,N=N0*N1;
 23:   PetscRandom     rdm;
 24:   PetscScalar     a;
 25:   PetscReal       enorm;
 26:   Vec             x,y,z;
 27:   PetscBool       view=PETSC_FALSE,use_interface=PETSC_TRUE;

 29:   PetscInitialize(&argc,&args,(char *)0,help);
 30: #if defined(PETSC_USE_COMPLEX)
 31:   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP, "This example requires real numbers. Your current scalar type is complex!");
 32: #endif

 34:   PetscOptionsBegin(PETSC_COMM_WORLD, PETSC_NULL, "FFTW Options", "ex143");
 35:     PetscOptionsBool("-vec_view_draw", "View the vectors", "ex143", view, &view, PETSC_NULL);
 36:     PetscOptionsBool("-use_FFTW_interface", "Use PETSc-FFTW interface", "ex143",use_interface, &use_interface, PETSC_NULL);
 37:   PetscOptionsEnd();

 39:   PetscOptionsGetBool(PETSC_NULL,"-use_FFTW_interface",&use_interface,PETSC_NULL);

 41:   MPI_Comm_size(PETSC_COMM_WORLD, &size);
 42:   MPI_Comm_rank(PETSC_COMM_WORLD, &rank);

 44:   PetscRandomCreate(PETSC_COMM_WORLD, &rdm);
 45:   PetscRandomSetFromOptions(rdm);

 47:   if (!use_interface){
 48:     /* Use mpi FFTW without PETSc-FFTW interface, 2D case only */
 49:     /*---------------------------------------------------------*/
 50:     fftw_plan       fplan,bplan;
 51:     fftw_complex    *data_in,*data_out,*data_out2;
 52:     ptrdiff_t       alloc_local,local_n0,local_0_start;

 54:     if (!rank) printf("Use FFTW without PETSc-FFTW interface\n");
 55:     fftw_mpi_init();
 56:     N = N0*N1;
 57:     alloc_local = fftw_mpi_local_size_2d(N0,N1,PETSC_COMM_WORLD,&local_n0,&local_0_start);

 59:     data_in   = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
 60:     data_out  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
 61:     data_out2 = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
 62:     VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_in,&x);
 63:     PetscObjectSetName((PetscObject) x, "Real Space vector");
 64:     VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_out,&y);
 65:     PetscObjectSetName((PetscObject) y, "Frequency space vector");
 66:     VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_out2,&z);
 67:     PetscObjectSetName((PetscObject) z, "Reconstructed vector");

 69:     fplan = fftw_mpi_plan_dft_2d(N0,N1,data_in,data_out,PETSC_COMM_WORLD,FFTW_FORWARD,FFTW_ESTIMATE);
 70:     bplan = fftw_mpi_plan_dft_2d(N0,N1,data_out,data_out2,PETSC_COMM_WORLD,FFTW_BACKWARD,FFTW_ESTIMATE);

 72:     VecSetRandom(x, rdm);
 73:     if (view){VecView(x,PETSC_VIEWER_STDOUT_WORLD);}

 75:     fftw_execute(fplan);
 76:     if (view){VecView(y,PETSC_VIEWER_STDOUT_WORLD);}

 78:     fftw_execute(bplan);

 80:     /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */
 81:     a = 1.0/(PetscReal)N;
 82:     VecScale(z,a);
 83:     if (view){VecView(z, PETSC_VIEWER_STDOUT_WORLD);}
 84:     VecAXPY(z,-1.0,x);
 85:     VecNorm(z,NORM_1,&enorm);
 86:     if (enorm > 1.e-11){
 87:       PetscPrintf(PETSC_COMM_SELF,"  Error norm of |x - z| %A\n",enorm);
 88:     }

 90:     /* Free spaces */
 91:     fftw_destroy_plan(fplan);
 92:     fftw_destroy_plan(bplan);
 93:     fftw_free(data_in);  VecDestroy(&x);
 94:     fftw_free(data_out); VecDestroy(&y);
 95:     fftw_free(data_out2);VecDestroy(&z);

 97:   } else {
 98:     /* Use PETSc-FFTW interface                  */
 99:     /*-------------------------------------------*/
100:     PetscInt i,*dim,k,DIM;
101:     Mat      A;
102:     Vec input,output;

104:     N=30;
105:     for (i=2; i<5; i++){
106:       DIM = i;
107:       PetscMalloc(i*sizeof(PetscInt),&dim);
108:       for(k=0;k<i;k++){
109:         dim[k]=30;
110:       }
111:       N *= dim[i-1];
112: 
113:       /* Create FFTW object */
114:       if (!rank) printf("Use PETSc-FFTW interface...%d-DIM:%d \n",DIM,N);
115:       MatCreateFFT(PETSC_COMM_WORLD,DIM,dim,MATFFTW,&A);

117:       /* Create FFTW vectors that are compatible with parallel layout of A */
118:       MatGetVecsFFTW(A,&x,&y,&z);
119:       PetscObjectSetName((PetscObject) x, "Real space vector");
120:       PetscObjectSetName((PetscObject) y, "Frequency space vector");
121:       PetscObjectSetName((PetscObject) z, "Reconstructed vector");

123:       /* Create and set PETSc vector */
124:       VecCreate(PETSC_COMM_WORLD,&input);
125:       VecSetSizes(input,PETSC_DECIDE,N);
126:       VecSetFromOptions(input);
127:       VecSetRandom(input,rdm);
128:       VecDuplicate(input,&output);
129:       if (view){VecView(input,PETSC_VIEWER_STDOUT_WORLD);}

131:       /* Vector input is copied to another vector x using VecScatterPetscToFFTW. This is because the user data 
132:          can have any parallel layout. But FFTW requires special parallel layout of the data. Hence the original 
133:          data which is in the vector "input" here, needs to be copied to a vector x, which has the correct parallel
134:          layout for FFTW. Also, during parallel real transform, this pads extra zeros automatically
135:          at the end of last  dimension. This padding is required by FFTW to perform parallel real D.F.T.  */
136:       VecScatterPetscToFFTW(A,input,x);
137: 
138:       /* Apply FFTW_FORWARD and FFTW_BACKWARD */
139:       MatMult(A,x,y);
140:       if (view){VecView(y,PETSC_VIEWER_STDOUT_WORLD);}
141:       MatMultTranspose(A,y,z);

143:       /* Output from Backward DFT needs to be modified to obtain user readable data the routine VecScatterFFTWToPetsc 
144:          performs the job. In some sense this is the reverse operation of VecScatterPetscToFFTW. This routine gets rid of 
145:          the extra spaces that were artificially padded to perform real parallel transform.    */
146:       VecScatterFFTWToPetsc(A,z,output);

148:       /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */
149:       a = 1.0/(PetscReal)N;
150:       VecScale(output,a);
151:       if (view){VecView(output,PETSC_VIEWER_STDOUT_WORLD);}
152:       VecAXPY(output,-1.0,input);
153:       VecNorm(output,NORM_1,&enorm);
154:       if (enorm > 1.e-09 && !rank){
155:         PetscPrintf(PETSC_COMM_SELF,"  Error norm of |x - z| %e\n",enorm);
156:       }

158:       /* Free spaces */
159:       PetscFree(dim);
160:       VecDestroy(&input);
161:       VecDestroy(&output);
162:       VecDestroy(&x);
163:       VecDestroy(&y);
164:       VecDestroy(&z);
165:       MatDestroy(&A);
166:     }
167:   }
168:   PetscRandomDestroy(&rdm);
169:   PetscFinalize();
170:   return 0;
171: }