Actual source code: fftw.c

  1: /*
  2:     Provides an interface to the FFTW package.
  3:     Testing examples can be found in ~src/mat/tests
  4: */
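
/*
   A minimal usage sketch (assumptions: a complex-scalar build; `comm`, `ndim`, and `dim`
   are placeholders supplied by the caller):

     Mat A;
     Vec x, y, z;
     PetscCall(MatCreateFFT(comm, ndim, dim, MATFFTW, &A)); // forward DFT operator
     PetscCall(MatCreateVecsFFTW(A, &x, &y, &z));           // vectors with compatible layouts
     PetscCall(MatMult(A, x, y));                           // forward transform
     PetscCall(MatMultTranspose(A, y, z));                  // unnormalized backward transform
*/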

  6: #include <../src/mat/impls/fft/fft.h>
  7: EXTERN_C_BEGIN
  8: #if !PetscDefined(HAVE_MPIUNI)
  9:   #include <fftw3-mpi.h>
 10: #else
 11:   #include <fftw3.h>
 12: #endif
 13: EXTERN_C_END

 15: typedef struct {
 16:   ptrdiff_t ndim_fftw, *dim_fftw;
 17: #if defined(PETSC_USE_64BIT_INDICES)
 18:   fftw_iodim64 *iodims;
 19: #else
 20:   fftw_iodim *iodims;
 21: #endif
 22:   PetscInt     partial_dim;
 23:   fftw_plan    p_forward, p_backward;
 24:   unsigned     p_flag;                                      /* planner flags, FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
 25:   PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of the arrays because an FFTW plan should only be
 26:                                                             executed on the arrays with which it was created */
 27: } Mat_FFTW;
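
/* Note: plans are created lazily on the first MatMult()/MatMultTranspose() and cached above; per
   FFTW's new-array execute rules, a cached plan is applied to different arrays with
   fftw_execute_dft()/fftw_execute_dft_r2c() instead of fftw_execute() */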

 29: extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec);
 30: extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat, Vec, Vec);
 31: extern PetscErrorCode MatDestroy_FFTW(Mat);
 32: extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *);
 33: #if !PetscDefined(HAVE_MPIUNI)
 34: extern PetscErrorCode MatMult_MPIFFTW(Mat, Vec, Vec);
 35: extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat, Vec, Vec);
 36: extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
 37: #endif

 39: PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
 40: {
 41:   Mat_FFT           *fft  = (Mat_FFT *)A->data;
 42:   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
 43:   const PetscScalar *x_array;
 44:   PetscScalar       *y_array;
 45: #if defined(PETSC_USE_COMPLEX)
 46:   #if defined(PETSC_USE_64BIT_INDICES)
 47:   fftw_iodim64 *iodims;
 48:   #else
 49:   fftw_iodim *iodims;
 50:   #endif
 51:   PetscInt i;
 52: #endif
 53:   PetscInt ndim = fft->ndim, *dim = fft->dim;

 55:   PetscFunctionBegin;
 56:   PetscCall(VecGetArrayRead(x, &x_array));
 57:   PetscCall(VecGetArray(y, &y_array));
 58:   if (!fftw->p_forward) { /* create a plan, then execute it */
 59:     switch (ndim) {
 60:     case 1:
 61: #if defined(PETSC_USE_COMPLEX)
 62:       fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
 63: #else
 64:       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
 65: #endif
 66:       break;
 67:     case 2:
 68: #if defined(PETSC_USE_COMPLEX)
 69:       fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
 70: #else
 71:       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
 72: #endif
 73:       break;
 74:     case 3:
 75: #if defined(PETSC_USE_COMPLEX)
 76:       fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
 77: #else
 78:       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
 79: #endif
 80:       break;
 81:     default:
 82: #if defined(PETSC_USE_COMPLEX)
 83:       iodims = fftw->iodims;
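      /* build row-major iodims for the guru interface: the last dimension is contiguous
         (stride 1) and each earlier dimension strides over the product of the sizes after it */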
 84:   #if defined(PETSC_USE_64BIT_INDICES)
 85:       if (ndim) {
 86:         iodims[ndim - 1].n  = (ptrdiff_t)dim[ndim - 1];
 87:         iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
 88:         for (i = ndim - 2; i >= 0; --i) {
 89:           iodims[i].n  = (ptrdiff_t)dim[i];
 90:           iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
 91:         }
 92:       }
 93:       fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
 94:   #else
 95:       if (ndim) {
 96:         iodims[ndim - 1].n  = (int)dim[ndim - 1];
 97:         iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
 98:         for (i = ndim - 2; i >= 0; --i) {
 99:           iodims[i].n  = (int)dim[i];
100:           iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
101:         }
102:       }
103:       fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
104:   #endif

106: #else
107:       fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
108: #endif
109:       break;
110:     }
111:     fftw->finarray  = (PetscScalar *)x_array;
112:     fftw->foutarray = y_array;
 113:     /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten during planning,
 114:                 so planning should be done before x is initialized! See the FFTW manual, Sec. 2.1 or Sec. 4 */
115:     fftw_execute(fftw->p_forward);
116:   } else {                                                         /* use existing plan */
117:     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
118: #if defined(PETSC_USE_COMPLEX)
119:       fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
120: #else
121:       fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array);
122: #endif
123:     } else {
124:       fftw_execute(fftw->p_forward);
125:     }
126:   }
127:   PetscCall(VecRestoreArray(y, &y_array));
128:   PetscCall(VecRestoreArrayRead(x, &x_array));
129:   PetscFunctionReturn(PETSC_SUCCESS);
130: }

132: PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y)
133: {
134:   Mat_FFT           *fft  = (Mat_FFT *)A->data;
135:   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
136:   const PetscScalar *x_array;
137:   PetscScalar       *y_array;
138:   PetscInt           ndim = fft->ndim, *dim = fft->dim;
139: #if defined(PETSC_USE_COMPLEX)
140:   #if defined(PETSC_USE_64BIT_INDICES)
141:   fftw_iodim64 *iodims = fftw->iodims;
142:   #else
143:   fftw_iodim *iodims = fftw->iodims;
144:   #endif
145: #endif

147:   PetscFunctionBegin;
148:   PetscCall(VecGetArrayRead(x, &x_array));
149:   PetscCall(VecGetArray(y, &y_array));
150:   if (!fftw->p_backward) { /* create a plan, then execute it */
151:     switch (ndim) {
152:     case 1:
153: #if defined(PETSC_USE_COMPLEX)
154:       fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
155: #else
156:       fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
157: #endif
158:       break;
159:     case 2:
160: #if defined(PETSC_USE_COMPLEX)
161:       fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
162: #else
163:       fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
164: #endif
165:       break;
166:     case 3:
167: #if defined(PETSC_USE_COMPLEX)
168:       fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
169: #else
170:       fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
171: #endif
172:       break;
173:     default:
174: #if defined(PETSC_USE_COMPLEX)
175:   #if defined(PETSC_USE_64BIT_INDICES)
176:       fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
177:   #else
178:       fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
179:   #endif
180: #else
181:       fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
182: #endif
183:       break;
184:     }
185:     fftw->binarray  = (PetscScalar *)x_array;
186:     fftw->boutarray = y_array;
187:     fftw_execute(fftw->p_backward);
188:   } else {                                                         /* use existing plan */
189:     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
190: #if defined(PETSC_USE_COMPLEX)
191:       fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
192: #else
193:       fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array);
194: #endif
195:     } else {
196:       fftw_execute(fftw->p_backward);
197:     }
198:   }
199:   PetscCall(VecRestoreArray(y, &y_array));
200:   PetscCall(VecRestoreArrayRead(x, &x_array));
201:   PetscFunctionReturn(PETSC_SUCCESS);
202: }

204: #if !PetscDefined(HAVE_MPIUNI)
205: PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y)
206: {
207:   Mat_FFT           *fft  = (Mat_FFT *)A->data;
208:   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
209:   const PetscScalar *x_array;
210:   PetscScalar       *y_array;
211:   PetscInt           ndim = fft->ndim, *dim = fft->dim;
212:   MPI_Comm           comm;

214:   PetscFunctionBegin;
215:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
216:   PetscCall(VecGetArrayRead(x, &x_array));
217:   PetscCall(VecGetArray(y, &y_array));
218:   if (!fftw->p_forward) { /* create a plan, then execute it */
219:     switch (ndim) {
220:     case 1:
221:   #if defined(PETSC_USE_COMPLEX)
222:       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
223:   #else
 224:       SETERRQ(comm, PETSC_ERR_SUP, "No support for real numbers yet");
225:   #endif
226:       break;
227:     case 2:
228:   #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
229:       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
230:   #else
231:       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
232:   #endif
233:       break;
234:     case 3:
235:   #if defined(PETSC_USE_COMPLEX)
236:       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
237:   #else
238:       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
239:   #endif
240:       break;
241:     default:
242:   #if defined(PETSC_USE_COMPLEX)
243:       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
244:   #else
245:       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
246:   #endif
247:       break;
248:     }
249:     fftw->finarray  = (PetscScalar *)x_array;
250:     fftw->foutarray = y_array;
 251:     /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten during planning,
 252:                 so planning should be done before x is initialized! See the FFTW manual, Sec. 2.1 or Sec. 4 */
253:     fftw_execute(fftw->p_forward);
254:   } else {                                                         /* use existing plan */
255:     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
256:       fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
257:     } else {
258:       fftw_execute(fftw->p_forward);
259:     }
260:   }
261:   PetscCall(VecRestoreArray(y, &y_array));
262:   PetscCall(VecRestoreArrayRead(x, &x_array));
263:   PetscFunctionReturn(PETSC_SUCCESS);
264: }

266: PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y)
267: {
268:   Mat_FFT           *fft  = (Mat_FFT *)A->data;
269:   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
270:   const PetscScalar *x_array;
271:   PetscScalar       *y_array;
272:   PetscInt           ndim = fft->ndim, *dim = fft->dim;
273:   MPI_Comm           comm;

275:   PetscFunctionBegin;
276:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
277:   PetscCall(VecGetArrayRead(x, &x_array));
278:   PetscCall(VecGetArray(y, &y_array));
279:   if (!fftw->p_backward) { /* create a plan, then execute it */
280:     switch (ndim) {
281:     case 1:
282:   #if defined(PETSC_USE_COMPLEX)
283:       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
284:   #else
 285:       SETERRQ(comm, PETSC_ERR_SUP, "No support for real numbers yet");
286:   #endif
287:       break;
288:     case 2:
289:   #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
290:       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
291:   #else
292:       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
293:   #endif
294:       break;
295:     case 3:
296:   #if defined(PETSC_USE_COMPLEX)
297:       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
298:   #else
299:       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
300:   #endif
301:       break;
302:     default:
303:   #if defined(PETSC_USE_COMPLEX)
304:       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
305:   #else
306:       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
307:   #endif
308:       break;
309:     }
310:     fftw->binarray  = (PetscScalar *)x_array;
311:     fftw->boutarray = y_array;
312:     fftw_execute(fftw->p_backward);
313:   } else {                                                         /* use existing plan */
314:     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
315:       fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
316:     } else {
317:       fftw_execute(fftw->p_backward);
318:     }
319:   }
320:   PetscCall(VecRestoreArray(y, &y_array));
321:   PetscCall(VecRestoreArrayRead(x, &x_array));
322:   PetscFunctionReturn(PETSC_SUCCESS);
323: }
324: #endif

326: PetscErrorCode MatDestroy_FFTW(Mat A)
327: {
328:   Mat_FFT  *fft  = (Mat_FFT *)A->data;
329:   Mat_FFTW *fftw = (Mat_FFTW *)fft->data;

331:   PetscFunctionBegin;
332:   fftw_destroy_plan(fftw->p_forward);
333:   fftw_destroy_plan(fftw->p_backward);
334:   if (fftw->iodims) free(fftw->iodims);
335:   PetscCall(PetscFree(fftw->dim_fftw));
336:   PetscCall(PetscFree(fft->data));
337:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL));
338:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL));
339:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL));
340: #if !PetscDefined(HAVE_MPIUNI)
341:   fftw_mpi_cleanup();
342: #endif
343:   PetscFunctionReturn(PETSC_SUCCESS);
344: }

346: #if !PetscDefined(HAVE_MPIUNI)
347: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
348: PetscErrorCode VecDestroy_MPIFFTW(Vec v)
349: {
350:   PetscScalar *array;

352:   PetscFunctionBegin;
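  /* the data array was allocated with fftw_malloc() in MatCreateVecsFFTW_FFTW(), so it
     must be released with fftw_free() before the standard MPI Vec destruction */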
353:   PetscCall(VecGetArray(v, &array));
354:   fftw_free((fftw_complex *)array);
355:   PetscCall(VecRestoreArray(v, &array));
356:   PetscCall(VecDestroy_MPI(v));
357:   PetscFunctionReturn(PETSC_SUCCESS);
358: }
359: #endif

361: #if !PetscDefined(HAVE_MPIUNI)
362: static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new)
363: {
364:   Mat A;

366:   PetscFunctionBegin;
367:   PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A));
368:   PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL));
369:   PetscFunctionReturn(PETSC_SUCCESS);
370: }

372: static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new)
373: {
374:   Mat A;

376:   PetscFunctionBegin;
377:   PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A));
378:   PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL));
379:   PetscFunctionReturn(PETSC_SUCCESS);
380: }

382: static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
383: {
384:   Mat A;

386:   PetscFunctionBegin;
387:   PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A));
388:   PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new));
389:   PetscFunctionReturn(PETSC_SUCCESS);
390: }
391: #endif

393: /*@
394:   MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
395:   parallel layout determined by `MATFFTW`

397:   Collective

399:   Input Parameter:
400: . A - the matrix

402:   Output Parameters:
403: + x - (optional) input vector of forward FFTW
404: . y - (optional) output vector of forward FFTW
405: - z - (optional) output vector of backward FFTW

407:   Options Database Key:
408: . -mat_fftw_plannerflags - set FFTW planner flags

410:   Level: advanced

412:   Notes:
 413:   The parallel layout of the output of the forward FFTW is always the same as that of the input
 414:   of the backward FFTW, but the parallel layout of the input vector of the forward
 415:   FFTW might not be the same as that of the output of the backward FFTW.

 417:   One needs to provide enough space while doing a parallel real transform:
 418:   extra zeros must be padded at the end of the last dimension. For this reason one needs to
 419:   invoke the fftw_mpi_local_size_transposed() routine, remembering to change the
 420:   last dimension from n to n/2+1 while invoking it. The number of zeros to be padded
 421:   depends on whether the last dimension is even or odd: if the last dimension is even, two
 422:   zeros must be padded; if it is odd, only one zero is needed.

 424:   Lastly, one needs some scratch space at the end of the data set on each process. alloc_local
 425:   figures out how much space is needed, i.e. it computes the data+scratch space needed on
 426:   each process and returns that.
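
  Example Usage:
  A minimal sketch (assuming `comm`, `ndim`, and `dim` are provided by the caller):
.vb
  Mat A;
  Vec fin, fout, bout;
  PetscCall(MatCreateFFT(comm, ndim, dim, MATFFTW, &A));
  PetscCall(MatCreateVecsFFTW(A, &fin, &fout, &bout)); // padded, layout-compatible vectors
.ve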

428:   Developer Notes:
429:   How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically?

431: .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()`
432: @*/
433: PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z)
434: {
435:   PetscFunctionBegin;
436:   PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z));
437:   PetscFunctionReturn(PETSC_SUCCESS);
438: }

440: PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout)
441: {
442:   PetscMPIInt size, rank;
443:   MPI_Comm    comm;
444:   Mat_FFT    *fft = (Mat_FFT *)A->data;

446:   PetscFunctionBegin;
449:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));

451:   PetscCallMPI(MPI_Comm_size(comm, &size));
452:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
453:   if (size == 1) { /* sequential case */
454: #if defined(PETSC_USE_COMPLEX)
455:     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin));
456:     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout));
457:     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout));
458: #else
459:     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin));
460:     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout));
461:     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout));
462: #endif
463: #if !PetscDefined(HAVE_MPIUNI)
464:   } else { /* parallel cases */
465:     Mat_FFTW     *fftw = (Mat_FFTW *)fft->data;
466:     PetscInt      ndim = fft->ndim, *dim = fft->dim;
467:     ptrdiff_t     alloc_local, local_n0, local_0_start;
468:     ptrdiff_t     local_n1;
469:     fftw_complex *data_fout;
470:     ptrdiff_t     local_1_start;
471:   #if defined(PETSC_USE_COMPLEX)
472:     fftw_complex *data_fin, *data_bout;
473:   #else
474:     double   *data_finr, *data_boutr;
475:     PetscInt  n1, N1;
476:     ptrdiff_t temp;
477:   #endif

479:     switch (ndim) {
480:     case 1:
481:   #if !defined(PETSC_USE_COMPLEX)
482:       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform");
483:   #else
484:       alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
485:       if (fin) {
486:         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
487:         PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin));
488:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
489:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
490:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
491:       }
492:       if (fout) {
493:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
494:         PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout));
495:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
496:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
497:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
498:       }
499:       alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
500:       if (bout) {
501:         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
502:         PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout));
503:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
 504:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
505:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
506:       }
507:       break;
508:   #endif
509:     case 2:
 510:   #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no longer the product of the individual dimensions */
511:       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
512:       N1          = 2 * dim[0] * (dim[1] / 2 + 1);
513:       n1          = 2 * local_n0 * (dim[1] / 2 + 1);
514:       if (fin) {
515:         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
516:         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
517:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
518:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
519:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
520:       }
521:       if (fout) {
522:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
523:         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout));
524:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
525:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
526:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
527:       }
528:       if (bout) {
529:         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
530:         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
531:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
532:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
533:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
534:       }
535:   #else
536:       /* Get local size */
537:       alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
538:       if (fin) {
539:         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
540:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
541:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
542:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
543:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
544:       }
545:       if (fout) {
546:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
547:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
548:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
549:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
550:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
551:       }
552:       if (bout) {
553:         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
554:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
555:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
556:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
557:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
558:       }
559:   #endif
560:       break;
561:     case 3:
562:   #if !defined(PETSC_USE_COMPLEX)
563:       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
564:       N1          = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1);
565:       n1          = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1);
566:       if (fin) {
567:         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
568:         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
569:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
570:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
571:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
572:       }
573:       if (fout) {
574:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
575:         PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout));
576:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
577:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
578:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
579:       }
580:       if (bout) {
581:         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
582:         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
583:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
584:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
585:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
586:       }
587:   #else
588:       alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
589:       if (fin) {
590:         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
591:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
592:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
593:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
594:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
595:       }
596:       if (fout) {
597:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
598:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
599:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
600:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
601:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
602:       }
603:       if (bout) {
604:         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
605:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
606:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
607:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
608:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
609:       }
610:   #endif
611:       break;
612:     default:
613:   #if !defined(PETSC_USE_COMPLEX)
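      /* temporarily replace the last dimension n by n/2+1 while querying the transposed
         local sizes, as the r2c storage layout requires, then restore it afterwards */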
614:       temp                                  = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
615:       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
616:       alloc_local                           = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
617:       N1                                    = 2 * fft->N * (PetscInt)(fftw->dim_fftw[fftw->ndim_fftw - 1]) / ((PetscInt)temp);
618:       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

620:       if (fin) {
621:         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
622:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin));
623:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
624:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
625:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
626:       }
627:       if (fout) {
628:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
629:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout));
630:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
631:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
632:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
633:       }
634:       if (bout) {
635:         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
636:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout));
637:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
638:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
639:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
640:       }
641:   #else
642:       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
643:       if (fin) {
644:         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
645:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
646:         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
647:         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
648:         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
649:       }
650:       if (fout) {
651:         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
652:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
653:         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
654:         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
655:         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
656:       }
657:       if (bout) {
658:         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
659:         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
660:         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
661:         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
662:         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
663:       }
664:   #endif
665:       break;
666:     }
 667:     /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but
 668:        v->array_allocated=NULL. A regular replacearray call would not free that memory and would only cause
 669:        memory leaks. We null these pointers here to avoid accidental use.
 670:     */
671:     if (fin) (*fin)->ops->replacearray = NULL;
672:     if (fout) (*fout)->ops->replacearray = NULL;
673:     if (bout) (*bout)->ops->replacearray = NULL;
674: #endif
675:   }
676:   PetscFunctionReturn(PETSC_SUCCESS);
677: }

679: /*@
680:   VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls.

682:   Collective

684:   Input Parameters:
685: + A - FFTW matrix
686: - x - the PETSc vector

688:   Output Parameter:
689: . y - the FFTW vector

691:   Level: intermediate

693:   Note:
 694:   For a real parallel FFT, FFTW requires insertion of extra space at the end of the last dimension. This is required even when
 695:   one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
 696:   zeros. This routine does that job with a scatter operation.
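
  Example Usage:
  A sketch (assuming x is a regular PETSc vector and xwork was obtained from `MatCreateVecsFFTW()`):
.vb
  PetscCall(MatCreateVecsFFTW(A, &xwork, NULL, NULL));
  PetscCall(VecScatterPetscToFFTW(A, x, xwork)); // inserts the padding where required
.ve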

698: .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()`
699: @*/
700: PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y)
701: {
702:   PetscFunctionBegin;
703:   PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
704:   PetscFunctionReturn(PETSC_SUCCESS);
705: }

707: static PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y)
708: {
709:   MPI_Comm    comm;
710:   Mat_FFT    *fft = (Mat_FFT *)A->data;
711:   PetscInt    low;
712:   PetscMPIInt rank, size;
713:   PetscInt    vsize, vsize1;
714:   VecScatter  vecscat;
715:   IS          list1;

717:   PetscFunctionBegin;
718:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
719:   PetscCallMPI(MPI_Comm_size(comm, &size));
720:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
721:   PetscCall(VecGetOwnershipRange(y, &low, NULL));

723:   if (size == 1) {
724:     PetscCall(VecGetSize(x, &vsize));
725:     PetscCall(VecGetSize(y, &vsize1));
726:     PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1));
727:     PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
728:     PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
729:     PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
730:     PetscCall(VecScatterDestroy(&vecscat));
731:     PetscCall(ISDestroy(&list1));
732: #if !PetscDefined(HAVE_MPIUNI)
733:   } else {
734:     Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
735:     PetscInt  ndim = fft->ndim, *dim = fft->dim;
736:     ptrdiff_t local_n0, local_0_start;
737:     ptrdiff_t local_n1, local_1_start;
738:     IS        list2;
739:   #if !defined(PETSC_USE_COMPLEX)
740:     PetscInt  i, j, k, partial_dim;
741:     PetscInt *indx1, *indx2, tempindx, tempindx1;
742:     PetscInt  NM;
743:     ptrdiff_t temp;
744:   #endif

746:     switch (ndim) {
747:     case 1:
748:   #if defined(PETSC_USE_COMPLEX)
749:       fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);

751:       PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1));
752:       PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2));
753:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
754:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
755:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
756:       PetscCall(VecScatterDestroy(&vecscat));
757:       PetscCall(ISDestroy(&list1));
758:       PetscCall(ISDestroy(&list2));
759:   #else
760:       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
761:   #endif
762:       break;
763:     case 2:
764:   #if defined(PETSC_USE_COMPLEX)
765:       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);

767:       PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
768:       PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
769:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
770:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
771:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
772:       PetscCall(VecScatterDestroy(&vecscat));
773:       PetscCall(ISDestroy(&list1));
774:       PetscCall(ISDestroy(&list2));
775:   #else
776:       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

778:       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1));
779:       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2));

781:       if (dim[1] % 2 == 0) {
782:         NM = dim[1] + 2;
783:       } else {
784:         NM = dim[1] + 1;
785:       }
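      /* e.g. dim[1] = 4 gives NM = 6 = 2*(4/2+1): each logical row of 4 real values occupies
         a padded row of 6 doubles in the FFTW r2c storage layout */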
786:       for (i = 0; i < local_n0; i++) {
787:         for (j = 0; j < dim[1]; j++) {
788:           tempindx  = i * dim[1] + j;
789:           tempindx1 = i * NM + j;

791:           indx1[tempindx] = local_0_start * dim[1] + tempindx;
792:           indx2[tempindx] = low + tempindx1;
793:         }
794:       }

796:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
797:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));

799:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
800:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
801:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
802:       PetscCall(VecScatterDestroy(&vecscat));
803:       PetscCall(ISDestroy(&list1));
804:       PetscCall(ISDestroy(&list2));
805:       PetscCall(PetscFree(indx1));
806:       PetscCall(PetscFree(indx2));
807:   #endif
808:       break;

810:     case 3:
811:   #if defined(PETSC_USE_COMPLEX)
812:       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

814:       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
815:       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
816:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
817:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
818:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
819:       PetscCall(VecScatterDestroy(&vecscat));
820:       PetscCall(ISDestroy(&list1));
821:       PetscCall(ISDestroy(&list2));
822:   #else
823:       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
824:       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
825:       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

827:       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1));
828:       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2));

830:       if (dim[2] % 2 == 0) NM = dim[2] + 2;
831:       else NM = dim[2] + 1;

833:       for (i = 0; i < local_n0; i++) {
834:         for (j = 0; j < dim[1]; j++) {
835:           for (k = 0; k < dim[2]; k++) {
836:             tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
837:             tempindx1 = i * dim[1] * NM + j * NM + k;

839:             indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
840:             indx2[tempindx] = low + tempindx1;
841:           }
842:         }
843:       }

845:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
846:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
847:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
848:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
849:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
850:       PetscCall(VecScatterDestroy(&vecscat));
851:       PetscCall(ISDestroy(&list1));
852:       PetscCall(ISDestroy(&list2));
853:       PetscCall(PetscFree(indx1));
854:       PetscCall(PetscFree(indx2));
855:   #endif
856:       break;

858:     default:
859:   #if defined(PETSC_USE_COMPLEX)
860:       fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);

862:       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
863:       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
864:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
865:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
866:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
867:       PetscCall(VecScatterDestroy(&vecscat));
868:       PetscCall(ISDestroy(&list1));
869:       PetscCall(ISDestroy(&list2));
870:   #else
871:       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
872:       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
873:       temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];

875:       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;

877:       fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

879:       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

881:       partial_dim = fftw->partial_dim;

883:       PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1));
884:       PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2));

886:       if (dim[ndim - 1] % 2 == 0) NM = 2;
887:       else NM = 1;
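      /* walk the FFTW-side indices: after every dim[ndim - 1] logical entries,
         skip the NM padding slots at the end of the last dimension */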

889:       j = low;
890:       for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
891:         indx1[i] = local_0_start * partial_dim + i;
892:         indx2[i] = j;
893:         if (k % dim[ndim - 1] == 0) j += NM;
894:         j++;
895:       }
896:       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
897:       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
898:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
899:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
900:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
901:       PetscCall(VecScatterDestroy(&vecscat));
902:       PetscCall(ISDestroy(&list1));
903:       PetscCall(ISDestroy(&list2));
904:       PetscCall(PetscFree(indx1));
905:       PetscCall(PetscFree(indx2));
906:   #endif
907:       break;
908:     }
909: #endif
910:   }
911:   PetscFunctionReturn(PETSC_SUCCESS);
912: }

914: /*@
915:   VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector.

917:   Collective

919:   Input Parameters:
920: + A - `MATFFTW` matrix
921: - x - FFTW vector

923:   Output Parameter:
924: . y - PETSc vector

926:   Level: intermediate

928:   Note:
 929:   While doing a real transform, the FFTW output of the backward DFT contains extra zeros at the end of the last dimension.
 930:   `VecScatterFFTWToPetsc()` removes those extra zeros.
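
  Example Usage:
  A sketch of recovering a PETSc vector after a backward transform (assuming z was obtained from
  `MatCreateVecsFFTW()` and x2 is a regular PETSc vector):
.vb
  PetscCall(MatMultTranspose(A, y, z));       // backward FFT into the padded layout
  PetscCall(VecScatterFFTWToPetsc(A, z, x2)); // strip the extra zeros
.ve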

932: .seealso: [](ch_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
933: @*/
934: PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y)
935: {
936:   PetscFunctionBegin;
937:   PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
938:   PetscFunctionReturn(PETSC_SUCCESS);
939: }

941: static PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y)
942: {
943:   MPI_Comm    comm;
944:   Mat_FFT    *fft = (Mat_FFT *)A->data;
945:   PetscInt    low;
946:   PetscMPIInt rank, size;
947:   VecScatter  vecscat;
948:   IS          list1;

950:   PetscFunctionBegin;
951:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
952:   PetscCallMPI(MPI_Comm_size(comm, &size));
953:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
954:   PetscCall(VecGetOwnershipRange(x, &low, NULL));

956:   if (size == 1) {
957:     PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
958:     PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
959:     PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
960:     PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
961:     PetscCall(VecScatterDestroy(&vecscat));
962:     PetscCall(ISDestroy(&list1));

964: #if !PetscDefined(HAVE_MPIUNI)
965:   } else {
966:     Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
967:     PetscInt  ndim = fft->ndim, *dim = fft->dim;
968:     ptrdiff_t local_n0, local_0_start;
969:     ptrdiff_t local_n1, local_1_start;
970:     IS        list2;
971:   #if !defined(PETSC_USE_COMPLEX)
972:     PetscInt  i, j, k, partial_dim;
973:     PetscInt *indx1, *indx2, tempindx, tempindx1;
974:     PetscInt  NM;
975:     ptrdiff_t temp;
976:   #endif
977:     switch (ndim) {
978:     case 1:
979:   #if defined(PETSC_USE_COMPLEX)
980:       fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);

982:       PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1));
983:       PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2));
984:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
985:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
986:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
987:       PetscCall(VecScatterDestroy(&vecscat));
988:       PetscCall(ISDestroy(&list1));
989:       PetscCall(ISDestroy(&list2));
990:   #else
991:       SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
992:   #endif
993:       break;
994:     case 2:
995:   #if defined(PETSC_USE_COMPLEX)
996:       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);

998:       PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
999:       PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
1000:       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1001:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1002:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1003:       PetscCall(VecScatterDestroy(&vecscat));
1004:       PetscCall(ISDestroy(&list1));
1005:       PetscCall(ISDestroy(&list2));
1006:   #else
1007:       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

1009:       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1));
1010:       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2));

1012:       if (dim[1] % 2 == 0) NM = dim[1] + 2;
1013:       else NM = dim[1] + 1;

1015:       for (i = 0; i < local_n0; i++) {
1016:         for (j = 0; j < dim[1]; j++) {
1017:           tempindx  = i * dim[1] + j;
1018:           tempindx1 = i * NM + j;

1020:           indx1[tempindx] = local_0_start * dim[1] + tempindx;
1021:           indx2[tempindx] = low + tempindx1;
1022:         }
1023:       }

1025:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
1026:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));

1028:       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1029:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1030:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1031:       PetscCall(VecScatterDestroy(&vecscat));
1032:       PetscCall(ISDestroy(&list1));
1033:       PetscCall(ISDestroy(&list2));
1034:       PetscCall(PetscFree(indx1));
1035:       PetscCall(PetscFree(indx2));
1036:   #endif
1037:       break;
1038:     case 3:
1039:   #if defined(PETSC_USE_COMPLEX)
1040:       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

1042:       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
1043:       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
1044:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1045:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1046:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1047:       PetscCall(VecScatterDestroy(&vecscat));
1048:       PetscCall(ISDestroy(&list1));
1049:       PetscCall(ISDestroy(&list2));
1050:   #else
1051:       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

1053:       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1));
1054:       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2));

1056:       if (dim[2] % 2 == 0) NM = dim[2] + 2;
1057:       else NM = dim[2] + 1;

1059:       for (i = 0; i < local_n0; i++) {
1060:         for (j = 0; j < dim[1]; j++) {
1061:           for (k = 0; k < dim[2]; k++) {
1062:             tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
1063:             tempindx1 = i * dim[1] * NM + j * NM + k;

1065:             indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
1066:             indx2[tempindx] = low + tempindx1;
1067:           }
1068:         }
1069:       }

1071:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
1072:       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));

1074:       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1075:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1076:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1077:       PetscCall(VecScatterDestroy(&vecscat));
1078:       PetscCall(ISDestroy(&list1));
1079:       PetscCall(ISDestroy(&list2));
1080:       PetscCall(PetscFree(indx1));
1081:       PetscCall(PetscFree(indx2));
1082:   #endif
1083:       break;
1084:     default:
1085:   #if defined(PETSC_USE_COMPLEX)
1086:       fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);

1088:       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
1089:       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
1090:       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1091:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1092:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1093:       PetscCall(VecScatterDestroy(&vecscat));
1094:       PetscCall(ISDestroy(&list1));
1095:       PetscCall(ISDestroy(&list2));
1096:   #else
1097:       temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];

1099:       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;

1101:       fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

1103:       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

1105:       partial_dim = fftw->partial_dim;

1107:       PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1));
1108:       PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2));

1110:       if (dim[ndim - 1] % 2 == 0) NM = 2;
1111:       else NM = 1;
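      /* as in VecScatterPetscToFFTW_FFTW(): after every dim[ndim - 1] logical entries,
         skip the NM padding slots at the end of the last dimension */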

1113:       j = low;
1114:       for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
1115:         indx1[i] = local_0_start * partial_dim + i;
1116:         indx2[i] = j;
1117:         if (k % dim[ndim - 1] == 0) j += NM;
1118:         j++;
1119:       }
1120:       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
1121:       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));

1123:       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1124:       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1125:       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1126:       PetscCall(VecScatterDestroy(&vecscat));
1127:       PetscCall(ISDestroy(&list1));
1128:       PetscCall(ISDestroy(&list2));
1129:       PetscCall(PetscFree(indx1));
1130:       PetscCall(PetscFree(indx2));
1131:   #endif
1132:       break;
1133:     }
1134: #endif
1135:   }
1136:   PetscFunctionReturn(PETSC_SUCCESS);
1137: }

1139: /*MC
 1140:   MATFFTW - "fftw" - Matrix type that provides FFT via the FFTW external package.
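
  Options Database Key:
. -mat_fftw_plannerflags - set the FFTW planner flags (FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, or FFTW_EXHAUSTIVE)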

1142:   Level: intermediate

1144: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatType`, `MatCreateFFT()`
1145: M*/
1146: PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1147: {
1148:   MPI_Comm    comm;
1149:   Mat_FFT    *fft = (Mat_FFT *)A->data;
1150:   Mat_FFTW   *fftw;
1151:   PetscInt    ndim = fft->ndim, *dim = fft->dim;
1152:   const char *plans[]  = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
1153:   unsigned    iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
1154:   PetscBool   flg;
1155:   PetscInt    p_flag, partial_dim = 1, ctr;
1156:   PetscMPIInt size, rank;
1157:   ptrdiff_t  *pdim;
1158: #if !defined(PETSC_USE_COMPLEX)
1159:   PetscInt tot_dim;
1160: #endif

1162:   PetscFunctionBegin;
1163:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1164:   PetscCallMPI(MPI_Comm_size(comm, &size));
1165:   PetscCallMPI(MPI_Comm_rank(comm, &rank));

1167: #if !PetscDefined(HAVE_MPIUNI)
1168:   fftw_mpi_init();
1169: #endif
1170:   pdim    = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
1171:   pdim[0] = dim[0];
1172: #if !defined(PETSC_USE_COMPLEX)
1173:   tot_dim = 2 * dim[0];
1174: #endif
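  /* partial_dim = product of all dimensions except the first; for real scalars, tot_dim
     accumulates the total length of the padded r2c storage layout */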
1175:   for (ctr = 1; ctr < ndim; ctr++) {
1176:     partial_dim *= dim[ctr];
1177:     pdim[ctr] = dim[ctr];
1178: #if !defined(PETSC_USE_COMPLEX)
1179:     if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
1180:     else tot_dim *= dim[ctr];
1181: #endif
1182:   }

1184:   if (size == 1) {
1185: #if defined(PETSC_USE_COMPLEX)
1186:     PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N));
1187:     fft->n = fft->N;
1188: #else
1189:     PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim));
1190:     fft->n = tot_dim;
1191: #endif
1192: #if !PetscDefined(HAVE_MPIUNI)
1193:   } else {
1194:     ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
1195:   #if !defined(PETSC_USE_COMPLEX)
1196:     ptrdiff_t temp;
1197:     PetscInt  N1;
1198:   #endif

1200:     switch (ndim) {
1201:     case 1:
1202:   #if !defined(PETSC_USE_COMPLEX)
1203:       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
1204:   #else
1205:       fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
1206:       fft->n = (PetscInt)local_n0;
1207:       PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N));
1208:   #endif
1209:       break;
1210:     case 2:
1211:   #if defined(PETSC_USE_COMPLEX)
1212:       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
1213:       fft->n = (PetscInt)local_n0 * dim[1];
1214:       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1215:   #else
1216:       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

1218:       fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
1219:       PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1)));
1220:   #endif
1221:       break;
1222:     case 3:
1223:   #if defined(PETSC_USE_COMPLEX)
1224:       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

1226:       fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
1227:       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1228:   #else
1229:       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

1231:       fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
1232:       PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1)));
1233:   #endif
1234:       break;
1235:     default:
1236:   #if defined(PETSC_USE_COMPLEX)
1237:       fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);

1239:       fft->n = (PetscInt)local_n0 * partial_dim;
1240:       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1241:   #else
1242:       temp = pdim[ndim - 1];

1244:       pdim[ndim - 1] = temp / 2 + 1;

1246:       fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

1248:       fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp;
1249:       N1     = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);

1251:       pdim[ndim - 1] = temp;

1253:       PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1));
1254:   #endif
1255:       break;
1256:     }
1257: #endif
1258:   }
1259:   free(pdim);
1260:   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW));
1261:   PetscCall(PetscNew(&fftw));
1262:   fft->data = (void *)fftw;

 1264:   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is the dimension of the FFT */
1265:   fftw->partial_dim = partial_dim;

1267:   PetscCall(PetscMalloc1(ndim, &fftw->dim_fftw));
1268:   if (size == 1) {
1269: #if defined(PETSC_USE_64BIT_INDICES)
1270:     fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
1271: #else
1272:     fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
1273: #endif
1274:   }

1276:   for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];

1278:   fftw->p_forward  = NULL;
1279:   fftw->p_backward = NULL;
1280:   fftw->p_flag     = FFTW_ESTIMATE;

1282:   if (size == 1) {
1283:     A->ops->mult          = MatMult_SeqFFTW;
1284:     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1285: #if !PetscDefined(HAVE_MPIUNI)
1286:   } else {
1287:     A->ops->mult          = MatMult_MPIFFTW;
1288:     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1289: #endif
1290:   }
1291:   fft->matdestroy = MatDestroy_FFTW;
1292:   A->assembled    = PETSC_TRUE;
1293:   A->preallocated = PETSC_TRUE;

1295:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW));
1296:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW));
1297:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW));

1299:   /* get runtime options */
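  /* e.g. -mat_fftw_plannerflags FFTW_MEASURE trades longer planning time for potentially faster transforms */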
1300:   PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
1301:   PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg));
1302:   if (flg) fftw->p_flag = iplans[p_flag];
1303:   PetscOptionsEnd();
1304:   PetscFunctionReturn(PETSC_SUCCESS);
1305: }