Actual source code: hyperbolic.c

petsc-master 2018-01-18
Report Typos and Errors
  1: #include <petsctao.h>

  3: /*T
  4:    Concepts: TAO^Solving a system of nonlinear equations, nonlinear least squares
  5:    Routines: TaoCreate();
  6:    Routines: TaoSetType();
  7:    Routines: TaoSetInitialVector();
  8:    Routines: TaoSetObjectiveRoutine();
  9:    Routines: TaoSetGradientRoutine();
 10:    Routines: TaoSetConstraintsRoutine();
 11:    Routines: TaoSetJacobianStateRoutine();
 12:    Routines: TaoSetJacobianDesignRoutine();
 13:    Routines: TaoSetStateDesignIS();
 14:    Routines: TaoSetFromOptions();
 15:    Routines: TaoSetHistory(); TaoGetHistory();
 16:    Routines: TaoSolve();
 17:    Routines: TaoDestroy();
 18:    Processors: 1
 19: T*/



 23: typedef struct {
 24:   PetscInt n; /*  Number of variables */
 25:   PetscInt m; /*  Number of constraints */
 26:   PetscInt mx; /*  grid points in each direction */
 27:   PetscInt nt; /*  Number of time steps */
 28:   PetscInt ndata; /*  Number of data points per sample */
 29:   IS       s_is;
 30:   IS       d_is;
 31:   VecScatter state_scatter;
 32:   VecScatter design_scatter;
 33:   VecScatter *uxi_scatter,*uyi_scatter,*ux_scatter,*uy_scatter,*ui_scatter;
 34:   VecScatter *yi_scatter;

 36:   Mat       Js,Jd,JsBlockPrec,JsInv,JsBlock;
 37:   PetscBool jformed,c_formed;

 39:   PetscReal alpha; /*  Regularization parameter */
 40:   PetscReal gamma;
 41:   PetscReal ht; /*  Time step */
 42:   PetscReal T; /*  Final time */
 43:   Mat Q,QT;
 44:   Mat L,LT;
 45:   Mat Div,Divwork,Divxy[2];
 46:   Mat Grad,Gradxy[2];
 47:   Mat M;
 48:   Mat *C,*Cwork;
 49:   /* Mat Hs,Hd,Hsd; */
 50:   Vec q;
 51:   Vec ur; /*  reference */

 53:   Vec d;
 54:   Vec dwork;

 56:   Vec y; /*  state variables */
 57:   Vec ywork;
 58:   Vec ytrue;
 59:   Vec *yi,*yiwork,*ziwork;
 60:   Vec *uxi,*uyi,*uxiwork,*uyiwork,*ui,*uiwork;

 62:   Vec u; /*  design variables */
 63:   Vec uwork,vwork;
 64:   Vec utrue;

 66:   Vec js_diag;

 68:   Vec c; /*  constraint vector */
 69:   Vec cwork;

 71:   Vec lwork;

 73:   KSP      solver;
 74:   PC       prec;
 75:   PetscInt block_index;

 77:   PetscInt ksp_its;
 78:   PetscInt ksp_its_initial;
 79: } AppCtx;


 82: PetscErrorCode FormFunction(Tao, Vec, PetscReal*, void*);
 83: PetscErrorCode FormGradient(Tao, Vec, Vec, void*);
 84: PetscErrorCode FormFunctionGradient(Tao, Vec, PetscReal*, Vec, void*);
 85: PetscErrorCode FormJacobianState(Tao, Vec, Mat, Mat, Mat, void*);
 86: PetscErrorCode FormJacobianDesign(Tao, Vec, Mat,void*);
 87: PetscErrorCode FormConstraints(Tao, Vec, Vec, void*);
 88: PetscErrorCode FormHessian(Tao, Vec, Mat, Mat, void*);
 89: PetscErrorCode Gather(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat);
 90: PetscErrorCode Scatter(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat);
 91: PetscErrorCode HyperbolicInitialize(AppCtx *user);
 92: PetscErrorCode HyperbolicDestroy(AppCtx *user);
 93: PetscErrorCode HyperbolicMonitor(Tao, void*);

 95: PetscErrorCode StateMatMult(Mat,Vec,Vec);
 96: PetscErrorCode StateMatBlockMult(Mat,Vec,Vec);
 97: PetscErrorCode StateMatBlockMultTranspose(Mat,Vec,Vec);
 98: PetscErrorCode StateMatMultTranspose(Mat,Vec,Vec);
 99: PetscErrorCode StateMatGetDiagonal(Mat,Vec);
100: PetscErrorCode StateMatDuplicate(Mat,MatDuplicateOption,Mat*);
101: PetscErrorCode StateMatInvMult(Mat,Vec,Vec);
102: PetscErrorCode StateMatInvTransposeMult(Mat,Vec,Vec);
103: PetscErrorCode StateMatBlockPrecMult(PC,Vec,Vec);

105: PetscErrorCode DesignMatMult(Mat,Vec,Vec);
106: PetscErrorCode DesignMatMultTranspose(Mat,Vec,Vec);

108: PetscErrorCode Scatter_yi(Vec,Vec*,VecScatter*,PetscInt); /*  y to y1,y2,...,y_nt */
109: PetscErrorCode Gather_yi(Vec,Vec*,VecScatter*,PetscInt);
110: PetscErrorCode Scatter_uxi_uyi(Vec,Vec*,VecScatter*,Vec*,VecScatter*,PetscInt); /*  u to ux_1,uy_1,ux_2,uy_2,...,u */
111: PetscErrorCode Gather_uxi_uyi(Vec,Vec*,VecScatter*,Vec*,VecScatter*,PetscInt);

113: static  char help[]="";

115: int main(int argc, char **argv)
116: {
117:   PetscErrorCode     ierr;
118:   Vec                x,x0;
119:   Tao                tao;
120:   AppCtx             user;
121:   IS                 is_allstate,is_alldesign;
122:   PetscInt           lo,hi,hi2,lo2,ksp_old;
123:   PetscInt           ntests = 1;
124:   PetscInt           i;
125: #if defined(PETSC_USE_LOG)
126:   PetscLogStage      stages[1];
127: #endif

129:   PetscInitialize(&argc, &argv, (char*)0,help);if (ierr) return ierr;
130:   user.mx = 32;
131:   PetscOptionsBegin(PETSC_COMM_WORLD,NULL,NULL,NULL);
132:   PetscOptionsInt("-mx","Number of grid points in each direction","",user.mx,&user.mx,NULL);
133:   user.nt = 16;
134:   PetscOptionsInt("-nt","Number of time steps","",user.nt,&user.nt,NULL);
135:   user.ndata = 64;
136:   PetscOptionsInt("-ndata","Numbers of data points per sample","",user.ndata,&user.ndata,NULL);
137:   user.alpha = 10.0;
138:   PetscOptionsReal("-alpha","Regularization parameter","",user.alpha,&user.alpha,NULL);
139:   user.T = 1.0/32.0;
140:   PetscOptionsReal("-Tfinal","Final time","",user.T,&user.T,NULL);

142:   user.m = user.mx*user.mx*user.nt; /*  number of constraints */
143:   user.n = user.mx*user.mx*3*user.nt; /*  number of variables */
144:   user.ht = user.T/user.nt; /*  Time step */
145:   user.gamma = user.T*user.ht / (user.mx*user.mx);

147:   VecCreate(PETSC_COMM_WORLD,&user.u);
148:   VecCreate(PETSC_COMM_WORLD,&user.y);
149:   VecCreate(PETSC_COMM_WORLD,&user.c);
150:   VecSetSizes(user.u,PETSC_DECIDE,user.n-user.m);
151:   VecSetSizes(user.y,PETSC_DECIDE,user.m);
152:   VecSetSizes(user.c,PETSC_DECIDE,user.m);
153:   VecSetFromOptions(user.u);
154:   VecSetFromOptions(user.y);
155:   VecSetFromOptions(user.c);

157:   /* Create scatters for reduced spaces.
158:      If the state vector y and design vector u are partitioned as
159:      [y_1; y_2; ...; y_np] and [u_1; u_2; ...; u_np] (with np = # of processors),
160:      then the solution vector x is organized as
161:      [y_1; u_1; y_2; u_2; ...; y_np; u_np].
162:      The index sets user.s_is and user.d_is correspond to the indices of the
163:      state and design variables owned by the current processor.
164:   */
165:   VecCreate(PETSC_COMM_WORLD,&x);

167:   VecGetOwnershipRange(user.y,&lo,&hi);
168:   VecGetOwnershipRange(user.u,&lo2,&hi2);

170:   ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_allstate);
171:   ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+lo2,1,&user.s_is);

173:   ISCreateStride(PETSC_COMM_SELF,hi2-lo2,lo2,1,&is_alldesign);
174:   ISCreateStride(PETSC_COMM_SELF,hi2-lo2,hi+lo2,1,&user.d_is);

176:   VecSetSizes(x,hi-lo+hi2-lo2,user.n);
177:   VecSetFromOptions(x);

179:   VecScatterCreate(x,user.s_is,user.y,is_allstate,&user.state_scatter);
180:   VecScatterCreate(x,user.d_is,user.u,is_alldesign,&user.design_scatter);
181:   ISDestroy(&is_alldesign);
182:   ISDestroy(&is_allstate);

184:   /* Create TAO solver and set desired solution method */
185:   TaoCreate(PETSC_COMM_WORLD,&tao);
186:   TaoSetType(tao,TAOLCL);

188:   /* Set up initial vectors and matrices */
189:   HyperbolicInitialize(&user);

191:   Gather(x,user.y,user.state_scatter,user.u,user.design_scatter);
192:   VecDuplicate(x,&x0);
193:   VecCopy(x,x0);

195:   /* Set solution vector with an initial guess */
196:   TaoSetInitialVector(tao,x);
197:   TaoSetObjectiveRoutine(tao, FormFunction, (void *)&user);
198:   TaoSetGradientRoutine(tao, FormGradient, (void *)&user);
199:   TaoSetConstraintsRoutine(tao, user.c, FormConstraints, (void *)&user);
200:   TaoSetJacobianStateRoutine(tao, user.Js, user.Js, user.JsInv, FormJacobianState, (void *)&user);
201:   TaoSetJacobianDesignRoutine(tao, user.Jd, FormJacobianDesign, (void *)&user);
202:   TaoSetFromOptions(tao);
203:   TaoSetStateDesignIS(tao,user.s_is,user.d_is);

205:   /* SOLVE THE APPLICATION */
206:   PetscOptionsInt("-ntests","Number of times to repeat TaoSolve","",ntests,&ntests,NULL);
207:   PetscOptionsEnd();
208:   PetscLogStageRegister("Trials",&stages[0]);
209:   PetscLogStagePush(stages[0]);
210:   user.ksp_its_initial = user.ksp_its;
211:   ksp_old = user.ksp_its;
212:   for (i=0; i<ntests; i++){
213:     TaoSolve(tao);
214:     PetscPrintf(PETSC_COMM_WORLD,"KSP Iterations = %D\n",user.ksp_its-ksp_old);
215:     VecCopy(x0,x);
216:     TaoSetInitialVector(tao,x);
217:   }
218:   PetscLogStagePop();
219:   PetscBarrier((PetscObject)x);
220:   PetscPrintf(PETSC_COMM_WORLD,"KSP iterations within initialization: ");
221:   PetscPrintf(PETSC_COMM_WORLD,"%D\n",user.ksp_its_initial);
222:   PetscPrintf(PETSC_COMM_WORLD,"Total KSP iterations over %D trial(s): ",ntests);
223:   PetscPrintf(PETSC_COMM_WORLD,"%D\n",user.ksp_its);
224:   PetscPrintf(PETSC_COMM_WORLD,"KSP iterations per trial: ");
225:   PetscPrintf(PETSC_COMM_WORLD,"%D\n",(user.ksp_its-user.ksp_its_initial)/ntests);

227:   TaoDestroy(&tao);
228:   VecDestroy(&x);
229:   VecDestroy(&x0);
230:   HyperbolicDestroy(&user);
231:   PetscFinalize();
232:   return ierr;
233: }
234: /* ------------------------------------------------------------------- */
235: /*
236:    dwork = Qy - d
237:    lwork = L*(u-ur).^2
238:    f = 1/2 * (dwork.dork + alpha*y.lwork)
239: */
240: PetscErrorCode FormFunction(Tao tao,Vec X,PetscReal *f,void *ptr)
241: {
243:   PetscReal      d1=0,d2=0;
244:   AppCtx         *user = (AppCtx*)ptr;

247:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);
248:   MatMult(user->Q,user->y,user->dwork);
249:   VecAXPY(user->dwork,-1.0,user->d);
250:   VecDot(user->dwork,user->dwork,&d1);

252:   VecWAXPY(user->uwork,-1.0,user->ur,user->u);
253:   VecPointwiseMult(user->uwork,user->uwork,user->uwork);
254:   MatMult(user->L,user->uwork,user->lwork);
255:   VecDot(user->y,user->lwork,&d2);
256:   *f = 0.5 * (d1 + user->alpha*d2);
257:   return(0);
258: }

260: /* ------------------------------------------------------------------- */
261: /*
262:     state: g_s = Q' *(Qy - d) + 0.5*alpha*L*(u-ur).^2
263:     design: g_d = alpha*(L'y).*(u-ur)
264: */
265: PetscErrorCode FormGradient(Tao tao,Vec X,Vec G,void *ptr)
266: {
268:   AppCtx         *user = (AppCtx*)ptr;

271:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);
272:   MatMult(user->Q,user->y,user->dwork);
273:   VecAXPY(user->dwork,-1.0,user->d);

275:   MatMult(user->QT,user->dwork,user->ywork);

277:   MatMult(user->LT,user->y,user->uwork);
278:   VecWAXPY(user->vwork,-1.0,user->ur,user->u);
279:   VecPointwiseMult(user->uwork,user->vwork,user->uwork);
280:   VecScale(user->uwork,user->alpha);

282:   VecPointwiseMult(user->vwork,user->vwork,user->vwork);
283:   MatMult(user->L,user->vwork,user->lwork);
284:   VecAXPY(user->ywork,0.5*user->alpha,user->lwork);

286:   Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter);
287:   return(0);
288: }

290: PetscErrorCode FormFunctionGradient(Tao tao, Vec X, PetscReal *f, Vec G, void *ptr)
291: {
293:   PetscReal      d1,d2;
294:   AppCtx         *user = (AppCtx*)ptr;

297:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);
298:   MatMult(user->Q,user->y,user->dwork);
299:   VecAXPY(user->dwork,-1.0,user->d);

301:   MatMult(user->QT,user->dwork,user->ywork);

303:   VecDot(user->dwork,user->dwork,&d1);

305:   MatMult(user->LT,user->y,user->uwork);
306:   VecWAXPY(user->vwork,-1.0,user->ur,user->u);
307:   VecPointwiseMult(user->uwork,user->vwork,user->uwork);
308:   VecScale(user->uwork,user->alpha);

310:   VecPointwiseMult(user->vwork,user->vwork,user->vwork);
311:   MatMult(user->L,user->vwork,user->lwork);
312:   VecAXPY(user->ywork,0.5*user->alpha,user->lwork);

314:   VecDot(user->y,user->lwork,&d2);

316:   *f = 0.5 * (d1 + user->alpha*d2);
317:   Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter);
318:   return(0);
319: }

321: /* ------------------------------------------------------------------- */
322: /* A
323: MatShell object
324: */
325: PetscErrorCode FormJacobianState(Tao tao, Vec X, Mat J, Mat JPre, Mat JInv, void *ptr)
326: {
328:   PetscInt       i;
329:   AppCtx         *user = (AppCtx*)ptr;

332:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);
333:   Scatter_yi(user->u,user->ui,user->ui_scatter,user->nt);
334:   Scatter_uxi_uyi(user->u,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);
335:   for (i=0; i<user->nt; i++){
336:     MatCopy(user->Divxy[0],user->C[i],SAME_NONZERO_PATTERN);
337:     MatCopy(user->Divxy[1],user->Cwork[i],SAME_NONZERO_PATTERN);

339:     MatDiagonalScale(user->C[i],NULL,user->uxi[i]);
340:     MatDiagonalScale(user->Cwork[i],NULL,user->uyi[i]);
341:     MatAXPY(user->C[i],1.0,user->Cwork[i],SUBSET_NONZERO_PATTERN);
342:     MatScale(user->C[i],user->ht);
343:     MatShift(user->C[i],1.0);
344:   }
345:   return(0);
346: }

348: /* ------------------------------------------------------------------- */
349: /* B */
350: PetscErrorCode FormJacobianDesign(Tao tao, Vec X, Mat J, void *ptr)
351: {
353:   AppCtx         *user = (AppCtx*)ptr;

356:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);
357:   return(0);
358: }

360: PetscErrorCode StateMatMult(Mat J_shell, Vec X, Vec Y)
361: {
363:   PetscInt       i;
364:   AppCtx         *user;

367:   MatShellGetContext(J_shell,(void**)&user);
368:   Scatter_yi(X,user->yi,user->yi_scatter,user->nt);
369:   user->block_index = 0;
370:   MatMult(user->JsBlock,user->yi[0],user->yiwork[0]);

372:   for (i=1; i<user->nt; i++){
373:     user->block_index = i;
374:     MatMult(user->JsBlock,user->yi[i],user->yiwork[i]);
375:     MatMult(user->M,user->yi[i-1],user->ziwork[i-1]);
376:     VecAXPY(user->yiwork[i],-1.0,user->ziwork[i-1]);
377:   }
378:   Gather_yi(Y,user->yiwork,user->yi_scatter,user->nt);
379:   return(0);
380: }

382: PetscErrorCode StateMatMultTranspose(Mat J_shell, Vec X, Vec Y)
383: {
385:   PetscInt       i;
386:   AppCtx         *user;

389:   MatShellGetContext(J_shell,(void**)&user);
390:   Scatter_yi(X,user->yi,user->yi_scatter,user->nt);

392:   for (i=0; i<user->nt-1; i++){
393:     user->block_index = i;
394:     MatMultTranspose(user->JsBlock,user->yi[i],user->yiwork[i]);
395:     MatMult(user->M,user->yi[i+1],user->ziwork[i+1]);
396:     VecAXPY(user->yiwork[i],-1.0,user->ziwork[i+1]);
397:   }

399:   i = user->nt-1;
400:   user->block_index = i;
401:   MatMultTranspose(user->JsBlock,user->yi[i],user->yiwork[i]);
402:   Gather_yi(Y,user->yiwork,user->yi_scatter,user->nt);
403:   return(0);
404: }

406: PetscErrorCode StateMatBlockMult(Mat J_shell, Vec X, Vec Y)
407: {
409:   PetscInt       i;
410:   AppCtx         *user;

413:   MatShellGetContext(J_shell,(void**)&user);
414:   i = user->block_index;
415:   VecPointwiseMult(user->uxiwork[i],X,user->uxi[i]);
416:   VecPointwiseMult(user->uyiwork[i],X,user->uyi[i]);
417:   Gather(user->uiwork[i],user->uxiwork[i],user->ux_scatter[i],user->uyiwork[i],user->uy_scatter[i]);
418:   MatMult(user->Div,user->uiwork[i],Y);
419:   VecAYPX(Y,user->ht,X);
420:   return(0);
421: }

423: PetscErrorCode StateMatBlockMultTranspose(Mat J_shell, Vec X, Vec Y)
424: {
426:   PetscInt       i;
427:   AppCtx         *user;

430:   MatShellGetContext(J_shell,(void**)&user);
431:   i = user->block_index;
432:   MatMult(user->Grad,X,user->uiwork[i]);
433:   Scatter(user->uiwork[i],user->uxiwork[i],user->ux_scatter[i],user->uyiwork[i],user->uy_scatter[i]);
434:   VecPointwiseMult(user->uxiwork[i],user->uxi[i],user->uxiwork[i]);
435:   VecPointwiseMult(user->uyiwork[i],user->uyi[i],user->uyiwork[i]);
436:   VecWAXPY(Y,1.0,user->uxiwork[i],user->uyiwork[i]);
437:   VecAYPX(Y,user->ht,X);
438:   return(0);
439: }

441: PetscErrorCode DesignMatMult(Mat J_shell, Vec X, Vec Y)
442: {
444:   PetscInt       i;
445:   AppCtx         *user;

448:   MatShellGetContext(J_shell,(void**)&user);
449:   Scatter_yi(user->y,user->yi,user->yi_scatter,user->nt);
450:   Scatter_uxi_uyi(X,user->uxiwork,user->uxi_scatter,user->uyiwork,user->uyi_scatter,user->nt);
451:   for (i=0; i<user->nt; i++){
452:     VecPointwiseMult(user->uxiwork[i],user->yi[i],user->uxiwork[i]);
453:     VecPointwiseMult(user->uyiwork[i],user->yi[i],user->uyiwork[i]);
454:     Gather(user->uiwork[i],user->uxiwork[i],user->ux_scatter[i],user->uyiwork[i],user->uy_scatter[i]);
455:     MatMult(user->Div,user->uiwork[i],user->ziwork[i]);
456:     VecScale(user->ziwork[i],user->ht);
457:   }
458:   Gather_yi(Y,user->ziwork,user->yi_scatter,user->nt);
459:   return(0);
460: }

462: PetscErrorCode DesignMatMultTranspose(Mat J_shell, Vec X, Vec Y)
463: {
465:   PetscInt       i;
466:   AppCtx         *user;

469:   MatShellGetContext(J_shell,(void**)&user);
470:   Scatter_yi(user->y,user->yi,user->yi_scatter,user->nt);
471:   Scatter_yi(X,user->yiwork,user->yi_scatter,user->nt);
472:   for (i=0; i<user->nt; i++){
473:     MatMult(user->Grad,user->yiwork[i],user->uiwork[i]);
474:     Scatter(user->uiwork[i],user->uxiwork[i],user->ux_scatter[i],user->uyiwork[i],user->uy_scatter[i]);
475:     VecPointwiseMult(user->uxiwork[i],user->yi[i],user->uxiwork[i]);
476:     VecPointwiseMult(user->uyiwork[i],user->yi[i],user->uyiwork[i]);
477:     Gather(user->uiwork[i],user->uxiwork[i],user->ux_scatter[i],user->uyiwork[i],user->uy_scatter[i]);
478:     VecScale(user->uiwork[i],user->ht);
479:   }
480:   Gather_yi(Y,user->uiwork,user->ui_scatter,user->nt);
481:   return(0);
482: }

484: PetscErrorCode StateMatBlockPrecMult(PC PC_shell, Vec X, Vec Y)
485: {
487:   PetscInt       i;
488:   AppCtx         *user;

491:   PCShellGetContext(PC_shell,(void**)&user);
492:   i = user->block_index;
493:   if (user->c_formed) {
494:     MatSOR(user->C[i],X,1.0,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_SYMMETRIC_SWEEP),0.0,1,1,Y);
495:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not formed");
496:   return(0);
497: }

499: PetscErrorCode StateMatBlockPrecMultTranspose(PC PC_shell, Vec X, Vec Y)
500: {
502:   PetscInt       i;
503:   AppCtx         *user;

506:   PCShellGetContext(PC_shell,(void**)&user);

508:   i = user->block_index;
509:   if (user->c_formed) {
510:     MatSOR(user->C[i],X,1.0,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_SYMMETRIC_SWEEP),0.0,1,1,Y);
511:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not formed");
512:   return(0);
513: }

515: PetscErrorCode StateMatInvMult(Mat J_shell, Vec X, Vec Y)
516: {
518:   AppCtx         *user;
519:   PetscInt       its,i;

522:   MatShellGetContext(J_shell,(void**)&user);

524:   if (Y == user->ytrue) {
525:     /* First solve is done using true solution to set up problem */
526:     KSPSetTolerances(user->solver,1e-4,1e-20,PETSC_DEFAULT,PETSC_DEFAULT);
527:   } else {
528:     KSPSetTolerances(user->solver,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);
529:   }
530:   Scatter_yi(X,user->yi,user->yi_scatter,user->nt);
531:   Scatter_yi(Y,user->yiwork,user->yi_scatter,user->nt);
532:   Scatter_uxi_uyi(user->u,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);

534:   user->block_index = 0;
535:   KSPSolve(user->solver,user->yi[0],user->yiwork[0]);

537:   KSPGetIterationNumber(user->solver,&its);
538:   user->ksp_its = user->ksp_its + its;
539:   for (i=1; i<user->nt; i++){
540:     MatMult(user->M,user->yiwork[i-1],user->ziwork[i-1]);
541:     VecAXPY(user->yi[i],1.0,user->ziwork[i-1]);
542:     user->block_index = i;
543:     KSPSolve(user->solver,user->yi[i],user->yiwork[i]);

545:     KSPGetIterationNumber(user->solver,&its);
546:     user->ksp_its = user->ksp_its + its;
547:   }
548:   Gather_yi(Y,user->yiwork,user->yi_scatter,user->nt);
549:   return(0);
550: }

552: PetscErrorCode StateMatInvTransposeMult(Mat J_shell, Vec X, Vec Y)
553: {
555:   AppCtx         *user;
556:   PetscInt       its,i;

559:   MatShellGetContext(J_shell,(void**)&user);

561:   Scatter_yi(X,user->yi,user->yi_scatter,user->nt);
562:   Scatter_yi(Y,user->yiwork,user->yi_scatter,user->nt);
563:   Scatter_uxi_uyi(user->u,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);

565:   i = user->nt - 1;
566:   user->block_index = i;
567:   KSPSolveTranspose(user->solver,user->yi[i],user->yiwork[i]);

569:   KSPGetIterationNumber(user->solver,&its);
570:   user->ksp_its = user->ksp_its + its;

572:   for (i=user->nt-2; i>=0; i--){
573:     MatMult(user->M,user->yiwork[i+1],user->ziwork[i+1]);
574:     VecAXPY(user->yi[i],1.0,user->ziwork[i+1]);
575:     user->block_index = i;
576:     KSPSolveTranspose(user->solver,user->yi[i],user->yiwork[i]);

578:     KSPGetIterationNumber(user->solver,&its);
579:     user->ksp_its = user->ksp_its + its;
580:   }
581:   Gather_yi(Y,user->yiwork,user->yi_scatter,user->nt);
582:   return(0);
583: }

585: PetscErrorCode StateMatDuplicate(Mat J_shell, MatDuplicateOption opt, Mat *new_shell)
586: {
588:   AppCtx         *user;

591:   MatShellGetContext(J_shell,(void**)&user);

593:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->m,user,new_shell);
594:   MatShellSetOperation(*new_shell,MATOP_MULT,(void(*)(void))StateMatMult);
595:   MatShellSetOperation(*new_shell,MATOP_DUPLICATE,(void(*)(void))StateMatDuplicate);
596:   MatShellSetOperation(*new_shell,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatMultTranspose);
597:   MatShellSetOperation(*new_shell,MATOP_GET_DIAGONAL,(void(*)(void))StateMatGetDiagonal);
598:   return(0);
599: }

601: PetscErrorCode StateMatGetDiagonal(Mat J_shell, Vec X)
602: {
604:   AppCtx         *user;

607:   MatShellGetContext(J_shell,(void**)&user);
608:    VecCopy(user->js_diag,X);
609:   return(0);
610: }

612: PetscErrorCode FormConstraints(Tao tao, Vec X, Vec C, void *ptr)
613: {
614:   /* con = Ay - q, A = [C(u1)  0     0     ...   0;
615:                          -M  C(u2)   0     ...   0;
616:                           0   -M   C(u3)   ...   0;
617:                                       ...         ;
618:                           0    ...      -M C(u_nt)]
619:      C(u) = eye + ht*Div*[diag(u1); diag(u2)]       */
621:   PetscInt       i;
622:   AppCtx         *user = (AppCtx*)ptr;

625:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);
626:   Scatter_yi(user->y,user->yi,user->yi_scatter,user->nt);
627:   Scatter_uxi_uyi(user->u,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);

629:   user->block_index = 0;
630:   MatMult(user->JsBlock,user->yi[0],user->yiwork[0]);

632:   for (i=1; i<user->nt; i++){
633:     user->block_index = i;
634:     MatMult(user->JsBlock,user->yi[i],user->yiwork[i]);
635:     MatMult(user->M,user->yi[i-1],user->ziwork[i-1]);
636:     VecAXPY(user->yiwork[i],-1.0,user->ziwork[i-1]);
637:   }

639:   Gather_yi(C,user->yiwork,user->yi_scatter,user->nt);
640:   VecAXPY(C,-1.0,user->q);

642:   return(0);
643: }


646: PetscErrorCode Scatter(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat)
647: {

651:   VecScatterBegin(s_scat,x,state,INSERT_VALUES,SCATTER_FORWARD);
652:   VecScatterEnd(s_scat,x,state,INSERT_VALUES,SCATTER_FORWARD);
653:   VecScatterBegin(d_scat,x,design,INSERT_VALUES,SCATTER_FORWARD);
654:   VecScatterEnd(d_scat,x,design,INSERT_VALUES,SCATTER_FORWARD);
655:   return(0);
656: }

658: PetscErrorCode Scatter_uxi_uyi(Vec u, Vec *uxi, VecScatter *scatx, Vec *uyi, VecScatter *scaty, PetscInt nt)
659: {
661:   PetscInt       i;

664:   for (i=0; i<nt; i++){
665:     VecScatterBegin(scatx[i],u,uxi[i],INSERT_VALUES,SCATTER_FORWARD);
666:     VecScatterEnd(scatx[i],u,uxi[i],INSERT_VALUES,SCATTER_FORWARD);
667:     VecScatterBegin(scaty[i],u,uyi[i],INSERT_VALUES,SCATTER_FORWARD);
668:     VecScatterEnd(scaty[i],u,uyi[i],INSERT_VALUES,SCATTER_FORWARD);
669:   }
670:   return(0);
671: }

673: PetscErrorCode Gather(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat)
674: {

678:   VecScatterBegin(s_scat,state,x,INSERT_VALUES,SCATTER_REVERSE);
679:   VecScatterEnd(s_scat,state,x,INSERT_VALUES,SCATTER_REVERSE);
680:   VecScatterBegin(d_scat,design,x,INSERT_VALUES,SCATTER_REVERSE);
681:   VecScatterEnd(d_scat,design,x,INSERT_VALUES,SCATTER_REVERSE);
682:   return(0);
683: }

685: PetscErrorCode Gather_uxi_uyi(Vec u, Vec *uxi, VecScatter *scatx, Vec *uyi, VecScatter *scaty, PetscInt nt)
686: {
688:   PetscInt       i;

691:   for (i=0; i<nt; i++){
692:     VecScatterBegin(scatx[i],uxi[i],u,INSERT_VALUES,SCATTER_REVERSE);
693:     VecScatterEnd(scatx[i],uxi[i],u,INSERT_VALUES,SCATTER_REVERSE);
694:     VecScatterBegin(scaty[i],uyi[i],u,INSERT_VALUES,SCATTER_REVERSE);
695:     VecScatterEnd(scaty[i],uyi[i],u,INSERT_VALUES,SCATTER_REVERSE);
696:   }
697:   return(0);
698: }

700: PetscErrorCode Scatter_yi(Vec y, Vec *yi, VecScatter *scat, PetscInt nt)
701: {
703:   PetscInt       i;

706:   for (i=0; i<nt; i++){
707:     VecScatterBegin(scat[i],y,yi[i],INSERT_VALUES,SCATTER_FORWARD);
708:     VecScatterEnd(scat[i],y,yi[i],INSERT_VALUES,SCATTER_FORWARD);
709:   }
710:   return(0);
711: }

713: PetscErrorCode Gather_yi(Vec y, Vec *yi, VecScatter *scat, PetscInt nt)
714: {
716:   PetscInt       i;

719:   for (i=0; i<nt; i++){
720:     VecScatterBegin(scat[i],yi[i],y,INSERT_VALUES,SCATTER_REVERSE);
721:     VecScatterEnd(scat[i],yi[i],y,INSERT_VALUES,SCATTER_REVERSE);
722:   }
723:   return(0);
724: }

726: PetscErrorCode HyperbolicInitialize(AppCtx *user)
727: {
729:   PetscInt       n,i,j,linear_index,istart,iend,iblock,lo,hi;
730:   Vec            XX,YY,XXwork,YYwork,yi,uxi,ui,bc;
731:   PetscReal      h,sum;
732:   PetscScalar    hinv,neg_hinv,quarter=0.25,one=1.0,half_hinv,neg_half_hinv;
733:   PetscScalar    vx,vy,zero=0.0;
734:   IS             is_from_y,is_to_yi,is_from_u,is_to_uxi,is_to_uyi;

737:   user->jformed = PETSC_FALSE;
738:   user->c_formed = PETSC_FALSE;

740:   user->ksp_its = 0;
741:   user->ksp_its_initial = 0;

743:   n = user->mx * user->mx;

745:   h = 1.0/user->mx;
746:   hinv = user->mx;
747:   neg_hinv = -hinv;
748:   half_hinv = hinv / 2.0;
749:   neg_half_hinv = neg_hinv / 2.0;

751:   /* Generate Grad matrix */
752:   MatCreate(PETSC_COMM_WORLD,&user->Grad);
753:   MatSetSizes(user->Grad,PETSC_DECIDE,PETSC_DECIDE,2*n,n);
754:   MatSetFromOptions(user->Grad);
755:   MatMPIAIJSetPreallocation(user->Grad,3,NULL,3,NULL);
756:   MatSeqAIJSetPreallocation(user->Grad,3,NULL);
757:   MatGetOwnershipRange(user->Grad,&istart,&iend);

759:   for (i=istart; i<iend; i++){
760:     if (i<n){
761:       iblock = i / user->mx;
762:       j = iblock*user->mx + ((i+user->mx-1) % user->mx);
763:       MatSetValues(user->Grad,1,&i,1,&j,&half_hinv,INSERT_VALUES);
764:       j = iblock*user->mx + ((i+1) % user->mx);
765:       MatSetValues(user->Grad,1,&i,1,&j,&neg_half_hinv,INSERT_VALUES);
766:     }
767:     if (i>=n){
768:       j = (i - user->mx) % n;
769:       MatSetValues(user->Grad,1,&i,1,&j,&half_hinv,INSERT_VALUES);
770:       j = (j + 2*user->mx) % n;
771:       MatSetValues(user->Grad,1,&i,1,&j,&neg_half_hinv,INSERT_VALUES);
772:     }
773:   }

775:   MatAssemblyBegin(user->Grad,MAT_FINAL_ASSEMBLY);
776:   MatAssemblyEnd(user->Grad,MAT_FINAL_ASSEMBLY);

778:   MatCreate(PETSC_COMM_WORLD,&user->Gradxy[0]);
779:   MatSetSizes(user->Gradxy[0],PETSC_DECIDE,PETSC_DECIDE,n,n);
780:   MatSetFromOptions(user->Gradxy[0]);
781:   MatMPIAIJSetPreallocation(user->Gradxy[0],3,NULL,3,NULL);
782:   MatSeqAIJSetPreallocation(user->Gradxy[0],3,NULL);
783:   MatGetOwnershipRange(user->Gradxy[0],&istart,&iend);

785:   for (i=istart; i<iend; i++){
786:     iblock = i / user->mx;
787:     j = iblock*user->mx + ((i+user->mx-1) % user->mx);
788:     MatSetValues(user->Gradxy[0],1,&i,1,&j,&half_hinv,INSERT_VALUES);
789:     j = iblock*user->mx + ((i+1) % user->mx);
790:     MatSetValues(user->Gradxy[0],1,&i,1,&j,&neg_half_hinv,INSERT_VALUES);
791:     MatSetValues(user->Gradxy[0],1,&i,1,&i,&zero,INSERT_VALUES);
792:   }
793:   MatAssemblyBegin(user->Gradxy[0],MAT_FINAL_ASSEMBLY);
794:   MatAssemblyEnd(user->Gradxy[0],MAT_FINAL_ASSEMBLY);

796:   MatCreate(PETSC_COMM_WORLD,&user->Gradxy[1]);
797:   MatSetSizes(user->Gradxy[1],PETSC_DECIDE,PETSC_DECIDE,n,n);
798:   MatSetFromOptions(user->Gradxy[1]);
799:   MatMPIAIJSetPreallocation(user->Gradxy[1],3,NULL,3,NULL);
800:   MatSeqAIJSetPreallocation(user->Gradxy[1],3,NULL);
801:   MatGetOwnershipRange(user->Gradxy[1],&istart,&iend);

803:   for (i=istart; i<iend; i++){
804:     j = (i + n - user->mx) % n;
805:     MatSetValues(user->Gradxy[1],1,&i,1,&j,&half_hinv,INSERT_VALUES);
806:     j = (j + 2*user->mx) % n;
807:     MatSetValues(user->Gradxy[1],1,&i,1,&j,&neg_half_hinv,INSERT_VALUES);
808:     MatSetValues(user->Gradxy[1],1,&i,1,&i,&zero,INSERT_VALUES);
809:   }
810:   MatAssemblyBegin(user->Gradxy[1],MAT_FINAL_ASSEMBLY);
811:   MatAssemblyEnd(user->Gradxy[1],MAT_FINAL_ASSEMBLY);

813:   /* Generate Div matrix */
814:   MatTranspose(user->Grad,MAT_INITIAL_MATRIX,&user->Div);
815:   MatTranspose(user->Gradxy[0],MAT_INITIAL_MATRIX,&user->Divxy[0]);
816:   MatTranspose(user->Gradxy[1],MAT_INITIAL_MATRIX,&user->Divxy[1]);

818:   /* Off-diagonal averaging matrix */
819:   MatCreate(PETSC_COMM_WORLD,&user->M);
820:   MatSetSizes(user->M,PETSC_DECIDE,PETSC_DECIDE,n,n);
821:   MatSetFromOptions(user->M);
822:   MatMPIAIJSetPreallocation(user->M,4,NULL,4,NULL);
823:   MatSeqAIJSetPreallocation(user->M,4,NULL);
824:   MatGetOwnershipRange(user->M,&istart,&iend);

826:   for (i=istart; i<iend; i++){
827:     /* kron(Id,Av) */
828:     iblock = i / user->mx;
829:     j = iblock*user->mx + ((i+user->mx-1) % user->mx);
830:     MatSetValues(user->M,1,&i,1,&j,&quarter,INSERT_VALUES);
831:     j = iblock*user->mx + ((i+1) % user->mx);
832:     MatSetValues(user->M,1,&i,1,&j,&quarter,INSERT_VALUES);

834:     /* kron(Av,Id) */
835:     j = (i + user->mx) % n;
836:     MatSetValues(user->M,1,&i,1,&j,&quarter,INSERT_VALUES);
837:     j = (i + n - user->mx) % n;
838:     MatSetValues(user->M,1,&i,1,&j,&quarter,INSERT_VALUES);
839:   }
840:   MatAssemblyBegin(user->M,MAT_FINAL_ASSEMBLY);
841:   MatAssemblyEnd(user->M,MAT_FINAL_ASSEMBLY);

843:   /* Generate 2D grid */
844:   VecCreate(PETSC_COMM_WORLD,&XX);
845:   VecCreate(PETSC_COMM_WORLD,&user->q);
846:   VecSetSizes(XX,PETSC_DECIDE,n);
847:   VecSetSizes(user->q,PETSC_DECIDE,n*user->nt);
848:   VecSetFromOptions(XX);
849:   VecSetFromOptions(user->q);

851:   VecDuplicate(XX,&YY);
852:   VecDuplicate(XX,&XXwork);
853:   VecDuplicate(XX,&YYwork);
854:   VecDuplicate(XX,&user->d);
855:   VecDuplicate(XX,&user->dwork);

857:   VecGetOwnershipRange(XX,&istart,&iend);
858:   for (linear_index=istart; linear_index<iend; linear_index++){
859:     i = linear_index % user->mx;
860:     j = (linear_index-i)/user->mx;
861:     vx = h*(i+0.5);
862:     vy = h*(j+0.5);
863:     VecSetValues(XX,1,&linear_index,&vx,INSERT_VALUES);
864:     VecSetValues(YY,1,&linear_index,&vy,INSERT_VALUES);
865:   }

867:   VecAssemblyBegin(XX);
868:   VecAssemblyEnd(XX);
869:   VecAssemblyBegin(YY);
870:   VecAssemblyEnd(YY);

872:   /* Compute final density function yT
873:      yT = 1.0 + exp(-30*((x-0.25)^2+(y-0.25)^2)) + exp(-30*((x-0.75)^2+(y-0.75)^2))
874:      yT = yT / (h^2*sum(yT)) */
875:   VecCopy(XX,XXwork);
876:   VecCopy(YY,YYwork);

878:   VecShift(XXwork,-0.25);
879:   VecShift(YYwork,-0.25);

881:   VecPointwiseMult(XXwork,XXwork,XXwork);
882:   VecPointwiseMult(YYwork,YYwork,YYwork);

884:   VecCopy(XXwork,user->dwork);
885:   VecAXPY(user->dwork,1.0,YYwork);
886:   VecScale(user->dwork,-30.0);
887:   VecExp(user->dwork);
888:   VecCopy(user->dwork,user->d);

890:   VecCopy(XX,XXwork);
891:   VecCopy(YY,YYwork);

893:   VecShift(XXwork,-0.75);
894:   VecShift(YYwork,-0.75);

896:   VecPointwiseMult(XXwork,XXwork,XXwork);
897:   VecPointwiseMult(YYwork,YYwork,YYwork);

899:   VecCopy(XXwork,user->dwork);
900:   VecAXPY(user->dwork,1.0,YYwork);
901:   VecScale(user->dwork,-30.0);
902:   VecExp(user->dwork);

904:   VecAXPY(user->d,1.0,user->dwork);
905:   VecShift(user->d,1.0);
906:   VecSum(user->d,&sum);
907:   VecScale(user->d,1.0/(h*h*sum));

909:   /* Initial conditions of forward problem */
910:   VecDuplicate(XX,&bc);
911:   VecCopy(XX,XXwork);
912:   VecCopy(YY,YYwork);

914:   VecShift(XXwork,-0.5);
915:   VecShift(YYwork,-0.5);

917:   VecPointwiseMult(XXwork,XXwork,XXwork);
918:   VecPointwiseMult(YYwork,YYwork,YYwork);

920:   VecWAXPY(bc,1.0,XXwork,YYwork);
921:   VecScale(bc,-50.0);
922:   VecExp(bc);
923:   VecShift(bc,1.0);
924:   VecSum(bc,&sum);
925:   VecScale(bc,1.0/(h*h*sum));

927:   /* Create scatter from y to y_1,y_2,...,y_nt */
928:   /*  TODO: Reorder for better parallelism. (This will require reordering Q and L as well.) */
929:   PetscMalloc1(user->nt*user->mx*user->mx,&user->yi_scatter);
930:   VecCreate(PETSC_COMM_WORLD,&yi);
931:   VecSetSizes(yi,PETSC_DECIDE,user->mx*user->mx);
932:   VecSetFromOptions(yi);
933:   VecDuplicateVecs(yi,user->nt,&user->yi);
934:   VecDuplicateVecs(yi,user->nt,&user->yiwork);
935:   VecDuplicateVecs(yi,user->nt,&user->ziwork);
936:   for (i=0; i<user->nt; i++){
937:     VecGetOwnershipRange(user->yi[i],&lo,&hi);
938:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_yi);
939:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+i*user->mx*user->mx,1,&is_from_y);
940:     VecScatterCreate(user->y,is_from_y,user->yi[i],is_to_yi,&user->yi_scatter[i]);
941:     ISDestroy(&is_to_yi);
942:     ISDestroy(&is_from_y);
943:   }

945:   /* Create scatter from u to ux_1,uy_1,ux_2,uy_2,...,ux_nt,uy_nt */
946:   /*  TODO: reorder for better parallelism */
947:   PetscMalloc1(user->nt*user->mx*user->mx,&user->uxi_scatter);
948:   PetscMalloc1(user->nt*user->mx*user->mx,&user->uyi_scatter);
949:   PetscMalloc1(user->nt*user->mx*user->mx,&user->ux_scatter);
950:   PetscMalloc1(user->nt*user->mx*user->mx,&user->uy_scatter);
951:   PetscMalloc1(2*user->nt*user->mx*user->mx,&user->ui_scatter);
952:   VecCreate(PETSC_COMM_WORLD,&uxi);
953:   VecCreate(PETSC_COMM_WORLD,&ui);
954:   VecSetSizes(uxi,PETSC_DECIDE,user->mx*user->mx);
955:   VecSetSizes(ui,PETSC_DECIDE,2*user->mx*user->mx);
956:   VecSetFromOptions(uxi);
957:   VecSetFromOptions(ui);
958:   VecDuplicateVecs(uxi,user->nt,&user->uxi);
959:   VecDuplicateVecs(uxi,user->nt,&user->uyi);
960:   VecDuplicateVecs(uxi,user->nt,&user->uxiwork);
961:   VecDuplicateVecs(uxi,user->nt,&user->uyiwork);
962:   VecDuplicateVecs(ui,user->nt,&user->ui);
963:   VecDuplicateVecs(ui,user->nt,&user->uiwork);
964:   for (i=0; i<user->nt; i++){
965:     VecGetOwnershipRange(user->uxi[i],&lo,&hi);
966:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_uxi);
967:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+2*i*user->mx*user->mx,1,&is_from_u);
968:     VecScatterCreate(user->u,is_from_u,user->uxi[i],is_to_uxi,&user->uxi_scatter[i]);

970:     ISDestroy(&is_to_uxi);
971:     ISDestroy(&is_from_u);

973:     VecGetOwnershipRange(user->uyi[i],&lo,&hi);
974:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_uyi);
975:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+(2*i+1)*user->mx*user->mx,1,&is_from_u);
976:     VecScatterCreate(user->u,is_from_u,user->uyi[i],is_to_uyi,&user->uyi_scatter[i]);

978:     ISDestroy(&is_to_uyi);
979:     ISDestroy(&is_from_u);

981:     VecGetOwnershipRange(user->uxi[i],&lo,&hi);
982:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_uxi);
983:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_from_u);
984:     VecScatterCreate(user->ui[i],is_from_u,user->uxi[i],is_to_uxi,&user->ux_scatter[i]);

986:     ISDestroy(&is_to_uxi);
987:     ISDestroy(&is_from_u);

989:     VecGetOwnershipRange(user->uyi[i],&lo,&hi);
990:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_uyi);
991:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+user->mx*user->mx,1,&is_from_u);
992:     VecScatterCreate(user->ui[i],is_from_u,user->uyi[i],is_to_uyi,&user->uy_scatter[i]);

994:     ISDestroy(&is_to_uyi);
995:     ISDestroy(&is_from_u);

997:     VecGetOwnershipRange(user->ui[i],&lo,&hi);
998:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_uxi);
999:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+2*i*user->mx*user->mx,1,&is_from_u);
1000:     VecScatterCreate(user->u,is_from_u,user->ui[i],is_to_uxi,&user->ui_scatter[i]);

1002:     ISDestroy(&is_to_uxi);
1003:     ISDestroy(&is_from_u);
1004:   }

1006:   /* RHS of forward problem */
1007:   MatMult(user->M,bc,user->yiwork[0]);
1008:   for (i=1; i<user->nt; i++){
1009:     VecSet(user->yiwork[i],0.0);
1010:   }
1011:   Gather_yi(user->q,user->yiwork,user->yi_scatter,user->nt);

1013:   /* Compute true velocity field utrue */
1014:   VecDuplicate(user->u,&user->utrue);
1015:   for (i=0; i<user->nt; i++){
1016:     VecCopy(YY,user->uxi[i]);
1017:     VecScale(user->uxi[i],150.0*i*user->ht);
1018:     VecCopy(XX,user->uyi[i]);
1019:     VecShift(user->uyi[i],-10.0);
1020:     VecScale(user->uyi[i],15.0*i*user->ht);
1021:   }
1022:   Gather_uxi_uyi(user->utrue,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);

1024:   /* Initial guess and reference model */
1025:   VecDuplicate(user->utrue,&user->ur);
1026:   for (i=0; i<user->nt; i++){
1027:     VecCopy(XX,user->uxi[i]);
1028:     VecShift(user->uxi[i],i*user->ht);
1029:     VecCopy(YY,user->uyi[i]);
1030:     VecShift(user->uyi[i],-i*user->ht);
1031:   }
1032:   Gather_uxi_uyi(user->ur,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);

1034:   /* Generate regularization matrix L */
1035:   MatCreate(PETSC_COMM_WORLD,&user->LT);
1036:   MatSetSizes(user->LT,PETSC_DECIDE,PETSC_DECIDE,2*n*user->nt,n*user->nt);
1037:   MatSetFromOptions(user->LT);
1038:   MatMPIAIJSetPreallocation(user->LT,1,NULL,1,NULL);
1039:   MatSeqAIJSetPreallocation(user->LT,1,NULL);
1040:   MatGetOwnershipRange(user->LT,&istart,&iend);

1042:   for (i=istart; i<iend; i++){
1043:     iblock = (i+n) / (2*n);
1044:     j = i - iblock*n;
1045:     MatSetValues(user->LT,1,&i,1,&j,&user->gamma,INSERT_VALUES);
1046:   }

1048:   MatAssemblyBegin(user->LT,MAT_FINAL_ASSEMBLY);
1049:   MatAssemblyEnd(user->LT,MAT_FINAL_ASSEMBLY);

1051:   MatTranspose(user->LT,MAT_INITIAL_MATRIX,&user->L);

1053:   /* Build work vectors and matrices */
1054:   VecCreate(PETSC_COMM_WORLD,&user->lwork);
1055:   VecSetType(user->lwork,VECMPI);
1056:   VecSetSizes(user->lwork,PETSC_DECIDE,user->m);
1057:   VecSetFromOptions(user->lwork);

1059:   MatDuplicate(user->Div,MAT_SHARE_NONZERO_PATTERN,&user->Divwork);

1061:   VecDuplicate(user->y,&user->ywork);
1062:   VecDuplicate(user->u,&user->uwork);
1063:   VecDuplicate(user->u,&user->vwork);
1064:   VecDuplicate(user->u,&user->js_diag);
1065:   VecDuplicate(user->c,&user->cwork);

1067:   /* Create matrix-free shell user->Js for computing A*x */
1068:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->m,user,&user->Js);
1069:   MatShellSetOperation(user->Js,MATOP_MULT,(void(*)(void))StateMatMult);
1070:   MatShellSetOperation(user->Js,MATOP_DUPLICATE,(void(*)(void))StateMatDuplicate);
1071:   MatShellSetOperation(user->Js,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatMultTranspose);
1072:   MatShellSetOperation(user->Js,MATOP_GET_DIAGONAL,(void(*)(void))StateMatGetDiagonal);

1074:   /* Diagonal blocks of user->Js */
1075:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,n,n,user,&user->JsBlock);
1076:   MatShellSetOperation(user->JsBlock,MATOP_MULT,(void(*)(void))StateMatBlockMult);
1077:   MatShellSetOperation(user->JsBlock,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatBlockMultTranspose);

1079:   /* Create a matrix-free shell user->JsBlockPrec for computing (U+D)\D*(L+D)\x, where JsBlock = L+D+U,
1080:      D is diagonal, L is strictly lower triangular, and U is strictly upper triangular.
1081:      This is an SOR preconditioner for user->JsBlock. */
1082:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,n,n,user,&user->JsBlockPrec);
1083:   MatShellSetOperation(user->JsBlockPrec,MATOP_MULT,(void(*)(void))StateMatBlockPrecMult);
1084:   MatShellSetOperation(user->JsBlockPrec,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatBlockPrecMultTranspose);

1086:   /* Create a matrix-free shell user->Jd for computing B*x */
1087:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->n-user->m,user,&user->Jd);
1088:   MatShellSetOperation(user->Jd,MATOP_MULT,(void(*)(void))DesignMatMult);
1089:   MatShellSetOperation(user->Jd,MATOP_MULT_TRANSPOSE,(void(*)(void))DesignMatMultTranspose);

1091:   /* User-defined routines for computing user->Js\x and user->Js^T\x*/
1092:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->m,user,&user->JsInv);
1093:   MatShellSetOperation(user->JsInv,MATOP_MULT,(void(*)(void))StateMatInvMult);
1094:   MatShellSetOperation(user->JsInv,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatInvTransposeMult);

1096:   /* Build matrices for SOR preconditioner */
1097:   Scatter_uxi_uyi(user->u,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);
1098:   MatShift(user->Divxy[0],0.0); /*  Force C[i] and Divxy[0] to share same nonzero pattern */
1099:   MatAXPY(user->Divxy[0],0.0,user->Divxy[1],DIFFERENT_NONZERO_PATTERN);
1100:   PetscMalloc1(5*n,&user->C);
1101:   PetscMalloc1(2*n,&user->Cwork);
1102:   for (i=0; i<user->nt; i++){
1103:     MatDuplicate(user->Divxy[0],MAT_COPY_VALUES,&user->C[i]);
1104:     MatDuplicate(user->Divxy[1],MAT_COPY_VALUES,&user->Cwork[i]);

1106:     MatDiagonalScale(user->C[i],NULL,user->uxi[i]);
1107:     MatDiagonalScale(user->Cwork[i],NULL,user->uyi[i]);
1108:     MatAXPY(user->C[i],1.0,user->Cwork[i],SUBSET_NONZERO_PATTERN);
1109:     MatScale(user->C[i],user->ht);
1110:     MatShift(user->C[i],1.0);
1111:   }

1113:   /* Solver options and tolerances */
1114:   KSPCreate(PETSC_COMM_WORLD,&user->solver);
1115:   KSPSetType(user->solver,KSPGMRES);
1116:   KSPSetOperators(user->solver,user->JsBlock,user->JsBlockPrec);
1117:   KSPSetInitialGuessNonzero(user->solver,PETSC_FALSE); /*  TODO: why is true slower? */
1118:   KSPSetTolerances(user->solver,1e-4,1e-20,1e3,500);
1119:   /* KSPSetTolerances(user->solver,1e-8,1e-16,1e3,500); */
1120:   KSPGetPC(user->solver,&user->prec);
1121:   PCSetType(user->prec,PCSHELL);

1123:   PCShellSetApply(user->prec,StateMatBlockPrecMult);
1124:   PCShellSetApplyTranspose(user->prec,StateMatBlockPrecMultTranspose);
1125:   PCShellSetContext(user->prec,user);

1127:   /* Compute true state function yt given ut */
1128:   VecCreate(PETSC_COMM_WORLD,&user->ytrue);
1129:   VecSetSizes(user->ytrue,PETSC_DECIDE,n*user->nt);
1130:   VecSetFromOptions(user->ytrue);
1131:   user->c_formed = PETSC_TRUE;
1132:   VecCopy(user->utrue,user->u); /*  Set u=utrue temporarily for StateMatInv */
1133:   VecSet(user->ytrue,0.0); /*  Initial guess */
1134:   StateMatInvMult(user->Js,user->q,user->ytrue);
1135:   VecCopy(user->ur,user->u); /*  Reset u=ur */

1137:   /* Initial guess y0 for state given u0 */
1138:   StateMatInvMult(user->Js,user->q,user->y);

1140:   /* Data discretization */
1141:   MatCreate(PETSC_COMM_WORLD,&user->Q);
1142:   MatSetSizes(user->Q,PETSC_DECIDE,PETSC_DECIDE,user->mx*user->mx,user->m);
1143:   MatSetFromOptions(user->Q);
1144:   MatMPIAIJSetPreallocation(user->Q,0,NULL,1,NULL);
1145:   MatSeqAIJSetPreallocation(user->Q,1,NULL);

1147:   MatGetOwnershipRange(user->Q,&istart,&iend);

1149:   for (i=istart; i<iend; i++){
1150:     j = i + user->m - user->mx*user->mx;
1151:     MatSetValues(user->Q,1,&i,1,&j,&one,INSERT_VALUES);
1152:   }

1154:   MatAssemblyBegin(user->Q,MAT_FINAL_ASSEMBLY);
1155:   MatAssemblyEnd(user->Q,MAT_FINAL_ASSEMBLY);

1157:   MatTranspose(user->Q,MAT_INITIAL_MATRIX,&user->QT);

1159:   VecDestroy(&XX);
1160:   VecDestroy(&YY);
1161:   VecDestroy(&XXwork);
1162:   VecDestroy(&YYwork);
1163:   VecDestroy(&yi);
1164:   VecDestroy(&uxi);
1165:   VecDestroy(&ui);
1166:   VecDestroy(&bc);

1168:   /* Now that initial conditions have been set, let the user pass tolerance options to the KSP solver */
1169:   KSPSetFromOptions(user->solver);
1170:   return(0);
1171: }

1173: PetscErrorCode HyperbolicDestroy(AppCtx *user)
1174: {
1176:   PetscInt       i;

1179:   MatDestroy(&user->Q);
1180:   MatDestroy(&user->QT);
1181:   MatDestroy(&user->Div);
1182:   MatDestroy(&user->Divwork);
1183:   MatDestroy(&user->Grad);
1184:   MatDestroy(&user->L);
1185:   MatDestroy(&user->LT);
1186:   KSPDestroy(&user->solver);
1187:   MatDestroy(&user->Js);
1188:   MatDestroy(&user->Jd);
1189:   MatDestroy(&user->JsBlockPrec);
1190:   MatDestroy(&user->JsInv);
1191:   MatDestroy(&user->JsBlock);
1192:   MatDestroy(&user->Divxy[0]);
1193:   MatDestroy(&user->Divxy[1]);
1194:   MatDestroy(&user->Gradxy[0]);
1195:   MatDestroy(&user->Gradxy[1]);
1196:   MatDestroy(&user->M);
1197:   for (i=0; i<user->nt; i++){
1198:     MatDestroy(&user->C[i]);
1199:     MatDestroy(&user->Cwork[i]);
1200:   }
1201:   PetscFree(user->C);
1202:   PetscFree(user->Cwork);
1203:   VecDestroy(&user->u);
1204:   VecDestroy(&user->uwork);
1205:   VecDestroy(&user->vwork);
1206:   VecDestroy(&user->utrue);
1207:   VecDestroy(&user->y);
1208:   VecDestroy(&user->ywork);
1209:   VecDestroy(&user->ytrue);
1210:   VecDestroyVecs(user->nt,&user->yi);
1211:   VecDestroyVecs(user->nt,&user->yiwork);
1212:   VecDestroyVecs(user->nt,&user->ziwork);
1213:   VecDestroyVecs(user->nt,&user->uxi);
1214:   VecDestroyVecs(user->nt,&user->uyi);
1215:   VecDestroyVecs(user->nt,&user->uxiwork);
1216:   VecDestroyVecs(user->nt,&user->uyiwork);
1217:   VecDestroyVecs(user->nt,&user->ui);
1218:   VecDestroyVecs(user->nt,&user->uiwork);
1219:   VecDestroy(&user->c);
1220:   VecDestroy(&user->cwork);
1221:   VecDestroy(&user->ur);
1222:   VecDestroy(&user->q);
1223:   VecDestroy(&user->d);
1224:   VecDestroy(&user->dwork);
1225:   VecDestroy(&user->lwork);
1226:   VecDestroy(&user->js_diag);
1227:   ISDestroy(&user->s_is);
1228:   ISDestroy(&user->d_is);
1229:   VecScatterDestroy(&user->state_scatter);
1230:   VecScatterDestroy(&user->design_scatter);
1231:   for (i=0; i<user->nt; i++){
1232:     VecScatterDestroy(&user->uxi_scatter[i]);
1233:     VecScatterDestroy(&user->uyi_scatter[i]);
1234:     VecScatterDestroy(&user->ux_scatter[i]);
1235:     VecScatterDestroy(&user->uy_scatter[i]);
1236:     VecScatterDestroy(&user->ui_scatter[i]);
1237:     VecScatterDestroy(&user->yi_scatter[i]);
1238:   }
1239:   PetscFree(user->uxi_scatter);
1240:   PetscFree(user->uyi_scatter);
1241:   PetscFree(user->ux_scatter);
1242:   PetscFree(user->uy_scatter);
1243:   PetscFree(user->ui_scatter);
1244:   PetscFree(user->yi_scatter);
1245:   return(0);
1246: }

1248: PetscErrorCode HyperbolicMonitor(Tao tao, void *ptr)
1249: {
1251:   Vec            X;
1252:   PetscReal      unorm,ynorm;
1253:   AppCtx         *user = (AppCtx*)ptr;

1256:   TaoGetSolutionVector(tao,&X);
1257:   Scatter(X,user->ywork,user->state_scatter,user->uwork,user->design_scatter);
1258:   VecAXPY(user->ywork,-1.0,user->ytrue);
1259:   VecAXPY(user->uwork,-1.0,user->utrue);
1260:   VecNorm(user->uwork,NORM_2,&unorm);
1261:   VecNorm(user->ywork,NORM_2,&ynorm);
1262:   PetscPrintf(MPI_COMM_WORLD, "||u-ut||=%g ||y-yt||=%g\n",(double)unorm,(double)ynorm);
1263:   return(0);
1264: }


1267: /*TEST

1269:    build:
1270:       requires: !complex

1272:    test:
1273:       args: -tao_cmonitor -tao_max_funcs 10 -tao_type lcl
1274:       requires: !single


1277: TEST*/