Actual source code: hyperbolic.c

tao-2.1-p0 2012-07-24
  1: #include "taosolver.h"
  2: #include "src/pde_constrained/impls/lcl/lcl.h"
  3: #include "petsctime.h"

  5: /*T
  6:    Concepts: TAO - Solving a system of nonlinear equations, nonlinear ;east squares
  7:    Routines: TaoInitialize(); TaoFinalize(); 
  8:    Routines: TaoCreate();
  9:    Routines: TaoSetType(); 
 10:    Routines: TaoSetInitialVector();
 11:    Routines: TaoSetObjectiveRoutine();
 12:    Routines: TaoSetGradientRoutine();
 13:    Routines: TaoSetConstraintsRoutine();
 14:    Routines: TaoSetJacobianStateRoutine();
 15:    Routines: TaoSetJacobianDesignRoutine();
 16:    Routines: TaoSetStateDesignIS();
 17:    Routines: TaoSetFromOptions();
 18:    Routines: TaoSetHistory(); TaoGetHistory();
 19:    Routines: TaoSolve();
 20:    Routines: TaoGetTerminationReason(); TaoDestroy(); 
 21:    Processors: 1
 22: T*/

 24: typedef struct {
 25:   PetscInt n; /*  Number of variables */
 26:   PetscInt m; /*  Number of constraints */
 27:   PetscInt mx; /*  grid points in each direction */
 28:   PetscInt nt; /*  Number of time steps */
 29:   PetscInt ndata; /*  Number of data points per sample */
 30:   IS s_is;
 31:   IS d_is;
 32:   VecScatter state_scatter;
 33:   VecScatter design_scatter;
 34:   VecScatter *uxi_scatter,*uyi_scatter,*ux_scatter,*uy_scatter,*ui_scatter;
 35:   VecScatter *yi_scatter;

 37:   Mat Js,Jd,JsBlockPrec,JsInv,JsBlock;
 38:   PetscBool jformed,c_formed;

 40:   PetscReal alpha; /*  Regularization parameter */
 41:   PetscReal gamma;
 42:   PetscReal ht; /*  Time step */
 43:   PetscReal T; /*  Final time */
 44:   Mat Q,QT;
 45:   Mat L,LT;
 46:   Mat Div,Divwork,Divxy[2];
 47:   Mat Grad,Gradxy[2];
 48:   Mat M;
 49:   Mat *C,*Cwork;
 50:   /* Mat Hs,Hd,Hsd; */
 51:   Vec q;
 52:   Vec ur; /*  reference */

 54:   Vec d;
 55:   Vec dwork;

 57:   Vec y; /*  state variables */
 58:   Vec ywork;
 59:   /* Vec ysave; */
 60:   Vec ytrue;
 61:   Vec *yi,*yiwork,*ziwork;
 62:   Vec *uxi,*uyi,*uxiwork,*uyiwork,*ui,*uiwork;

 64:   Vec u; /*  design variables */
 65:   Vec uwork,vwork;
 66:   /* Vec usave; */
 67:   Vec utrue;
 68:  
 69:   Vec js_diag;
 70:   
 71:   Vec c; /*  constraint vector */
 72:   Vec cwork;
 73:   
 74:   Vec lwork;

 76:   KSP solver;
 77:   PC prec;
 78:   PetscInt block_index;

 80:   TAO_LCL *lcl;
 81:   PetscInt ksp_its;
 82:   PetscInt ksp_its_initial;

 84: } AppCtx;


 87: PetscErrorCode FormFunction(TaoSolver, Vec, PetscReal*, void*);
 88: PetscErrorCode FormGradient(TaoSolver, Vec, Vec, void*);
 89: PetscErrorCode FormFunctionGradient(TaoSolver, Vec, PetscReal*, Vec, void*);
 90: PetscErrorCode FormJacobianState(TaoSolver, Vec, Mat*, Mat*, Mat*, MatStructure*,void*);
 91: PetscErrorCode FormJacobianDesign(TaoSolver, Vec, Mat*,void*);
 92: PetscErrorCode FormConstraints(TaoSolver, Vec, Vec, void*);
 93: PetscErrorCode FormHessian(TaoSolver, Vec, Mat*, Mat*, MatStructure*, void*);
 94: PetscErrorCode Gather(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat);
 95: PetscErrorCode Scatter(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat);
 96: PetscErrorCode HyperbolicInitialize(AppCtx *user);
 97: PetscErrorCode HyperbolicDestroy(AppCtx *user);
 98: PetscErrorCode HyperbolicMonitor(TaoSolver, void*);

100: PetscErrorCode StateMatMult(Mat,Vec,Vec);
101: PetscErrorCode StateMatBlockMult(Mat,Vec,Vec);
102: PetscErrorCode StateMatBlockMultTranspose(Mat,Vec,Vec);
103: PetscErrorCode StateMatMultTranspose(Mat,Vec,Vec);
104: PetscErrorCode StateMatGetDiagonal(Mat,Vec);
105: PetscErrorCode StateMatDuplicate(Mat,MatDuplicateOption,Mat*);
106: PetscErrorCode StateMatInvMult(Mat,Vec,Vec);
107: PetscErrorCode StateMatInvTransposeMult(Mat,Vec,Vec);
108: PetscErrorCode StateMatBlockPrecMult(PC,Vec,Vec);

110: PetscErrorCode DesignMatMult(Mat,Vec,Vec);
111: PetscErrorCode DesignMatMultTranspose(Mat,Vec,Vec);

113: PetscErrorCode Scatter_yi(Vec,Vec*,VecScatter*,PetscInt); /*  y to y1,y2,...,y_nt */
114: PetscErrorCode Gather_yi(Vec,Vec*,VecScatter*,PetscInt); 
115: PetscErrorCode Scatter_uxi_uyi(Vec,Vec*,VecScatter*,Vec*,VecScatter*,PetscInt); /*  u to ux_1,uy_1,ux_2,uy_2,...,u */
116: PetscErrorCode Gather_uxi_uyi(Vec,Vec*,VecScatter*,Vec*,VecScatter*,PetscInt);

118: static  char help[]="";

122: int main(int argc, char **argv)
123: {
125:   Vec x,x0;
126:   
127:   TaoSolver tao;
128:   TaoSolverTerminationReason reason;
129:   AppCtx user;
130:   IS is_allstate,is_alldesign;
131:   PetscInt lo,hi,hi2,lo2,ksp_old;
132:   PetscBool flag,showtime;
133:   PetscInt ntests = 1;
134:   PetscLogDouble v1,v2;
135:   PetscInt i;
136:   int stages[1];
137:   

139:   PetscInitialize(&argc, &argv, (char*)0,help);
140:   TaoInitialize(&argc, &argv, (char*)0,help);

142:   showtime = PETSC_FALSE;
143:   PetscOptionsBool("-showtime","Display time elapsed","",showtime,&showtime,&flag); 
144:   user.mx = 32;
145:   PetscOptionsInt("-mx","Number of grid points in each direction","",user.mx,&user.mx,&flag); 
146:   user.nt = 16;
147:   PetscOptionsInt("-nt","Number of time steps","",user.nt,&user.nt,&flag); 
148:   user.ndata = 64;
149:   PetscOptionsInt("-ndata","Numbers of data points per sample","",user.ndata,&user.ndata,&flag); 
150:   user.alpha = 10.0;
151:   PetscOptionsReal("-alpha","Regularization parameter","",user.alpha,&user.alpha,&flag); 
152:   user.T = 1.0/32.0;
153:   PetscOptionsReal("-Tfinal","Final time","",user.T,&user.T,&flag); 



157:   user.m = user.mx*user.mx*user.nt; /*  number of constraints */
158:   user.n = user.mx*user.mx*3*user.nt; /*  number of variables */
159:   user.ht = user.T/user.nt; /*  Time step */
160:   user.gamma = user.T*user.ht / (user.mx*user.mx);

162:   VecCreate(PETSC_COMM_WORLD,&user.u); 
163:   VecCreate(PETSC_COMM_WORLD,&user.y); 
164:   VecCreate(PETSC_COMM_WORLD,&user.c); 
165:   VecSetSizes(user.u,PETSC_DECIDE,user.n-user.m); 
166:   VecSetSizes(user.y,PETSC_DECIDE,user.m); 
167:   VecSetSizes(user.c,PETSC_DECIDE,user.m); 
168:   VecSetFromOptions(user.u); 
169:   VecSetFromOptions(user.y); 
170:   VecSetFromOptions(user.c); 

172:   /* Create scatters for reduced spaces.
173:      If the state vector y and design vector u are partitioned as 
174:      [y_1; y_2; ...; y_np] and [u_1; u_2; ...; u_np] (with np = # of processors),
175:      then the solution vector x is organized as
176:      [y_1; u_1; y_2; u_2; ...; y_np; u_np]. 
177:      The index sets user.s_is and user.d_is correspond to the indices of the
178:      state and design variables owned by the current processor.
179:   */
180:   VecCreate(PETSC_COMM_WORLD,&x); 

182:   VecGetOwnershipRange(user.y,&lo,&hi); 
183:   VecGetOwnershipRange(user.u,&lo2,&hi2);  

185:   ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_allstate); 
186:   ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+lo2,1,&user.s_is); 

188:   ISCreateStride(PETSC_COMM_SELF,hi2-lo2,lo2,1,&is_alldesign); 
189:   ISCreateStride(PETSC_COMM_SELF,hi2-lo2,hi+lo2,1,&user.d_is); 

191:   VecSetSizes(x,hi-lo+hi2-lo2,user.n); 
192:   VecSetFromOptions(x); 

194:   VecScatterCreate(x,user.s_is,user.y,is_allstate,&user.state_scatter); 
195:   VecScatterCreate(x,user.d_is,user.u,is_alldesign,&user.design_scatter); 
196:   ISDestroy(&is_alldesign); 
197:   ISDestroy(&is_allstate); 



201:   /* Create TAO solver and set desired solution method */
202:   TaoCreate(PETSC_COMM_WORLD,&tao); 
203:   TaoSetType(tao,"tao_lcl"); 
204:   user.lcl = (TAO_LCL*)(tao->data);

206:   /* Set up initial vectors and matrices */
207:   HyperbolicInitialize(&user); 

209:   Gather(x,user.y,user.state_scatter,user.u,user.design_scatter); 
210:   VecDuplicate(x,&x0); 
211:   VecCopy(x,x0); 

213:   /* Set solution vector with an initial guess */
214:   TaoSetInitialVector(tao,x); 
215:   TaoSetObjectiveRoutine(tao, FormFunction, (void *)&user); 
216:   TaoSetGradientRoutine(tao, FormGradient, (void *)&user); 
217:   TaoSetConstraintsRoutine(tao, user.c, FormConstraints, (void *)&user); 

219:   TaoSetJacobianStateRoutine(tao, user.Js, user.Js, user.JsInv, FormJacobianState, (void *)&user);  

221:   TaoSetJacobianDesignRoutine(tao, user.Jd, FormJacobianDesign, (void *)&user); 

223:   TaoSetFromOptions(tao); 
224:   TaoSetStateDesignIS(tao,user.s_is,user.d_is); 

226:   /* SOLVE THE APPLICATION */
227:   PetscOptionsInt("-ntests","Number of times to repeat TaoSolve","",ntests,&ntests,&flag); 
228:   PetscLogStageRegister("Trials",&stages[0]); 
229:   PetscLogStagePush(stages[0]); 
230:   user.ksp_its_initial = user.ksp_its;
231:   ksp_old = user.ksp_its;
232:   for (i=0; i<ntests; i++){
233:     PetscGetTime(&v1); 
234:     TaoSolve(tao);  
235:     PetscGetTime(&v2); 
236:     if (showtime) {
237:       PetscPrintf(PETSC_COMM_WORLD,"Elapsed time = %G\n",v2-v1); 
238:     }
239:     PetscPrintf(PETSC_COMM_WORLD,"KSP Iterations = %D\n",user.ksp_its-ksp_old); 
240:     VecCopy(x0,x); 
241:     TaoSetInitialVector(tao,x); 
242:   }
243:   PetscLogStagePop(); 
244:   PetscBarrier((PetscObject)x); 
245:   PetscPrintf(PETSC_COMM_WORLD,"KSP iterations within initialization: ");
246:   PetscPrintf(PETSC_COMM_WORLD,"%D\n",user.ksp_its_initial);
247:   PetscPrintf(PETSC_COMM_WORLD,"Total KSP iterations over %D trial(s): ",ntests);
248:   PetscPrintf(PETSC_COMM_WORLD,"%D\n",user.ksp_its);
249:   PetscPrintf(PETSC_COMM_WORLD,"KSP iterations per trial: ");
250:   PetscPrintf(PETSC_COMM_WORLD,"%D\n",(user.ksp_its-user.ksp_its_initial)/ntests);

252:   TaoGetTerminationReason(tao,&reason); 

254:   if (reason < 0)
255:   {
256:     PetscPrintf(MPI_COMM_WORLD, "TAO failed to converge.\n");
257:   }
258:   else
259:   {
260:     PetscPrintf(MPI_COMM_WORLD, "Optimization terminated with status %D.\n", reason);
261:   }


264:   /* Free TAO data structures */
265:   TaoDestroy(&tao); 

267:   /* Free PETSc data structures */
268:   VecDestroy(&x); 
269:   VecDestroy(&x0); 
270:   HyperbolicDestroy(&user); 

272:   /* Finalize TAO, PETSc */
273:   TaoFinalize();
274:   PetscFinalize();

276:   return 0;
277: }
278: /* ------------------------------------------------------------------- */
281: /* 
282:    dwork = Qy - d  
283:    lwork = L*(u-ur).^2
284:    f = 1/2 * (dwork.dork + alpha*y.lwork)
285: */
286: PetscErrorCode FormFunction(TaoSolver tao,Vec X,PetscReal *f,void *ptr)
287: {
289:   PetscReal d1=0,d2=0;
290:   AppCtx *user = (AppCtx*)ptr;
292:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 
293:   MatMult(user->Q,user->y,user->dwork); 
294:   VecAXPY(user->dwork,-1.0,user->d); 
295:   VecDot(user->dwork,user->dwork,&d1); 

297:   VecWAXPY(user->uwork,-1.0,user->ur,user->u); 
298:   VecPointwiseMult(user->uwork,user->uwork,user->uwork); 
299:   MatMult(user->L,user->uwork,user->lwork); 
300:   VecDot(user->y,user->lwork,&d2); 
301:   
302:   *f = 0.5 * (d1 + user->alpha*d2); 
303:   return(0);
304: }

306: /* ------------------------------------------------------------------- */
309: /*  
310:     state: g_s = Q' *(Qy - d) + 0.5*alpha*L*(u-ur).^2
311:     design: g_d = alpha*(L'y).*(u-ur)
312: */
313: PetscErrorCode FormGradient(TaoSolver tao,Vec X,Vec G,void *ptr)
314: {
316:   AppCtx *user = (AppCtx*)ptr;
318:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 
319:   MatMult(user->Q,user->y,user->dwork); 
320:   VecAXPY(user->dwork,-1.0,user->d); 

322:   MatMult(user->QT,user->dwork,user->ywork); 
323:   
324:   MatMult(user->LT,user->y,user->uwork); 
325:   VecWAXPY(user->vwork,-1.0,user->ur,user->u); 
326:   VecPointwiseMult(user->uwork,user->vwork,user->uwork); 
327:   VecScale(user->uwork,user->alpha); 

329:   VecPointwiseMult(user->vwork,user->vwork,user->vwork); 
330:   MatMult(user->L,user->vwork,user->lwork); 
331:   VecAXPY(user->ywork,0.5*user->alpha,user->lwork); 
332:                       
333:   Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter); 
334:   return(0);
335: }

339: PetscErrorCode FormFunctionGradient(TaoSolver tao, Vec X, PetscReal *f, Vec G, void *ptr)
340: {
342:   PetscReal d1,d2;
343:   AppCtx *user = (AppCtx*)ptr;

346:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 
347:   MatMult(user->Q,user->y,user->dwork); 
348:   VecAXPY(user->dwork,-1.0,user->d); 

350:   MatMult(user->QT,user->dwork,user->ywork); 
351:   
352:   VecDot(user->dwork,user->dwork,&d1); 

354:   MatMult(user->LT,user->y,user->uwork); 
355:   VecWAXPY(user->vwork,-1.0,user->ur,user->u); 
356:   VecPointwiseMult(user->uwork,user->vwork,user->uwork); 
357:   VecScale(user->uwork,user->alpha); 

359:   VecPointwiseMult(user->vwork,user->vwork,user->vwork); 
360:   MatMult(user->L,user->vwork,user->lwork); 
361:   VecAXPY(user->ywork,0.5*user->alpha,user->lwork); 

363:   VecDot(user->y,user->lwork,&d2); 

365:   *f = 0.5 * (d1 + user->alpha*d2);

367:   Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter); 

369:   return(0);

371: }


374: /* ------------------------------------------------------------------- */
377: /* A 
378: MatShell object
379: */
380: PetscErrorCode FormJacobianState(TaoSolver tao, Vec X, Mat *J, Mat* JPre, Mat* JInv, MatStructure* flag, void *ptr)
381: {
383:   PetscInt i;
384:   AppCtx *user = (AppCtx*)ptr;
386:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 
387:   
388:   Scatter_yi(user->u,user->ui,user->ui_scatter,user->nt); 
389:   Scatter_uxi_uyi(user->u,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt); 
390:   for (i=0; i<user->nt; i++){
391:     MatCopy(user->Divxy[0],user->C[i],SAME_NONZERO_PATTERN); 
392:     MatCopy(user->Divxy[1],user->Cwork[i],SAME_NONZERO_PATTERN); 

394:     MatDiagonalScale(user->C[i],PETSC_NULL,user->uxi[i]); 
395:     MatDiagonalScale(user->Cwork[i],PETSC_NULL,user->uyi[i]); 
396:     MatAXPY(user->C[i],1.0,user->Cwork[i],SUBSET_NONZERO_PATTERN); 
397:     MatScale(user->C[i],user->ht); 
398:     MatShift(user->C[i],1.0); 
399:   }
400:     
401:   *JInv = user->JsInv;

403:   return(0);
404: }
405: /* ------------------------------------------------------------------- */
408: /* B */
409: PetscErrorCode FormJacobianDesign(TaoSolver tao, Vec X, Mat *J, void *ptr)
410: {
412:   AppCtx *user = (AppCtx*)ptr;

415:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 

417:   *J = user->Jd;

419:   return(0);
420: }



426: PetscErrorCode StateMatMult(Mat J_shell, Vec X, Vec Y) 
427: {
429:   PetscInt i;
430:   void *ptr;
431:   AppCtx *user;
433:   MatShellGetContext(J_shell,&ptr); 
434:   user = (AppCtx*)ptr;

436:   Scatter_yi(X,user->yi,user->yi_scatter,user->nt); 
437:   
438:   user->block_index = 0;
439:   MatMult(user->JsBlock,user->yi[0],user->yiwork[0]); 

441:   for (i=1; i<user->nt; i++){
442:     user->block_index = i;
443:     MatMult(user->JsBlock,user->yi[i],user->yiwork[i]); 
444:     MatMult(user->M,user->yi[i-1],user->ziwork[i-1]); 
445:     VecAXPY(user->yiwork[i],-1.0,user->ziwork[i-1]); 
446:   }

448:   Gather_yi(Y,user->yiwork,user->yi_scatter,user->nt); 

450:   return(0);
451: }

455: PetscErrorCode StateMatMultTranspose(Mat J_shell, Vec X, Vec Y) 
456: {
458:   PetscInt i;
459:   void *ptr;
460:   AppCtx *user;
462:   MatShellGetContext(J_shell,&ptr); 
463:   user = (AppCtx*)ptr;

465:   Scatter_yi(X,user->yi,user->yi_scatter,user->nt); 

467:   for (i=0; i<user->nt-1; i++){
468:     user->block_index = i;
469:     MatMultTranspose(user->JsBlock,user->yi[i],user->yiwork[i]); 
470:     MatMult(user->M,user->yi[i+1],user->ziwork[i+1]); 
471:     VecAXPY(user->yiwork[i],-1.0,user->ziwork[i+1]); 
472:   }

474:   i = user->nt-1;
475:   user->block_index = i;
476:   MatMultTranspose(user->JsBlock,user->yi[i],user->yiwork[i]); 

478:   Gather_yi(Y,user->yiwork,user->yi_scatter,user->nt); 

480:   return(0);
481: }

485: PetscErrorCode StateMatBlockMult(Mat J_shell, Vec X, Vec Y) 
486: {
488:   PetscInt i;
489:   void *ptr;
490:   AppCtx *user;
492:   MatShellGetContext(J_shell,&ptr); 
493:   user = (AppCtx*)ptr;
494:   
495:   i = user->block_index;
496:   VecPointwiseMult(user->uxiwork[i],X,user->uxi[i]); 
497:   VecPointwiseMult(user->uyiwork[i],X,user->uyi[i]); 
498:   Gather(user->uiwork[i],user->uxiwork[i],user->ux_scatter[i],user->uyiwork[i],user->uy_scatter[i]); 
499:   MatMult(user->Div,user->uiwork[i],Y); 
500:   VecAYPX(Y,user->ht,X); 

502:   return(0);
503: }

507: PetscErrorCode StateMatBlockMultTranspose(Mat J_shell, Vec X, Vec Y) 
508: {
510:   PetscInt i;
511:   void *ptr;
512:   AppCtx *user;
514:   MatShellGetContext(J_shell,&ptr); 
515:   user = (AppCtx*)ptr;
516:  
517:   i = user->block_index;
518:   MatMult(user->Grad,X,user->uiwork[i]); 
519:   Scatter(user->uiwork[i],user->uxiwork[i],user->ux_scatter[i],user->uyiwork[i],user->uy_scatter[i]); 
520:   VecPointwiseMult(user->uxiwork[i],user->uxi[i],user->uxiwork[i]); 
521:   VecPointwiseMult(user->uyiwork[i],user->uyi[i],user->uyiwork[i]); 
522:   VecWAXPY(Y,1.0,user->uxiwork[i],user->uyiwork[i]); 
523:   VecAYPX(Y,user->ht,X); 

525:   return(0);
526: }

530: PetscErrorCode DesignMatMult(Mat J_shell, Vec X, Vec Y) 
531: {
533:   void *ptr;
534:   PetscInt i;
535:   AppCtx *user;
537:   MatShellGetContext(J_shell,&ptr); 
538:   user = (AppCtx*)ptr;

540:   Scatter_yi(user->y,user->yi,user->yi_scatter,user->nt); 
541:   Scatter_uxi_uyi(X,user->uxiwork,user->uxi_scatter,user->uyiwork,user->uyi_scatter,user->nt);
542:   for (i=0; i<user->nt; i++){
543:     VecPointwiseMult(user->uxiwork[i],user->yi[i],user->uxiwork[i]); 
544:     VecPointwiseMult(user->uyiwork[i],user->yi[i],user->uyiwork[i]); 
545:     Gather(user->uiwork[i],user->uxiwork[i],user->ux_scatter[i],user->uyiwork[i],user->uy_scatter[i]); 
546:     MatMult(user->Div,user->uiwork[i],user->ziwork[i]); 
547:     VecScale(user->ziwork[i],user->ht); 
548:   }
549:   Gather_yi(Y,user->ziwork,user->yi_scatter,user->nt); 

551:   return(0);
552: }

556: PetscErrorCode DesignMatMultTranspose(Mat J_shell, Vec X, Vec Y) 
557: {
559:   void *ptr;
560:   PetscInt i;
561:   AppCtx *user;
563:   MatShellGetContext(J_shell,&ptr); 
564:   user = (AppCtx*)ptr;

566:   Scatter_yi(user->y,user->yi,user->yi_scatter,user->nt); 
567:   Scatter_yi(X,user->yiwork,user->yi_scatter,user->nt); 
568:   for (i=0; i<user->nt; i++){
569:     MatMult(user->Grad,user->yiwork[i],user->uiwork[i]); 
570:     Scatter(user->uiwork[i],user->uxiwork[i],user->ux_scatter[i],user->uyiwork[i],user->uy_scatter[i]); 
571:     VecPointwiseMult(user->uxiwork[i],user->yi[i],user->uxiwork[i]); 
572:     VecPointwiseMult(user->uyiwork[i],user->yi[i],user->uyiwork[i]); 
573:     Gather(user->uiwork[i],user->uxiwork[i],user->ux_scatter[i],user->uyiwork[i],user->uy_scatter[i]); 
574:     VecScale(user->uiwork[i],user->ht); 
575:   }
576:   Gather_yi(Y,user->uiwork,user->ui_scatter,user->nt); 

578:   return(0);
579: }

583: PetscErrorCode StateMatBlockPrecMult(PC PC_shell, Vec X, Vec Y) 
584: {
586:   PetscInt i;
587:   void *ptr;
588:   AppCtx *user;
590:   PCShellGetContext(PC_shell,&ptr); 
591:   user = (AppCtx*)ptr;

593:   i = user->block_index;
594:   if (user->c_formed) {
595:     MatSOR(user->C[i],X,1.0,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_SYMMETRIC_SWEEP),0.0,1,1,Y); 
596:   }
597:   else {
598:     printf("C not formed"); abort();
599:   }

601:   return(0);
602: }

606: PetscErrorCode StateMatBlockPrecMultTranspose(PC PC_shell, Vec X, Vec Y) 
607: {
609:   PetscInt i;
610:   void *ptr;
611:   AppCtx *user;
613:   PCShellGetContext(PC_shell,&ptr); 
614:   user = (AppCtx*)ptr;

616:   i = user->block_index;
617:   if (user->c_formed) {
618:     MatSOR(user->C[i],X,1.0,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_SYMMETRIC_SWEEP),0.0,1,1,Y); 
619:   }
620:   else {
621:     printf("C not formed"); abort();
622:   }

624:   return(0);
625: }

629: PetscErrorCode StateMatInvMult(Mat J_shell, Vec X, Vec Y)
630: {
632:   void *ptr;
633:   AppCtx *user;
634:   PetscReal tau;
635:   PetscInt its,i;
637:   MatShellGetContext(J_shell,&ptr); 
638:   user = (AppCtx*)ptr;

640:   if (Y == user->ytrue) {
641:     KSPSetTolerances(user->solver,1e-4,1e-20,PETSC_DEFAULT,PETSC_DEFAULT);
642:   } else if (user->lcl) {
643:     tau = user->lcl->tau[user->lcl->solve_type];
644:     KSPSetTolerances(user->solver,tau,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);
645:   }
646:     
647:   Scatter_yi(X,user->yi,user->yi_scatter,user->nt); 
648:   Scatter_yi(Y,user->yiwork,user->yi_scatter,user->nt); 
649:   Scatter_uxi_uyi(user->u,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);
650:   
651:   user->block_index = 0;
652:   KSPSolve(user->solver,user->yi[0],user->yiwork[0]);  

654:   KSPGetIterationNumber(user->solver,&its); 
655:   user->ksp_its = user->ksp_its + its;


658:   for (i=1; i<user->nt; i++){
659:     MatMult(user->M,user->yiwork[i-1],user->ziwork[i-1]);
660:     VecAXPY(user->yi[i],1.0,user->ziwork[i-1]); 
661:     user->block_index = i;
662:     KSPSolve(user->solver,user->yi[i],user->yiwork[i]); 

664:     KSPGetIterationNumber(user->solver,&its); 
665:     user->ksp_its = user->ksp_its + its;


668:   }

670:   Gather_yi(Y,user->yiwork,user->yi_scatter,user->nt); 


673:   return(0);
674: }

678: PetscErrorCode StateMatInvTransposeMult(Mat J_shell, Vec X, Vec Y)
679: {
681:   void *ptr;
682:   AppCtx *user;
683:   PetscReal tau;
684:   PetscInt its,i;
686:   MatShellGetContext(J_shell,&ptr); 
687:   user = (AppCtx*)ptr;

689:   if (user->lcl) {
690:     tau = user->lcl->tau[user->lcl->solve_type];
691:     KSPSetTolerances(user->solver,tau,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);
692:   }
693:   Scatter_yi(X,user->yi,user->yi_scatter,user->nt); 
694:   Scatter_yi(Y,user->yiwork,user->yi_scatter,user->nt); 
695:   Scatter_uxi_uyi(user->u,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);
696:   
697:   i = user->nt - 1;
698:   user->block_index = i;
699:   KSPSolveTranspose(user->solver,user->yi[i],user->yiwork[i]);  

701:   KSPGetIterationNumber(user->solver,&its); 
702:   user->ksp_its = user->ksp_its + its;


705:   for (i=user->nt-2; i>=0; i--){
706:     MatMult(user->M,user->yiwork[i+1],user->ziwork[i+1]); 
707:     VecAXPY(user->yi[i],1.0,user->ziwork[i+1]); 
708:     user->block_index = i;
709:     KSPSolveTranspose(user->solver,user->yi[i],user->yiwork[i]); 

711:     KSPGetIterationNumber(user->solver,&its); 
712:     user->ksp_its = user->ksp_its + its;

714:   
715:   }

717:   Gather_yi(Y,user->yiwork,user->yi_scatter,user->nt); 


720:   return(0);
721: }

725: PetscErrorCode StateMatDuplicate(Mat J_shell, MatDuplicateOption opt, Mat *new_shell)
726: {
728:   void *ptr;
729:   AppCtx *user;
731:   MatShellGetContext(J_shell,&ptr); 
732:   user = (AppCtx*)ptr;

734:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->m,user,new_shell); 
735:   MatShellSetOperation(*new_shell,MATOP_MULT,(void(*)(void))StateMatMult); 
736:   MatShellSetOperation(*new_shell,MATOP_DUPLICATE,(void(*)(void))StateMatDuplicate); 
737:   MatShellSetOperation(*new_shell,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatMultTranspose); 
738:   MatShellSetOperation(*new_shell,MATOP_GET_DIAGONAL,(void(*)(void))StateMatGetDiagonal); 
739:   

741:   

743:   return(0);
744: }

748: PetscErrorCode StateMatGetDiagonal(Mat J_shell, Vec X)
749: {
751:   void *ptr;
752:   AppCtx *user;
754:   MatShellGetContext(J_shell,&ptr); 
755:   user = (AppCtx*)ptr;
756:    VecCopy(user->js_diag,X); 
757:   return(0);
758:   
759: }

763: PetscErrorCode FormConstraints(TaoSolver tao, Vec X, Vec C, void *ptr)
764: {
765:   /* con = Ay - q, A = [C(u1)  0     0     ...   0; 
766:                          -M  C(u2)   0     ...   0; 
767:                           0   -M   C(u3)   ...   0;
768:                                       ...         ;
769:                           0    ...      -M C(u_nt)] 
770:      C(u) = eye + ht*Div*[diag(u1); diag(u2)]       */
772:   PetscInt i;
773:   AppCtx *user = (AppCtx*)ptr;
775:    
776:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 
777:   Scatter_yi(user->y,user->yi,user->yi_scatter,user->nt); 
778:   Scatter_uxi_uyi(user->u,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);

780:   user->block_index = 0;
781:   MatMult(user->JsBlock,user->yi[0],user->yiwork[0]); 

783:   for (i=1; i<user->nt; i++){
784:     user->block_index = i;
785:     MatMult(user->JsBlock,user->yi[i],user->yiwork[i]); 
786:     MatMult(user->M,user->yi[i-1],user->ziwork[i-1]); 
787:     VecAXPY(user->yiwork[i],-1.0,user->ziwork[i-1]);                     
788:   }

790:   Gather_yi(C,user->yiwork,user->yi_scatter,user->nt); 
791:   VecAXPY(C,-1.0,user->q); 

793:   return(0);
794: }


799: PetscErrorCode Scatter(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat)
800: {
803:   VecScatterBegin(s_scat,x,state,INSERT_VALUES,SCATTER_FORWARD); 
804:   VecScatterEnd(s_scat,x,state,INSERT_VALUES,SCATTER_FORWARD); 

806:   VecScatterBegin(d_scat,x,design,INSERT_VALUES,SCATTER_FORWARD); 
807:   VecScatterEnd(d_scat,x,design,INSERT_VALUES,SCATTER_FORWARD); 
808:   return(0);
809: }

813: PetscErrorCode Scatter_uxi_uyi(Vec u, Vec *uxi, VecScatter *scatx, Vec *uyi, VecScatter *scaty, PetscInt nt)
814: {
816:   PetscInt i;
818:   for (i=0; i<nt; i++){
819:     VecScatterBegin(scatx[i],u,uxi[i],INSERT_VALUES,SCATTER_FORWARD); 
820:     VecScatterEnd(scatx[i],u,uxi[i],INSERT_VALUES,SCATTER_FORWARD); 
821:     VecScatterBegin(scaty[i],u,uyi[i],INSERT_VALUES,SCATTER_FORWARD); 
822:     VecScatterEnd(scaty[i],u,uyi[i],INSERT_VALUES,SCATTER_FORWARD); 
823:   }
824:   return(0);
825: }

829: PetscErrorCode Gather(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat)
830: {
833:   VecScatterBegin(s_scat,state,x,INSERT_VALUES,SCATTER_REVERSE); 
834:   VecScatterEnd(s_scat,state,x,INSERT_VALUES,SCATTER_REVERSE); 
835:   VecScatterBegin(d_scat,design,x,INSERT_VALUES,SCATTER_REVERSE); 
836:   VecScatterEnd(d_scat,design,x,INSERT_VALUES,SCATTER_REVERSE); 
837:   return(0);
838: }

842: PetscErrorCode Gather_uxi_uyi(Vec u, Vec *uxi, VecScatter *scatx, Vec *uyi, VecScatter *scaty, PetscInt nt)
843: {
845:   PetscInt i;
847:   for (i=0; i<nt; i++){
848:     VecScatterBegin(scatx[i],uxi[i],u,INSERT_VALUES,SCATTER_REVERSE); 
849:     VecScatterEnd(scatx[i],uxi[i],u,INSERT_VALUES,SCATTER_REVERSE); 
850:     VecScatterBegin(scaty[i],uyi[i],u,INSERT_VALUES,SCATTER_REVERSE); 
851:     VecScatterEnd(scaty[i],uyi[i],u,INSERT_VALUES,SCATTER_REVERSE); 
852:   }
853:   return(0);
854: }

858: PetscErrorCode Scatter_yi(Vec y, Vec *yi, VecScatter *scat, PetscInt nt)
859: {
861:   PetscInt i;
863:   for (i=0; i<nt; i++){
864:     VecScatterBegin(scat[i],y,yi[i],INSERT_VALUES,SCATTER_FORWARD); 
865:     VecScatterEnd(scat[i],y,yi[i],INSERT_VALUES,SCATTER_FORWARD); 
866:   }
867:   return(0);
868: }

872: PetscErrorCode Gather_yi(Vec y, Vec *yi, VecScatter *scat, PetscInt nt)
873: {
875:   PetscInt i;
877:   for (i=0; i<nt; i++){
878:     VecScatterBegin(scat[i],yi[i],y,INSERT_VALUES,SCATTER_REVERSE); 
879:     VecScatterEnd(scat[i],yi[i],y,INSERT_VALUES,SCATTER_REVERSE); 
880:   }
881:   return(0);
882: }
883:  
884:     
887: PetscErrorCode HyperbolicInitialize(AppCtx *user)
888: {
890:   PetscInt n,i,j,linear_index,istart,iend,iblock,lo,hi;
891:   Vec XX,YY,XXwork,YYwork,yi,uxi,ui,bc;
892:   PetscReal h,sum;
893:   PetscScalar hinv,neg_hinv,quarter=0.25,one=1.0,half_hinv,neg_half_hinv;
894:   PetscScalar vx,vy,zero=0.0;
895:   IS is_from_y,is_to_yi,is_from_u,is_to_uxi,is_to_uyi;

898:   user->jformed = PETSC_FALSE;
899:   user->c_formed = PETSC_FALSE;

901:   user->ksp_its = 0;
902:   user->ksp_its_initial = 0;

904:   n = user->mx * user->mx;

906:   h = 1.0/user->mx;
907:   hinv = user->mx;
908:   neg_hinv = -hinv;
909:   half_hinv = hinv / 2.0;
910:   neg_half_hinv = neg_hinv / 2.0;

912:   /* Generate Grad matrix */
913:   MatCreate(PETSC_COMM_WORLD,&user->Grad); 
914:   MatSetSizes(user->Grad,PETSC_DECIDE,PETSC_DECIDE,2*n,n); 
915:   MatSetFromOptions(user->Grad); 
916:   MatMPIAIJSetPreallocation(user->Grad,3,PETSC_NULL,3,PETSC_NULL); 
917:   MatSeqAIJSetPreallocation(user->Grad,3,PETSC_NULL); 
918:   MatGetOwnershipRange(user->Grad,&istart,&iend); 

920:   for (i=istart; i<iend; i++){
921:     if (i<n){
922:       iblock = i / user->mx;
923:       j = iblock*user->mx + ((i+user->mx-1) % user->mx);
924:       MatSetValues(user->Grad,1,&i,1,&j,&half_hinv,INSERT_VALUES); 
925:       j = iblock*user->mx + ((i+1) % user->mx);
926:       MatSetValues(user->Grad,1,&i,1,&j,&neg_half_hinv,INSERT_VALUES); 
927:     }
928:     if (i>=n){
929:       j = (i - user->mx) % n;
930:       MatSetValues(user->Grad,1,&i,1,&j,&half_hinv,INSERT_VALUES); 
931:       j = (j + 2*user->mx) % n;
932:       MatSetValues(user->Grad,1,&i,1,&j,&neg_half_hinv,INSERT_VALUES); 
933:     }
934:   }

936:   MatAssemblyBegin(user->Grad,MAT_FINAL_ASSEMBLY); 
937:   MatAssemblyEnd(user->Grad,MAT_FINAL_ASSEMBLY); 

939:   MatCreate(PETSC_COMM_WORLD,&user->Gradxy[0]); 
940:   MatSetSizes(user->Gradxy[0],PETSC_DECIDE,PETSC_DECIDE,n,n); 
941:   MatSetFromOptions(user->Gradxy[0]); 
942:   MatMPIAIJSetPreallocation(user->Gradxy[0],3,PETSC_NULL,3,PETSC_NULL); 
943:   MatSeqAIJSetPreallocation(user->Gradxy[0],3,PETSC_NULL); 
944:   MatGetOwnershipRange(user->Gradxy[0],&istart,&iend); 
945:   
946:   for (i=istart; i<iend; i++){
947:     iblock = i / user->mx;
948:     j = iblock*user->mx + ((i+user->mx-1) % user->mx);
949:     MatSetValues(user->Gradxy[0],1,&i,1,&j,&half_hinv,INSERT_VALUES); 
950:     j = iblock*user->mx + ((i+1) % user->mx);
951:     MatSetValues(user->Gradxy[0],1,&i,1,&j,&neg_half_hinv,INSERT_VALUES); 
952:     MatSetValues(user->Gradxy[0],1,&i,1,&i,&zero,INSERT_VALUES); 
953:   }
954:   MatAssemblyBegin(user->Gradxy[0],MAT_FINAL_ASSEMBLY); 
955:   MatAssemblyEnd(user->Gradxy[0],MAT_FINAL_ASSEMBLY); 

957:   MatCreate(PETSC_COMM_WORLD,&user->Gradxy[1]); 
958:   MatSetSizes(user->Gradxy[1],PETSC_DECIDE,PETSC_DECIDE,n,n); 
959:   MatSetFromOptions(user->Gradxy[1]); 
960:   MatMPIAIJSetPreallocation(user->Gradxy[1],3,PETSC_NULL,3,PETSC_NULL); 
961:   MatSeqAIJSetPreallocation(user->Gradxy[1],3,PETSC_NULL); 
962:   MatGetOwnershipRange(user->Gradxy[1],&istart,&iend); 

964:   for (i=istart; i<iend; i++){
965:     j = (i + n - user->mx) % n;
966:     MatSetValues(user->Gradxy[1],1,&i,1,&j,&half_hinv,INSERT_VALUES); 
967:     j = (j + 2*user->mx) % n;
968:     MatSetValues(user->Gradxy[1],1,&i,1,&j,&neg_half_hinv,INSERT_VALUES); 
969:     MatSetValues(user->Gradxy[1],1,&i,1,&i,&zero,INSERT_VALUES); 
970:   }
971:   MatAssemblyBegin(user->Gradxy[1],MAT_FINAL_ASSEMBLY); 
972:   MatAssemblyEnd(user->Gradxy[1],MAT_FINAL_ASSEMBLY); 

974:   /* Generate Div matrix */
975:   MatTranspose(user->Grad,MAT_INITIAL_MATRIX,&user->Div);
976:   MatTranspose(user->Gradxy[0],MAT_INITIAL_MATRIX,&user->Divxy[0]); 
977:   MatTranspose(user->Gradxy[1],MAT_INITIAL_MATRIX,&user->Divxy[1]); 

979:   /* Off-diagonal averaging matrix */
980:   MatCreate(PETSC_COMM_WORLD,&user->M); 
981:   MatSetSizes(user->M,PETSC_DECIDE,PETSC_DECIDE,n,n); 
982:   MatSetFromOptions(user->M); 
983:   MatMPIAIJSetPreallocation(user->M,4,PETSC_NULL,4,PETSC_NULL); 
984:   MatSeqAIJSetPreallocation(user->M,4,PETSC_NULL); 
985:   MatGetOwnershipRange(user->M,&istart,&iend); 

987:   for (i=istart; i<iend; i++){
988:     /* kron(Id,Av) */
989:     iblock = i / user->mx;
990:     j = iblock*user->mx + ((i+user->mx-1) % user->mx);
991:     MatSetValues(user->M,1,&i,1,&j,&quarter,INSERT_VALUES); 
992:     j = iblock*user->mx + ((i+1) % user->mx);
993:     MatSetValues(user->M,1,&i,1,&j,&quarter,INSERT_VALUES); 
994:     
995:     /* kron(Av,Id) */
996:     j = (i + user->mx) % n;
997:     MatSetValues(user->M,1,&i,1,&j,&quarter,INSERT_VALUES); 
998:     j = (i + n - user->mx) % n;
999:     MatSetValues(user->M,1,&i,1,&j,&quarter,INSERT_VALUES); 
1000:   }

1002:   MatAssemblyBegin(user->M,MAT_FINAL_ASSEMBLY); 
1003:   MatAssemblyEnd(user->M,MAT_FINAL_ASSEMBLY); 

1005:   /* Generate 2D grid */
1006:   VecCreate(PETSC_COMM_WORLD,&XX); 
1007:   VecCreate(PETSC_COMM_WORLD,&user->q); 
1008:   VecSetSizes(XX,PETSC_DECIDE,n); 
1009:   VecSetSizes(user->q,PETSC_DECIDE,n*user->nt); 
1010:   VecSetFromOptions(XX); 
1011:   VecSetFromOptions(user->q); 

1013:   VecDuplicate(XX,&YY); 
1014:   VecDuplicate(XX,&XXwork); 
1015:   VecDuplicate(XX,&YYwork); 
1016:   VecDuplicate(XX,&user->d); 
1017:   VecDuplicate(XX,&user->dwork); 

1019:   VecGetOwnershipRange(XX,&istart,&iend); 
1020:   for (linear_index=istart; linear_index<iend; linear_index++){
1021:     i = linear_index % user->mx;
1022:     j = (linear_index-i)/user->mx;
1023:     vx = h*(i+0.5); 
1024:     vy = h*(j+0.5);
1025:     VecSetValues(XX,1,&linear_index,&vx,INSERT_VALUES); 
1026:     VecSetValues(YY,1,&linear_index,&vy,INSERT_VALUES); 
1027:   }

1029:   VecAssemblyBegin(XX); 
1030:   VecAssemblyEnd(XX); 
1031:   VecAssemblyBegin(YY); 
1032:   VecAssemblyEnd(YY); 

1034:   /* Compute final density function yT
1035:      yT = 1.0 + exp(-30*((x-0.25)^2+(y-0.25)^2)) + exp(-30*((x-0.75)^2+(y-0.75)^2)) 
1036:      yT = yT / (h^2*sum(yT)) */
1037:   VecCopy(XX,XXwork); 
1038:   VecCopy(YY,YYwork); 

1040:   VecShift(XXwork,-0.25); 
1041:   VecShift(YYwork,-0.25); 

1043:   VecPointwiseMult(XXwork,XXwork,XXwork); 
1044:   VecPointwiseMult(YYwork,YYwork,YYwork); 

1046:   VecCopy(XXwork,user->dwork); 
1047:   VecAXPY(user->dwork,1.0,YYwork); 
1048:   VecScale(user->dwork,-30.0); 
1049:   VecExp(user->dwork); 
1050:   VecCopy(user->dwork,user->d); 

1052:   VecCopy(XX,XXwork); 
1053:   VecCopy(YY,YYwork); 

1055:   VecShift(XXwork,-0.75); 
1056:   VecShift(YYwork,-0.75); 

1058:   VecPointwiseMult(XXwork,XXwork,XXwork); 
1059:   VecPointwiseMult(YYwork,YYwork,YYwork); 

1061:   VecCopy(XXwork,user->dwork); 
1062:   VecAXPY(user->dwork,1.0,YYwork); 
1063:   VecScale(user->dwork,-30.0); 
1064:   VecExp(user->dwork); 

1066:   VecAXPY(user->d,1.0,user->dwork); 
1067:   VecShift(user->d,1.0); 
1068:   VecSum(user->d,&sum);   
1069:   VecScale(user->d,1.0/(h*h*sum)); 

1071:   /* Initial conditions of forward problem */
1072:   VecDuplicate(XX,&bc); 
1073:   VecCopy(XX,XXwork); 
1074:   VecCopy(YY,YYwork); 

1076:   VecShift(XXwork,-0.5); 
1077:   VecShift(YYwork,-0.5); 

1079:   VecPointwiseMult(XXwork,XXwork,XXwork); 
1080:   VecPointwiseMult(YYwork,YYwork,YYwork); 

1082:   VecWAXPY(bc,1.0,XXwork,YYwork); 
1083:   VecScale(bc,-50.0); 
1084:   VecExp(bc); 
1085:   VecShift(bc,1.0); 
1086:   VecSum(bc,&sum); 
1087:   VecScale(bc,1.0/(h*h*sum)); 

1089:   /* Create scatter from y to y_1,y_2,...,y_nt */
1090:   /*  TODO: Reorder for better parallelism. (This will require reordering Q and L as well.) */
1091:   PetscMalloc(user->nt*user->mx*user->mx*sizeof(PetscInt),&user->yi_scatter);
1092:   VecCreate(PETSC_COMM_WORLD,&yi); 
1093:   VecSetSizes(yi,PETSC_DECIDE,user->mx*user->mx); 
1094:   VecSetFromOptions(yi); 
1095:   VecDuplicateVecs(yi,user->nt,&user->yi); 
1096:   VecDuplicateVecs(yi,user->nt,&user->yiwork); 
1097:   VecDuplicateVecs(yi,user->nt,&user->ziwork); 
1098:   for (i=0; i<user->nt; i++){
1099:     VecGetOwnershipRange(user->yi[i],&lo,&hi); 
1100:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_yi); 
1101:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+i*user->mx*user->mx,1,&is_from_y); 
1102:     VecScatterCreate(user->y,is_from_y,user->yi[i],is_to_yi,&user->yi_scatter[i]); 
1103:     ISDestroy(&is_to_yi); 
1104:     ISDestroy(&is_from_y); 
1105:   }

1107:   /* Create scatter from u to ux_1,uy_1,ux_2,uy_2,...,ux_nt,uy_nt */
1108:   /*  TODO: reorder for better parallelism */
1109:   PetscMalloc(user->nt*user->mx*user->mx*sizeof(PetscInt),&user->uxi_scatter); 
1110:   PetscMalloc(user->nt*user->mx*user->mx*sizeof(PetscInt),&user->uyi_scatter); 
1111:   PetscMalloc(user->nt*user->mx*user->mx*sizeof(PetscInt),&user->ux_scatter); 
1112:   PetscMalloc(user->nt*user->mx*user->mx*sizeof(PetscInt),&user->uy_scatter); 
1113:   PetscMalloc(2*user->nt*user->mx*user->mx*sizeof(PetscInt),&user->ui_scatter); 
1114:   VecCreate(PETSC_COMM_WORLD,&uxi); 
1115:   VecCreate(PETSC_COMM_WORLD,&ui); 
1116:   VecSetSizes(uxi,PETSC_DECIDE,user->mx*user->mx); 
1117:   VecSetSizes(ui,PETSC_DECIDE,2*user->mx*user->mx); 
1118:   VecSetFromOptions(uxi); 
1119:   VecSetFromOptions(ui); 
1120:   VecDuplicateVecs(uxi,user->nt,&user->uxi); 
1121:   VecDuplicateVecs(uxi,user->nt,&user->uyi); 
1122:   VecDuplicateVecs(uxi,user->nt,&user->uxiwork); 
1123:   VecDuplicateVecs(uxi,user->nt,&user->uyiwork); 
1124:   VecDuplicateVecs(ui,user->nt,&user->ui); 
1125:   VecDuplicateVecs(ui,user->nt,&user->uiwork); 
1126:   for (i=0; i<user->nt; i++){
1127:     VecGetOwnershipRange(user->uxi[i],&lo,&hi); 
1128:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_uxi); 
1129:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+2*i*user->mx*user->mx,1,&is_from_u); 
1130:     VecScatterCreate(user->u,is_from_u,user->uxi[i],is_to_uxi,&user->uxi_scatter[i]); 

1132:     ISDestroy(&is_to_uxi); 
1133:     ISDestroy(&is_from_u); 

1135:     VecGetOwnershipRange(user->uyi[i],&lo,&hi); 
1136:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_uyi); 
1137:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+(2*i+1)*user->mx*user->mx,1,&is_from_u); 
1138:     VecScatterCreate(user->u,is_from_u,user->uyi[i],is_to_uyi,&user->uyi_scatter[i]); 

1140:     ISDestroy(&is_to_uyi); 
1141:     ISDestroy(&is_from_u); 

1143:     VecGetOwnershipRange(user->uxi[i],&lo,&hi); 
1144:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_uxi); 
1145:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_from_u); 
1146:     VecScatterCreate(user->ui[i],is_from_u,user->uxi[i],is_to_uxi,&user->ux_scatter[i]);    

1148:     ISDestroy(&is_to_uxi); 
1149:     ISDestroy(&is_from_u); 

1151:     VecGetOwnershipRange(user->uyi[i],&lo,&hi); 
1152:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_uyi); 
1153:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+user->mx*user->mx,1,&is_from_u); 
1154:     VecScatterCreate(user->ui[i],is_from_u,user->uyi[i],is_to_uyi,&user->uy_scatter[i]); 

1156:     ISDestroy(&is_to_uyi); 
1157:     ISDestroy(&is_from_u); 

1159:     VecGetOwnershipRange(user->ui[i],&lo,&hi); 
1160:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_uxi); 
1161:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+2*i*user->mx*user->mx,1,&is_from_u); 
1162:     VecScatterCreate(user->u,is_from_u,user->ui[i],is_to_uxi,&user->ui_scatter[i]); 

1164:     ISDestroy(&is_to_uxi); 
1165:     ISDestroy(&is_from_u); 
1166:   }

1168:   /* RHS of forward problem */
1169:   MatMult(user->M,bc,user->yiwork[0]);  
1170:   for (i=1; i<user->nt; i++){
1171:     VecSet(user->yiwork[i],0.0); 
1172:   }
1173:   Gather_yi(user->q,user->yiwork,user->yi_scatter,user->nt); 

1175:   /* Compute true velocity field utrue */
1176:   VecDuplicate(user->u,&user->utrue); 
1177:   for (i=0; i<user->nt; i++){
1178:     VecCopy(YY,user->uxi[i]); 
1179:     VecScale(user->uxi[i],150.0*i*user->ht); 
1180:     VecCopy(XX,user->uyi[i]); 
1181:     VecShift(user->uyi[i],-10.0); 
1182:     VecScale(user->uyi[i],15.0*i*user->ht); 
1183:   }
1184:   Gather_uxi_uyi(user->utrue,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt); 
1185:  
1186:   /* Initial guess and reference model */
1187:   VecDuplicate(user->utrue,&user->ur); 
1188:   for (i=0; i<user->nt; i++){
1189:     VecCopy(XX,user->uxi[i]); 
1190:     VecShift(user->uxi[i],i*user->ht); 
1191:     VecCopy(YY,user->uyi[i]); 
1192:     VecShift(user->uyi[i],-i*user->ht); 
1193:   }
1194:   Gather_uxi_uyi(user->ur,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt);   

1196:   /* Generate regularization matrix L */
1197:   MatCreate(PETSC_COMM_WORLD,&user->LT); 
1198:   MatSetSizes(user->LT,PETSC_DECIDE,PETSC_DECIDE,2*n*user->nt,n*user->nt); 
1199:   MatSetFromOptions(user->LT); 
1200:   MatMPIAIJSetPreallocation(user->LT,1,PETSC_NULL,1,PETSC_NULL); 
1201:   MatSeqAIJSetPreallocation(user->LT,1,PETSC_NULL); 
1202:   MatGetOwnershipRange(user->LT,&istart,&iend);

1204:   for (i=istart; i<iend; i++){
1205:     iblock = (i+n) / (2*n);
1206:     j = i - iblock*n;
1207:     MatSetValues(user->LT,1,&i,1,&j,&user->gamma,INSERT_VALUES); 
1208:   }

1210:   MatAssemblyBegin(user->LT,MAT_FINAL_ASSEMBLY); 
1211:   MatAssemblyEnd(user->LT,MAT_FINAL_ASSEMBLY); 

1213:   MatTranspose(user->LT,MAT_INITIAL_MATRIX,&user->L); 

1215:   /* Build work vectors and matrices */
1216:   VecCreate(PETSC_COMM_WORLD,&user->lwork); 
1217:   VecSetType(user->lwork,VECMPI); 
1218:   VecSetSizes(user->lwork,PETSC_DECIDE,user->m);  
1219:   VecSetFromOptions(user->lwork); 

1221:   MatDuplicate(user->Div,MAT_SHARE_NONZERO_PATTERN,&user->Divwork); 

1223:   VecDuplicate(user->y,&user->ywork); 
1224:   VecDuplicate(user->u,&user->uwork); 
1225:   VecDuplicate(user->u,&user->vwork); 
1226:   VecDuplicate(user->u,&user->js_diag); 
1227:   VecDuplicate(user->c,&user->cwork); 

1229:   /* Create matrix-free shell user->Js for computing A*x */
1230:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->m,user,&user->Js); 
1231:   MatShellSetOperation(user->Js,MATOP_MULT,(void(*)(void))StateMatMult); 
1232:   MatShellSetOperation(user->Js,MATOP_DUPLICATE,(void(*)(void))StateMatDuplicate); 
1233:   MatShellSetOperation(user->Js,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatMultTranspose); 
1234:   MatShellSetOperation(user->Js,MATOP_GET_DIAGONAL,(void(*)(void))StateMatGetDiagonal); 

1236:   /* Diagonal blocks of user->Js */
1237:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,n,n,user,&user->JsBlock); 
1238:   MatShellSetOperation(user->JsBlock,MATOP_MULT,(void(*)(void))StateMatBlockMult); 
1239:   MatShellSetOperation(user->JsBlock,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatBlockMultTranspose); 

1241:   /* Create a matrix-free shell user->JsBlockPrec for computing (U+D)\D*(L+D)\x, where JsBlock = L+D+U,
1242:      D is diagonal, L is strictly lower triangular, and U is strictly upper triangular.
1243:      This is an SOR preconditioner for user->JsBlock. */
1244:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,n,n,user,&user->JsBlockPrec); 
1245:   MatShellSetOperation(user->JsBlockPrec,MATOP_MULT,(void(*)(void))StateMatBlockPrecMult);  
1246:   MatShellSetOperation(user->JsBlockPrec,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatBlockPrecMultTranspose); 
1247:   
1248:   /* Create a matrix-free shell user->Jd for computing B*x */
1249:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->n-user->m,user,&user->Jd); 
1250:   MatShellSetOperation(user->Jd,MATOP_MULT,(void(*)(void))DesignMatMult); 
1251:   MatShellSetOperation(user->Jd,MATOP_MULT_TRANSPOSE,(void(*)(void))DesignMatMultTranspose); 

1253:   /* User-defined routines for computing user->Js\x and user->Js^T\x*/
1254:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->m,user,&user->JsInv); 
1255:   MatShellSetOperation(user->JsInv,MATOP_MULT,(void(*)(void))StateMatInvMult); 
1256:   MatShellSetOperation(user->JsInv,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatInvTransposeMult); 


1259:   /* Build matrices for SOR preconditioner */
1260:   Scatter_uxi_uyi(user->u,user->uxi,user->uxi_scatter,user->uyi,user->uyi_scatter,user->nt); 
1261:   MatShift(user->Divxy[0],0.0);  /*  Force C[i] and Divxy[0] to share same nonzero pattern */
1262:   MatAXPY(user->Divxy[0],0.0,user->Divxy[1],DIFFERENT_NONZERO_PATTERN); 
1263:   PetscMalloc(5*n*sizeof(PetscReal),&user->C);
1264:   PetscMalloc(2*n*sizeof(PetscReal),&user->Cwork);
1265:   for (i=0; i<user->nt; i++){
1266:     MatDuplicate(user->Divxy[0],MAT_COPY_VALUES,&user->C[i]); 
1267:     MatDuplicate(user->Divxy[1],MAT_COPY_VALUES,&user->Cwork[i]); 

1269:     MatDiagonalScale(user->C[i],PETSC_NULL,user->uxi[i]); 
1270:     MatDiagonalScale(user->Cwork[i],PETSC_NULL,user->uyi[i]); 
1271:     MatAXPY(user->C[i],1.0,user->Cwork[i],SUBSET_NONZERO_PATTERN); 
1272:     MatScale(user->C[i],user->ht); 
1273:     MatShift(user->C[i],1.0); 
1274:   }

1276:   /* Solver options and tolerances */
1277:   KSPCreate(PETSC_COMM_WORLD,&user->solver); 
1278:   KSPSetType(user->solver,KSPGMRES); 
1279:   KSPSetOperators(user->solver,user->JsBlock,user->JsBlockPrec,DIFFERENT_NONZERO_PATTERN); 
1280:   KSPSetInitialGuessNonzero(user->solver,PETSC_FALSE);  /*  TODO: why is true slower? */
1281:   KSPSetTolerances(user->solver,1e-4,1e-20,1e3,500); 
1282:   /* KSPSetTolerances(user->solver,1e-8,1e-16,1e3,500);  */
1283:   KSPGetPC(user->solver,&user->prec); 
1284:   PCSetType(user->prec,PCSHELL); 

1286:   PCShellSetApply(user->prec,StateMatBlockPrecMult); 
1287:   PCShellSetApplyTranspose(user->prec,StateMatBlockPrecMultTranspose); 
1288:   PCShellSetContext(user->prec,user); 

1290:   /* Compute true state function yt given ut */
1291:   VecCreate(PETSC_COMM_WORLD,&user->ytrue); 
1292:   VecSetSizes(user->ytrue,PETSC_DECIDE,n*user->nt); 
1293:   VecSetFromOptions(user->ytrue); 
1294:   user->c_formed = PETSC_TRUE; 
1295:   VecCopy(user->utrue,user->u); /*  Set u=utrue temporarily for StateMatInv */
1296:   VecSet(user->ytrue,0.0);  /*  Initial guess */
1297:   StateMatInvMult(user->Js,user->q,user->ytrue); 
1298:   VecCopy(user->ur,user->u); /*  Reset u=ur */

1300:   /* Initial guess y0 for state given u0 */
1301:   user->lcl->solve_type = LCL_FORWARD1;
1302:   StateMatInvMult(user->Js,user->q,user->y); 

1304:   /* Data discretization */
1305:   MatCreate(PETSC_COMM_WORLD,&user->Q); 
1306:   MatSetSizes(user->Q,PETSC_DECIDE,PETSC_DECIDE,user->mx*user->mx,user->m); 
1307:   MatSetFromOptions(user->Q); 
1308:   MatMPIAIJSetPreallocation(user->Q,0,PETSC_NULL,1,PETSC_NULL); 
1309:   MatSeqAIJSetPreallocation(user->Q,1,PETSC_NULL); 

1311:   MatGetOwnershipRange(user->Q,&istart,&iend); 

1313:   for (i=istart; i<iend; i++){
1314:     j = i + user->m - user->mx*user->mx;
1315:     MatSetValues(user->Q,1,&i,1,&j,&one,INSERT_VALUES); 
1316:   }

1318:   MatAssemblyBegin(user->Q,MAT_FINAL_ASSEMBLY); 
1319:   MatAssemblyEnd(user->Q,MAT_FINAL_ASSEMBLY); 

1321:   MatTranspose(user->Q,MAT_INITIAL_MATRIX,&user->QT); 

1323:   VecDestroy(&XX); 
1324:   VecDestroy(&YY); 
1325:   VecDestroy(&XXwork); 
1326:   VecDestroy(&YYwork); 
1327:   VecDestroy(&yi); 
1328:   VecDestroy(&uxi); 
1329:   VecDestroy(&ui); 
1330:   VecDestroy(&bc); 

1332:   /* Now that initial conditions have been set, let the user pass tolerance options to the KSP solver */
1333:   KSPSetFromOptions(user->solver); 


1336:   return(0);
1337: }

1341: PetscErrorCode HyperbolicDestroy(AppCtx *user)
1342: {
1344:   PetscInt i;
1346:   MatDestroy(&user->Q); 
1347:   MatDestroy(&user->QT); 
1348:   MatDestroy(&user->Div); 
1349:   MatDestroy(&user->Divwork); 
1350:   MatDestroy(&user->Grad); 
1351:   MatDestroy(&user->L); 
1352:   MatDestroy(&user->LT); 
1353:   KSPDestroy(&user->solver); 
1354:   MatDestroy(&user->Js); 
1355:   MatDestroy(&user->Jd); 
1356:   MatDestroy(&user->JsBlockPrec); 
1357:   MatDestroy(&user->JsInv); 
1358:   MatDestroy(&user->JsBlock); 
1359:   MatDestroy(&user->Divxy[0]); 
1360:   MatDestroy(&user->Divxy[1]); 
1361:   MatDestroy(&user->Gradxy[0]); 
1362:   MatDestroy(&user->Gradxy[1]); 
1363:   MatDestroy(&user->M); 
1364:   for (i=0; i<user->nt; i++){
1365:     MatDestroy(&user->C[i]); 
1366:     MatDestroy(&user->Cwork[i]); 
1367:   }
1368:   PetscFree(user->C); 
1369:   PetscFree(user->Cwork); 
1370:   VecDestroy(&user->u); 
1371:   VecDestroy(&user->uwork); 
1372:   VecDestroy(&user->vwork); 
1373:   VecDestroy(&user->utrue); 
1374:   VecDestroy(&user->y); 
1375:   VecDestroy(&user->ywork); 
1376:   VecDestroy(&user->ytrue);  
1377:   VecDestroyVecs(user->nt,&user->yi); 
1378:   VecDestroyVecs(user->nt,&user->yiwork); 
1379:   VecDestroyVecs(user->nt,&user->ziwork); 
1380:   VecDestroyVecs(user->nt,&user->uxi); 
1381:   VecDestroyVecs(user->nt,&user->uyi); 
1382:   VecDestroyVecs(user->nt,&user->uxiwork); 
1383:   VecDestroyVecs(user->nt,&user->uyiwork); 
1384:   VecDestroyVecs(user->nt,&user->ui); 
1385:   VecDestroyVecs(user->nt,&user->uiwork); 
1386:   VecDestroy(&user->c); 
1387:   VecDestroy(&user->cwork); 
1388:   VecDestroy(&user->ur); 
1389:   VecDestroy(&user->q); 
1390:   VecDestroy(&user->d); 
1391:   VecDestroy(&user->dwork); 
1392:   VecDestroy(&user->lwork); 
1393:   VecDestroy(&user->js_diag); 
1394:   ISDestroy(&user->s_is); 
1395:   ISDestroy(&user->d_is); 
1396:   VecScatterDestroy(&user->state_scatter); 
1397:   VecScatterDestroy(&user->design_scatter); 
1398:   for (i=0; i<user->nt; i++){
1399:     VecScatterDestroy(&user->uxi_scatter[i]); 
1400:     VecScatterDestroy(&user->uyi_scatter[i]); 
1401:     VecScatterDestroy(&user->ux_scatter[i]); 
1402:     VecScatterDestroy(&user->uy_scatter[i]); 
1403:     VecScatterDestroy(&user->ui_scatter[i]); 
1404:     VecScatterDestroy(&user->yi_scatter[i]); 
1405:   }
1406:   PetscFree(user->uxi_scatter); 
1407:   PetscFree(user->uyi_scatter); 
1408:   PetscFree(user->ux_scatter); 
1409:   PetscFree(user->uy_scatter); 
1410:   PetscFree(user->ui_scatter); 
1411:   PetscFree(user->yi_scatter); 
1412:   return(0);
1413: }

1417: PetscErrorCode HyperbolicMonitor(TaoSolver tao, void *ptr)
1418: {
1420:   Vec X;
1421:   PetscReal unorm,ynorm;
1422:   AppCtx *user = (AppCtx*)ptr;
1424:   TaoGetSolutionVector(tao,&X); 
1425:   Scatter(X,user->ywork,user->state_scatter,user->uwork,user->design_scatter); 
1426:   VecAXPY(user->ywork,-1.0,user->ytrue); 
1427:   VecAXPY(user->uwork,-1.0,user->utrue); 
1428:   VecNorm(user->uwork,NORM_2,&unorm); 
1429:   VecNorm(user->ywork,NORM_2,&ynorm); 
1430:   PetscPrintf(MPI_COMM_WORLD, "||u-ut||=%G ||y-yt||=%G\n",unorm,ynorm); 
1431:   return(0);
1432: }