Actual source code: parabolic.c

tao-2.1-p0 2012-07-24
  1: #include "tao-private/taosolver_impl.h"
  2: #include "src/pde_constrained/impls/lcl/lcl.h"
  3: #include "petsctime.h"

  5: /*T
  6:    Concepts: TAO - Solving a system of nonlinear equations, nonlinear ;east squares
  7:    Routines: TaoInitialize(); TaoFinalize(); 
  8:    Routines: TaoCreate();
  9:    Routines: TaoSetType(); 
 10:    Routines: TaoSetInitialVector();
 11:    Routines: TaoSetObjectiveRoutine();
 12:    Routines: TaoSetGradientRoutine();
 13:    Routines: TaoSetConstraintsRoutine();
 14:    Routines: TaoSetJacobianStateRoutine();
 15:    Routines: TaoSetJacobianDesignRoutine();
 16:    Routines: TaoSetStateDesignIS();
 17:    Routines: TaoSetFromOptions();
 18:    Routines: TaoSetHistory(); TaoGetHistory();
 19:    Routines: TaoSolve();
 20:    Routines: TaoGetTerminationReason(); TaoDestroy(); 
 21:    Processors: n
 22: T*/

 24: typedef struct {
 25:   PetscInt n; /*  Number of variables */
 26:   PetscInt m; /*  Number of constraints per time step */
 27:   PetscInt mx; /*  grid points in each direction */
 28:   PetscInt nt; /*  Number of time steps; as of now, must be divisible by 8 */
 29:   PetscInt ndata; /*  Number of data points per sample */
 30:   PetscInt ns; /*  Number of samples */
 31:   PetscInt *sample_times; /*  Times of samples */
 32:   IS s_is;
 33:   IS d_is;
 34:   VecScatter state_scatter;
 35:   VecScatter design_scatter;
 36:   VecScatter *yi_scatter;
 37:   VecScatter *di_scatter;

 39:   Mat Js,Jd,JsBlockPrec,JsInv,JsBlock;
 40:   PetscBool jformed,dsg_formed;

 42:   PetscReal alpha; /*  Regularization parameter */
 43:   PetscReal beta; /*  Weight attributed to ||u||^2 in regularization functional */
 44:   PetscReal noise; /*  Amount of noise to add to data */
 45:   PetscReal ht; /*  Time step */

 47:   Mat Qblock,QblockT;
 48:   Mat L,LT;
 49:   Mat Div,Divwork;
 50:   Mat Grad;
 51:   Mat Av,Avwork,AvT;
 52:   Mat DSG;
 53:   Vec q;
 54:   Vec ur; /*  reference */

 56:   Vec d;
 57:   Vec dwork;
 58:   Vec *di;

 60:   Vec y; /*  state variables */
 61:   Vec ywork;

 63:   Vec ytrue;
 64:   Vec *yi,*yiwork;

 66:   Vec u; /*  design variables */
 67:   Vec uwork;

 69:   Vec utrue;
 70:  
 71:   Vec js_diag;
 72:   
 73:   Vec c; /*  constraint vector */
 74:   Vec cwork;
 75:   
 76:   Vec lwork;
 77:   Vec S;
 78:   Vec Rwork,Swork,Twork;
 79:   Vec Av_u;

 81:   KSP solver;
 82:   PC prec;

 84:   TAO_LCL *lcl;
 85:   PetscInt ksp_its;
 86:   PetscInt ksp_its_initial;

 88: } AppCtx;


 91: PetscErrorCode FormFunction(TaoSolver, Vec, PetscReal*, void*);
 92: PetscErrorCode FormGradient(TaoSolver, Vec, Vec, void*);
 93: PetscErrorCode FormFunctionGradient(TaoSolver, Vec, PetscReal*, Vec, void*);
 94: PetscErrorCode FormJacobianState(TaoSolver, Vec, Mat*, Mat*, Mat*, MatStructure*,void*);
 95: PetscErrorCode FormJacobianDesign(TaoSolver, Vec, Mat*, void*);
 96: PetscErrorCode FormConstraints(TaoSolver, Vec, Vec, void*);
 97: PetscErrorCode FormHessian(TaoSolver, Vec, Mat*, Mat*, MatStructure*, void*);
 98: PetscErrorCode Gather(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat);
 99: PetscErrorCode Scatter(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat);
100: PetscErrorCode ParabolicInitialize(AppCtx *user);
101: PetscErrorCode ParabolicDestroy(AppCtx *user);
102: PetscErrorCode ParabolicMonitor(TaoSolver, void*);

104: PetscErrorCode StateMatMult(Mat,Vec,Vec);
105: PetscErrorCode StateMatBlockMult(Mat,Vec,Vec);
106: PetscErrorCode StateMatMultTranspose(Mat,Vec,Vec);
107: PetscErrorCode StateMatGetDiagonal(Mat,Vec); 
108: PetscErrorCode StateMatDuplicate(Mat,MatDuplicateOption,Mat*);
109: PetscErrorCode StateMatInvMult(Mat,Vec,Vec);
110: PetscErrorCode StateMatInvTransposeMult(Mat,Vec,Vec);
111: PetscErrorCode StateMatBlockPrecMult(PC,Vec,Vec);

113: PetscErrorCode DesignMatMult(Mat,Vec,Vec);
114: PetscErrorCode DesignMatMultTranspose(Mat,Vec,Vec);

116: PetscErrorCode Gather_i(Vec,Vec*,VecScatter*,PetscInt);
117: PetscErrorCode Scatter_i(Vec,Vec*,VecScatter*,PetscInt);

119: static  char help[]="";

123: int main(int argc, char **argv)
124: {
126:   Vec x,x0;
127:   
128:   TaoSolver tao;
129:   TaoSolverTerminationReason reason;
130:   AppCtx user;
131:   IS is_allstate,is_alldesign;
132:   PetscInt lo,hi,hi2,lo2,ksp_old;
133:   PetscBool flag,showtime;
134:   PetscInt ntests = 1;
135:   PetscLogDouble v1,v2;
136:   PetscInt i;
137:   int stages[1];
138:   

140:   PetscInitialize(&argc, &argv, (char*)0,help);
141:   TaoInitialize(&argc, &argv, (char*)0,help);

143:   showtime = PETSC_FALSE;
144:   PetscOptionsBool("-showtime","Display time elapsed","",showtime,&showtime,&flag); 
145:   user.mx = 8;
146:   PetscOptionsInt("-mx","Number of grid points in each direction","",user.mx,&user.mx,&flag); 
147:   user.nt = 8;
148:   PetscOptionsInt("-nt","Number of time steps","",user.nt,&user.nt,&flag); 
149:   user.ndata = 64;
150:   PetscOptionsInt("-ndata","Numbers of data points per sample","",user.ndata,&user.ndata,&flag); 
151:   user.ns = 8;
152:   PetscOptionsInt("-ns","Number of samples","",user.ns,&user.ns,&flag); 
153:   user.alpha = 1.0;
154:   PetscOptionsReal("-alpha","Regularization parameter","",user.alpha,&user.alpha,&flag); 
155:   user.beta = 0.01;
156:   PetscOptionsReal("-beta","Weight attributed to ||u||^2 in regularization functional","",user.beta,&user.beta,&flag); 
157:   user.noise = 0.01;
158:   PetscOptionsReal("-noise","Amount of noise to add to data","",user.noise,&user.noise,&flag); 


161:   user.m = user.mx*user.mx*user.mx; /*  number of constraints per time step */
162:   user.n = user.m*(user.nt+1); /*  number of variables */
163:   user.ht = (PetscReal)1/user.nt;

165:   VecCreate(PETSC_COMM_WORLD,&user.u); 
166:   VecCreate(PETSC_COMM_WORLD,&user.y); 
167:   VecCreate(PETSC_COMM_WORLD,&user.c); 
168:   VecSetSizes(user.u,PETSC_DECIDE,user.n-user.m*user.nt); 
169:   VecSetSizes(user.y,PETSC_DECIDE,user.m*user.nt); 
170:   VecSetSizes(user.c,PETSC_DECIDE,user.m*user.nt); 
171:   VecSetFromOptions(user.u); 
172:   VecSetFromOptions(user.y); 
173:   VecSetFromOptions(user.c); 

175:   /* Create scatters for reduced spaces.
176:      If the state vector y and design vector u are partitioned as 
177:      [y_1; y_2; ...; y_np] and [u_1; u_2; ...; u_np] (with np = # of processors),
178:      then the solution vector x is organized as
179:      [y_1; u_1; y_2; u_2; ...; y_np; u_np]. 
180:      The index sets user.s_is and user.d_is correspond to the indices of the
181:      state and design variables owned by the current processor.
182:   */
183:   VecCreate(PETSC_COMM_WORLD,&x); 

185:   VecGetOwnershipRange(user.y,&lo,&hi); 
186:   VecGetOwnershipRange(user.u,&lo2,&hi2);  

188:   ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_allstate); 
189:   ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+lo2,1,&user.s_is); 

191:   ISCreateStride(PETSC_COMM_SELF,hi2-lo2,lo2,1,&is_alldesign); 
192:   ISCreateStride(PETSC_COMM_SELF,hi2-lo2,hi+lo2,1,&user.d_is); 

194:   VecSetSizes(x,hi-lo+hi2-lo2,user.n); 
195:   VecSetFromOptions(x); 

197:   VecScatterCreate(x,user.s_is,user.y,is_allstate,&user.state_scatter); 
198:   VecScatterCreate(x,user.d_is,user.u,is_alldesign,&user.design_scatter); 
199:   ISDestroy(&is_alldesign); 
200:   ISDestroy(&is_allstate); 



204:   /* Create TAO solver and set desired solution method */
205:   TaoCreate(PETSC_COMM_WORLD,&tao); 
206:   TaoSetType(tao,"tao_lcl"); 
207:   user.lcl = (TAO_LCL*)(tao->data); 
208:   /* Set up initial vectors and matrices */
209:   ParabolicInitialize(&user); 

211:   Gather(x,user.y,user.state_scatter,user.u,user.design_scatter); 
212:   VecDuplicate(x,&x0); 
213:   VecCopy(x,x0); 

215:   /* Set solution vector with an initial guess */
216:   TaoSetInitialVector(tao,x); 
217:   TaoSetObjectiveRoutine(tao, FormFunction, (void *)&user); 
218:   TaoSetGradientRoutine(tao, FormGradient, (void *)&user); 
219:   TaoSetConstraintsRoutine(tao, user.c, FormConstraints, (void *)&user); 

221:   TaoSetJacobianStateRoutine(tao, user.Js, user.JsBlockPrec, user.JsInv, FormJacobianState, (void *)&user);  

223:   TaoSetJacobianDesignRoutine(tao, user.Jd, FormJacobianDesign, (void *)&user); 

225:   TaoSetFromOptions(tao); 
226:   TaoSetStateDesignIS(tao,user.s_is,user.d_is); 

228:  /* SOLVE THE APPLICATION */
229:   PetscOptionsInt("-ntests","Number of times to repeat TaoSolve","",ntests,&ntests,&flag); 
230:   PetscLogStageRegister("Trials",&stages[0]); 
231:   PetscLogStagePush(stages[0]); 
232:   user.ksp_its_initial = user.ksp_its;
233:   ksp_old = user.ksp_its;
234:   for (i=0; i<ntests; i++){
235:     PetscGetTime(&v1); 
236:     TaoSolve(tao);  
237:     PetscGetTime(&v2); 
238:     if (showtime) {
239:       PetscPrintf(PETSC_COMM_WORLD,"Elapsed time = %G\n",v2-v1); 
240:     }
241:     PetscPrintf(PETSC_COMM_WORLD,"KSP Iterations = %D\n",user.ksp_its-ksp_old); 
242:     VecCopy(x0,x); 
243:     TaoSetInitialVector(tao,x); 
244:   }
245:   PetscLogStagePop(); 
246:   PetscBarrier((PetscObject)x); 
247:   PetscPrintf(PETSC_COMM_WORLD,"KSP iterations within initialization: ");
248:   PetscPrintf(PETSC_COMM_WORLD,"%D\n",user.ksp_its_initial);
249:   PetscPrintf(PETSC_COMM_WORLD,"Total KSP iterations over %D trial(s): ",ntests);
250:   PetscPrintf(PETSC_COMM_WORLD,"%D\n",user.ksp_its);
251:   PetscPrintf(PETSC_COMM_WORLD,"KSP iterations per trial: ");
252:   PetscPrintf(PETSC_COMM_WORLD,"%D\n",(user.ksp_its-user.ksp_its_initial)/ntests);

254:   TaoGetTerminationReason(tao,&reason); 

256:   if (reason < 0)
257:   {
258:     PetscPrintf(MPI_COMM_WORLD, "TAO failed to converge.\n");
259:   }
260:   else
261:   {
262:     PetscPrintf(MPI_COMM_WORLD, "Optimization terminated with status %D.\n", reason);
263:   }


266:   /* Free TAO data structures */
267:   TaoDestroy(&tao); 

269:   /* Free PETSc data structures */
270:   VecDestroy(&x); 
271:   VecDestroy(&x0); 
272:   ParabolicDestroy(&user); 

274:   /* Finalize TAO, PETSc */
275:   TaoFinalize();
276:   PetscFinalize();

278:   return 0;
279: }
280: /* ------------------------------------------------------------------- */
283: /* 
284:    dwork = Qy - d  
285:    lwork = L*(u-ur)
286:    f = 1/2 * (dwork.dork + alpha*lwork.lwork)
287: */
288: PetscErrorCode FormFunction(TaoSolver tao,Vec X,PetscReal *f,void *ptr)
289: {
291:   PetscReal d1=0,d2=0;
292:   PetscInt i,j;
293:   AppCtx *user = (AppCtx*)ptr;
295:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 
296:   Scatter_i(user->y,user->yi,user->yi_scatter,user->nt); 
297:   for (j=0; j<user->ns; j++){
298:     i = user->sample_times[j];
299:     MatMult(user->Qblock,user->yi[i],user->di[j]); 
300:   }
301:   Gather_i(user->dwork,user->di,user->di_scatter,user->ns); 
302:   VecAXPY(user->dwork,-1.0,user->d); 
303:   VecDot(user->dwork,user->dwork,&d1); 

305:   VecWAXPY(user->uwork,-1.0,user->ur,user->u); 
306:   MatMult(user->L,user->uwork,user->lwork); 
307:   VecDot(user->lwork,user->lwork,&d2); 

309:   *f = 0.5 * (d1 + user->alpha*d2);
310:   return(0);
311: }

313: /* ------------------------------------------------------------------- */
316: /*  
317:     state: g_s = Q' *(Qy - d)
318:     design: g_d = alpha*L'*L*(u-ur)
319: */
320: PetscErrorCode FormGradient(TaoSolver tao,Vec X,Vec G,void *ptr)
321: {
323:   PetscInt i,j;
324:   AppCtx *user = (AppCtx*)ptr;
326:   CHKMEMQ;
327:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 
328:   Scatter_i(user->y,user->yi,user->yi_scatter,user->nt); 
329:   for (j=0; j<user->ns; j++){
330:     i = user->sample_times[j];
331:     MatMult(user->Qblock,user->yi[i],user->di[j]); 
332:   }
333:   Gather_i(user->dwork,user->di,user->di_scatter,user->ns); 
334:   VecAXPY(user->dwork,-1.0,user->d); 
335:   Scatter_i(user->dwork,user->di,user->di_scatter,user->ns); 
336:   VecSet(user->ywork,0.0); 
337:   Scatter_i(user->ywork,user->yiwork,user->yi_scatter,user->nt); 
338:   for (j=0; j<user->ns; j++){
339:     i = user->sample_times[j];
340:     MatMult(user->QblockT,user->di[j],user->yiwork[i]); 
341:   }
342:   Gather_i(user->ywork,user->yiwork,user->yi_scatter,user->nt); 
343:   
344:   VecWAXPY(user->uwork,-1.0,user->ur,user->u); 
345:   MatMult(user->L,user->uwork,user->lwork); 
346:   MatMult(user->LT,user->lwork,user->uwork); 
347:   VecScale(user->uwork, user->alpha); 

349:                       
350:   Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter); 
351:   CHKMEMQ;
352:   return(0);
353: }

357: PetscErrorCode FormFunctionGradient(TaoSolver tao, Vec X, PetscReal *f, Vec G, void *ptr)
358: {
360:   PetscReal d1,d2;
361:   PetscInt i,j;
362:   AppCtx *user = (AppCtx*)ptr;
364:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 

366:   Scatter_i(user->y,user->yi,user->yi_scatter,user->nt); 
367:   for (j=0; j<user->ns; j++){
368:     i = user->sample_times[j];
369:     MatMult(user->Qblock,user->yi[i],user->di[j]); 
370:   }
371:   Gather_i(user->dwork,user->di,user->di_scatter,user->ns); 
372:   VecAXPY(user->dwork,-1.0,user->d); 
373:   VecDot(user->dwork,user->dwork,&d1); 
374:   Scatter_i(user->dwork,user->di,user->di_scatter,user->ns); 
375:   VecSet(user->ywork,0.0); 
376:   Scatter_i(user->ywork,user->yiwork,user->yi_scatter,user->nt); 
377:   for (j=0; j<user->ns; j++){
378:     i = user->sample_times[j];
379:     MatMult(user->QblockT,user->di[j],user->yiwork[i]); 
380:   }
381:   Gather_i(user->ywork,user->yiwork,user->yi_scatter,user->nt); 

383:   VecWAXPY(user->uwork,-1.0,user->ur,user->u); 
384:   MatMult(user->L,user->uwork,user->lwork); 
385:   VecDot(user->lwork,user->lwork,&d2); 
386:   MatMult(user->LT,user->lwork,user->uwork); 
387:   VecScale(user->uwork, user->alpha); 
388:   *f = 0.5 * (d1 + user->alpha*d2); 
389:   
390:   Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter); 
391:   return(0);

393: }


396: /* ------------------------------------------------------------------- */
399: /* A 
400: MatShell object
401: */
402: PetscErrorCode FormJacobianState(TaoSolver tao, Vec X, Mat *J, Mat* JPre, Mat* JInv, MatStructure* flag, void *ptr)
403: {
405:   AppCtx *user = (AppCtx*)ptr;
407:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 
408:   VecSet(user->uwork,0); 
409:   VecAXPY(user->uwork,-1.0,user->u); 
410:   VecExp(user->uwork); 
411:   MatMult(user->Av,user->uwork,user->Av_u); 
412:   VecCopy(user->Av_u,user->Swork);  
413:   VecReciprocal(user->Swork); 
414:   MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN); 
415:   MatDiagonalScale(user->Divwork,PETSC_NULL,user->Swork); 
416:   if (user->dsg_formed) {
417:     MatMatMultNumeric(user->Divwork,user->Grad,user->DSG); 
418:   } else {
419:     MatMatMultSymbolic(user->Divwork,user->Grad,PETSC_DECIDE,&user->DSG); 
420:     MatMatMultNumeric(user->Divwork,user->Grad,user->DSG); 
421:     user->dsg_formed = PETSC_TRUE;
422:   }
423:   
424:   /* B = speye(nx^3) + ht*DSG; */
425:   MatScale(user->DSG,user->ht); 
426:   MatShift(user->DSG,1.0); 
427:     
428:   *JInv = user->JsInv;

430:   return(0);
431: }
432: /* ------------------------------------------------------------------- */
435: /* B */
436: PetscErrorCode FormJacobianDesign(TaoSolver tao, Vec X, Mat *J, void *ptr)
437: {
439:   AppCtx *user = (AppCtx*)ptr;

442:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 

444:   *J = user->Jd;

446:   return(0);
447: }



453: PetscErrorCode StateMatMult(Mat J_shell, Vec X, Vec Y) 
454: {
456:   PetscInt i;
457:   void *ptr;
458:   AppCtx *user;
460:   MatShellGetContext(J_shell,&ptr); 
461:   user = (AppCtx*)ptr;

463:   Scatter_i(X,user->yi,user->yi_scatter,user->nt); 
464:   
465:   MatMult(user->JsBlock,user->yi[0],user->yiwork[0]); 

467:   for (i=1; i<user->nt; i++){
468:     MatMult(user->JsBlock,user->yi[i],user->yiwork[i]); 
469:     VecAXPY(user->yiwork[i],-1.0,user->yi[i-1]); 
470:   }

472:   Gather_i(Y,user->yiwork,user->yi_scatter,user->nt); 

474:   return(0);
475: }

479: PetscErrorCode StateMatMultTranspose(Mat J_shell, Vec X, Vec Y) 
480: {
482:   PetscInt i;
483:   void *ptr;
484:   AppCtx *user;
486:   MatShellGetContext(J_shell,&ptr); 
487:   user = (AppCtx*)ptr;

489:   Scatter_i(X,user->yi,user->yi_scatter,user->nt); 

491:   for (i=0; i<user->nt-1; i++){
492:     MatMult(user->JsBlock,user->yi[i],user->yiwork[i]); 
493:     VecAXPY(user->yiwork[i],-1.0,user->yi[i+1]); 
494:   }

496:   i = user->nt-1;
497:   MatMult(user->JsBlock,user->yi[i],user->yiwork[i]); 

499:   Gather_i(Y,user->yiwork,user->yi_scatter,user->nt); 

501:   return(0);
502: }

506: PetscErrorCode StateMatBlockMult(Mat J_shell, Vec X, Vec Y) 
507: {
509:   void *ptr;
510:   AppCtx *user;
512:   MatShellGetContext(J_shell,&ptr); 
513:   user = (AppCtx*)ptr;
514:    
515:   MatMult(user->Grad,X,user->Swork);  
516:   VecPointwiseDivide(user->Swork,user->Swork,user->Av_u); 
517:   MatMult(user->Div,user->Swork,Y);  
518:   VecAYPX(Y,user->ht,X); 

520:   return(0);
521: }

525: PetscErrorCode DesignMatMult(Mat J_shell, Vec X, Vec Y) 
526: {
528:   void *ptr;
529:   PetscInt i;
530:   AppCtx *user;
532:   MatShellGetContext(J_shell,&ptr); 
533:   user = (AppCtx*)ptr;
534:  
535:   /* sdiag(1./v) */ 
536:   VecSet(user->uwork,0); 
537:   VecAXPY(user->uwork,-1.0,user->u); 
538:   VecExp(user->uwork);   

540:   /* sdiag(1./((Av*(1./v)).^2)) */
541:   MatMult(user->Av,user->uwork,user->Swork); 
542:   VecPointwiseMult(user->Swork,user->Swork,user->Swork); 
543:   VecReciprocal(user->Swork);  

545:   /* (Av * (sdiag(1./v) * b)) */ 
546:   VecPointwiseMult(user->uwork,user->uwork,X); 
547:   MatMult(user->Av,user->uwork,user->Twork); 

549:   /* (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b))) */
550:   VecPointwiseMult(user->Swork,user->Twork,user->Swork);  

552:   Scatter_i(user->y,user->yi,user->yi_scatter,user->nt); 
553:   for (i=0; i<user->nt; i++){
554:     /* (sdiag(Grad*y(:,i)) */
555:     MatMult(user->Grad,user->yi[i],user->Twork); 
556:   
557:     /* ht * Div * (sdiag(Grad*y(:,i)) * (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b)))) */
558:     VecPointwiseMult(user->Twork,user->Twork,user->Swork);  
559:     MatMult(user->Div,user->Twork,user->yiwork[i]); 
560:     VecScale(user->yiwork[i],user->ht); 
561:   }
562:   Gather_i(Y,user->yiwork,user->yi_scatter,user->nt); 

564:   return(0);
565: }

569: PetscErrorCode DesignMatMultTranspose(Mat J_shell, Vec X, Vec Y) 
570: {
572:   void *ptr;
573:   PetscInt i;
574:   AppCtx *user;
576:   MatShellGetContext(J_shell,&ptr); 
577:   user = (AppCtx*)ptr;

579:   /* sdiag(1./((Av*(1./v)).^2)) */
580:   VecSet(user->uwork,0); 
581:   VecAXPY(user->uwork,-1.0,user->u); 
582:   VecExp(user->uwork); 
583:   MatMult(user->Av,user->uwork,user->Rwork); 
584:   VecPointwiseMult(user->Rwork,user->Rwork,user->Rwork); 
585:   VecReciprocal(user->Rwork); 

587:   VecSet(Y,0.0); 
588:   Scatter_i(user->y,user->yi,user->yi_scatter,user->nt); 
589:   Scatter_i(X,user->yiwork,user->yi_scatter,user->nt); 
590:   for (i=0; i<user->nt; i++){
591:     /* (Div' * b(:,i)) */
592:     MatMult(user->Grad,user->yiwork[i],user->Swork); 

594:     /* sdiag(Grad*y(:,i)) */
595:     MatMult(user->Grad,user->yi[i],user->Twork); 

597:     /* (sdiag(Grad*y(:,i)) * (Div' * b(:,i))) */
598:     VecPointwiseMult(user->Twork,user->Swork,user->Twork); 

600:     /* (sdiag(1./((Av*(1./v)).^2)) * (sdiag(Grad*y(:,i)) * (Div' * b(:,i)))) */
601:     VecPointwiseMult(user->Twork,user->Rwork,user->Twork); 

603:     /* (Av' * (sdiag(1./((Av*(1./v)).^2)) * (sdiag(Grad*y(:,i)) * (Div' * b(:,i))))) */
604:     MatMult(user->AvT,user->Twork,user->yiwork[i]); 
605:   
606:     /* sdiag(1./v) * (Av' * (sdiag(1./((Av*(1./v)).^2)) * (sdiag(Grad*y(:,i)) * (Div' * b(:,i))))) */
607:     VecPointwiseMult(user->yiwork[i],user->uwork,user->yiwork[i]); 
608:     VecAXPY(Y,user->ht,user->yiwork[i]); 
609:   }

611:   return(0);
612: }

616: PetscErrorCode StateMatBlockPrecMult(PC PC_shell, Vec X, Vec Y) 
617: {
619:   void *ptr;
620:   AppCtx *user;
622:   PCShellGetContext(PC_shell,&ptr); 
623:   user = (AppCtx*)ptr;

625:   if (user->dsg_formed) {
626:     MatSOR(user->DSG,X,1.0,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_SYMMETRIC_SWEEP),0.0,1,1,Y); 
627:   }
628:   else {
629:     printf("DSG not formed"); abort();
630:   }

632:   return(0);
633: }

637: PetscErrorCode StateMatInvMult(Mat J_shell, Vec X, Vec Y)
638: {
640:   void *ptr;
641:   AppCtx *user;
642:   PetscReal tau;
643:   PetscInt its,i;
645:   MatShellGetContext(J_shell,&ptr); 
646:   user = (AppCtx*)ptr;

648:   if (Y == user->ytrue) {
649:     KSPSetTolerances(user->solver,1e-8,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);
650:   } else if (user->lcl) {
651:     tau = user->lcl->tau[user->lcl->solve_type];
652:     KSPSetTolerances(user->solver,tau,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);
653:   }

655:   Scatter_i(X,user->yi,user->yi_scatter,user->nt); 
656:   
657:   KSPSolve(user->solver,user->yi[0],user->yiwork[0]); 

659:   KSPGetIterationNumber(user->solver,&its); 
660:   user->ksp_its = user->ksp_its + its;


663:   for (i=1; i<user->nt; i++){
664:     VecAXPY(user->yi[i],1.0,user->yiwork[i-1]); 
665:     KSPSolve(user->solver,user->yi[i],user->yiwork[i]); 

667:     KSPGetIterationNumber(user->solver,&its); 
668:     user->ksp_its = user->ksp_its + its;

670:   }

672:   Gather_i(Y,user->yiwork,user->yi_scatter,user->nt); 


675:   return(0);
676: }

680: PetscErrorCode StateMatInvTransposeMult(Mat J_shell, Vec X, Vec Y)
681: {
683:   void *ptr;
684:   PetscReal tau;
685:   AppCtx *user;
686:   PetscInt its,i;

689:   MatShellGetContext(J_shell,&ptr); 
690:   user = (AppCtx*)ptr;
691:   if (user->lcl) {
692:     tau = user->lcl->tau[user->lcl->solve_type];
693:     KSPSetTolerances(user->solver,tau,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);
694:   }


697:   Scatter_i(X,user->yi,user->yi_scatter,user->nt); 
698:   
699:   i = user->nt - 1;
700:   KSPSolve(user->solver,user->yi[i],user->yiwork[i]); 
701:  
702:   KSPGetIterationNumber(user->solver,&its); 
703:   user->ksp_its = user->ksp_its + its;

705:   for (i=user->nt-2; i>=0; i--){
706:     VecAXPY(user->yi[i],1.0,user->yiwork[i+1]); 
707:     KSPSolve(user->solver,user->yi[i],user->yiwork[i]); 

709:     KSPGetIterationNumber(user->solver,&its); 
710:     user->ksp_its = user->ksp_its + its;

712:   }

714:   Gather_i(Y,user->yiwork,user->yi_scatter,user->nt); 
715:   return(0);
716: }

720: PetscErrorCode StateMatDuplicate(Mat J_shell, MatDuplicateOption opt, Mat *new_shell)
721: {
723:   void *ptr;
724:   AppCtx *user;
726:   MatShellGetContext(J_shell,&ptr); 
727:   user = (AppCtx*)ptr;

729:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->m,user,new_shell); 
730:   MatShellSetOperation(*new_shell,MATOP_MULT,(void(*)(void))StateMatMult); 
731:   MatShellSetOperation(*new_shell,MATOP_DUPLICATE,(void(*)(void))StateMatDuplicate); 
732:   MatShellSetOperation(*new_shell,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatMultTranspose); 
733:   MatShellSetOperation(*new_shell,MATOP_GET_DIAGONAL,(void(*)(void))StateMatGetDiagonal); 
734:   
735:   return(0);
736: }

740: PetscErrorCode StateMatGetDiagonal(Mat J_shell, Vec X)
741: {
743:   void *ptr;
744:   AppCtx *user;
746:   MatShellGetContext(J_shell,&ptr); 
747:   user = (AppCtx*)ptr;
748:    VecCopy(user->js_diag,X); 
749:   return(0);
750:   
751: }

755: PetscErrorCode FormConstraints(TaoSolver tao, Vec X, Vec C, void *ptr)
756: {
757:   /* con = Ay - q, A = [B  0  0 ... 0; 
758:                        -I  B  0 ... 0; 
759:                         0 -I  B ... 0;
760:                              ...     ;
761:                         0    ... -I B] 
762:      B = ht * Div * Sigma * Grad + eye */
764:   PetscInt i;
765:   AppCtx *user = (AppCtx*)ptr;
767:    
768:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 
769:   Scatter_i(user->y,user->yi,user->yi_scatter,user->nt); 
770:    
771:   MatMult(user->JsBlock,user->yi[0],user->yiwork[0]); 

773:   for (i=1; i<user->nt; i++){
774:     MatMult(user->JsBlock,user->yi[i],user->yiwork[i]); 
775:     VecAXPY(user->yiwork[i],-1.0,user->yi[i-1]);                     
776:   }

778:   Gather_i(C,user->yiwork,user->yi_scatter,user->nt); 
779:   VecAXPY(C,-1.0,user->q); 

781:   return(0);
782: }


787: PetscErrorCode Scatter(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat)
788: {
791:   VecScatterBegin(s_scat,x,state,INSERT_VALUES,SCATTER_FORWARD); 
792:   VecScatterEnd(s_scat,x,state,INSERT_VALUES,SCATTER_FORWARD); 

794:   VecScatterBegin(d_scat,x,design,INSERT_VALUES,SCATTER_FORWARD); 
795:   VecScatterEnd(d_scat,x,design,INSERT_VALUES,SCATTER_FORWARD); 
796:   return(0);
797: }

801: PetscErrorCode Scatter_i(Vec y, Vec *yi, VecScatter *scat, PetscInt nt)
802: {
804:   PetscInt i;
806:   for (i=0; i<nt; i++){
807:     VecScatterBegin(scat[i],y,yi[i],INSERT_VALUES,SCATTER_FORWARD); 
808:     VecScatterEnd(scat[i],y,yi[i],INSERT_VALUES,SCATTER_FORWARD); 
809:   }
810:   return(0);
811: }


816: PetscErrorCode Gather(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat)
817: {
820:   VecScatterBegin(s_scat,state,x,INSERT_VALUES,SCATTER_REVERSE); 
821:   VecScatterEnd(s_scat,state,x,INSERT_VALUES,SCATTER_REVERSE); 
822:   VecScatterBegin(d_scat,design,x,INSERT_VALUES,SCATTER_REVERSE); 
823:   VecScatterEnd(d_scat,design,x,INSERT_VALUES,SCATTER_REVERSE); 
824:   return(0);
825: }

829: PetscErrorCode Gather_i(Vec y, Vec *yi, VecScatter *scat, PetscInt nt)
830: {
832:   PetscInt i;
834:   for (i=0; i<nt; i++){
835:     VecScatterBegin(scat[i],yi[i],y,INSERT_VALUES,SCATTER_REVERSE); 
836:     VecScatterEnd(scat[i],yi[i],y,INSERT_VALUES,SCATTER_REVERSE); 
837:   }
838:   return(0);
839: }
840:   
841:     
844: PetscErrorCode ParabolicInitialize(AppCtx *user)
845: {
847:   PetscInt m,n,i,j,k,linear_index,istart,iend,iblock,lo,hi,lo2,hi2;
848:   Vec XX,YY,ZZ,XXwork,YYwork,ZZwork,UTwork,yi,di,bc;
849:   PetscReal *x, *y, *z;
850:   PetscReal h,stime;
851:   PetscScalar hinv,neg_hinv,half = 0.5,sqrt_beta;
852:   PetscInt im,indx1,indx2,indy1,indy2,indz1,indz2,nx,ny,nz;
853:   PetscReal xri,yri,zri,xim,yim,zim,dx1,dx2,dy1,dy2,dz1,dz2,Dx,Dy,Dz;
854:   PetscScalar v,vx,vy,vz;
855:   IS is_from_y,is_to_yi,is_from_d,is_to_di;
856:   /* Data locations */
857:   PetscScalar xr[64] = {0.4970,     0.8498,     0.7814,     0.6268,     0.7782,     0.6402,     0.3617,     0.3160,     
858:                         0.3610,     0.5298,     0.6987,     0.3331,     0.7962,     0.5596,     0.3866,     0.6774,     
859:                         0.5407,     0.4518,     0.6702,     0.6061,     0.7580,     0.8997,     0.5198,     0.8326,     
860:                         0.2138,     0.9198,     0.3000,     0.2833,     0.8288,     0.7076,     0.1820,     0.0728,     
861:                         0.8447,     0.2367,     0.3239,     0.6413,     0.3114,     0.4731,     0.1192,     0.9273,     
862:                         0.5724,     0.4331,     0.5136,     0.3547,     0.4413,     0.2602,     0.5698,     0.7278,     
863:                         0.5261,     0.6230,     0.2454,     0.3948,     0.7479,     0.6582,     0.4660,     0.5594,     
864:                         0.7574,     0.1143,     0.5900,     0.1065,     0.4260,     0.3294,     0.8276,     0.0756};
865:   
866:   PetscScalar yr[64] = {0.7345,     0.9120,     0.9288,     0.7528,     0.4463,     0.4985,     0.2497,     0.6256,     
867:                         0.3425,     0.9026,     0.6983,     0.4230,     0.7140,     0.2970,     0.4474,     0.8792,     
868:                         0.6604,     0.2485,     0.7968,     0.6127,     0.1796,     0.2437,     0.5938,     0.6137,     
869:                         0.3867,     0.5658,     0.4575,     0.1009,     0.0863,     0.3361,     0.0738,     0.3985,     
870:                         0.6602,     0.1437,     0.0934,     0.5983,     0.5950,     0.0763,     0.0768,     0.2288,     
871:                         0.5761,     0.1129,     0.3841,     0.6150,     0.6904,     0.6686,     0.1361,     0.4601,     
872:                         0.4491,     0.3716,     0.1969,     0.6537,     0.6743,     0.6991,     0.4811,     0.5480,     
873:                         0.1684,     0.4569,     0.6889,     0.8437,     0.3015,     0.2854,     0.8199,     0.2658};
874:   
875:   PetscScalar zr[64] = {0.7668,     0.8573,     0.2654,     0.2719,     0.1060,     0.1311,     0.6232,     0.2295,     
876:                         0.8009,     0.2147,     0.2119,     0.9325,     0.4473,     0.3600,     0.3374,     0.3819,     
877:                         0.4066,     0.5801,     0.1673,     0.0959,     0.4638,     0.8236,     0.8800,     0.2939,     
878:                         0.2028,     0.8262,     0.2706,     0.6276,     0.9085,     0.6443,     0.8241,     0.0712,     
879:                         0.1824,     0.7789,     0.4389,     0.8415,     0.7055,     0.6639,     0.3653,     0.2078,     
880:                         0.1987,     0.2297,     0.4321,     0.8115,     0.4915,     0.7764,     0.4657,     0.4627,     
881:                         0.4569,     0.4232,     0.8514,     0.0674,     0.3227,     0.1055,     0.6690,     0.6313,     
882:                         0.9226,     0.5461,     0.4126,     0.2364,     0.6096,     0.7042,     0.3914,     0.0711};

885:   PetscMalloc(user->mx*sizeof(PetscReal),&x); 
886:   PetscMalloc(user->mx*sizeof(PetscReal),&y); 
887:   PetscMalloc(user->mx*sizeof(PetscReal),&z); 
888:   user->jformed = PETSC_FALSE;
889:   user->dsg_formed = PETSC_FALSE;

891:   n = user->mx * user->mx * user->mx;
892:   m = 3 * user->mx * user->mx * (user->mx-1);
893:   sqrt_beta = PetscSqrtScalar(user->beta);

895:   user->ksp_its = 0;
896:   user->ksp_its_initial = 0;

898:   stime = (PetscReal)user->nt/user->ns;
899:   PetscMalloc(user->ns*sizeof(PetscInt),&user->sample_times); 
900:   for (i=0; i<user->ns; i++){
901:     /* round((i+1)*stime)-1 */
902:     user->sample_times[i] = (PetscInt)(stime*((PetscReal)i+1.0)-0.5);
903:   }

905:   VecCreate(PETSC_COMM_WORLD,&XX); 
906:   VecCreate(PETSC_COMM_WORLD,&user->q); 
907:   VecSetSizes(XX,PETSC_DECIDE,n); 
908:   VecSetSizes(user->q,PETSC_DECIDE,n*user->nt); 
909:   VecSetFromOptions(XX); 
910:   VecSetFromOptions(user->q); 

912:   VecDuplicate(XX,&YY); 
913:   VecDuplicate(XX,&ZZ); 
914:   VecDuplicate(XX,&XXwork); 
915:   VecDuplicate(XX,&YYwork); 
916:   VecDuplicate(XX,&ZZwork); 
917:   VecDuplicate(XX,&UTwork); 
918:   VecDuplicate(XX,&user->utrue); 
919:   VecDuplicate(XX,&bc); 

921:   /* Generate 3D grid, and collect ns (1<=ns<=8) right-hand-side vectors into user->q */
922:   h = 1.0/user->mx; 
923:   hinv = user->mx;
924:   neg_hinv = -hinv;
925:  
926:   VecGetOwnershipRange(XX,&istart,&iend); 
927:   for (linear_index=istart; linear_index<iend; linear_index++){
928:     i = linear_index % user->mx;
929:     j = ((linear_index-i)/user->mx) % user->mx;
930:     k = ((linear_index-i)/user->mx-j) / user->mx;
931:     vx = h*(i+0.5);
932:     vy = h*(j+0.5);
933:     vz = h*(k+0.5);        
934:     VecSetValues(XX,1,&linear_index,&vx,INSERT_VALUES); 
935:     VecSetValues(YY,1,&linear_index,&vy,INSERT_VALUES); 
936:     VecSetValues(ZZ,1,&linear_index,&vz,INSERT_VALUES); 
937:     if ((vx<0.6) && (vx>0.4) && (vy<0.6) && (vy>0.4) && (vy<0.6) && (vz<0.6) && (vz>0.4)){
938:       v = 1000.0;
939:       VecSetValues(bc,1,&linear_index,&v,INSERT_VALUES); 
940:     }
941:   }

943:   VecAssemblyBegin(XX); 
944:   VecAssemblyEnd(XX); 
945:   VecAssemblyBegin(YY); 
946:   VecAssemblyEnd(YY); 
947:   VecAssemblyBegin(ZZ); 
948:   VecAssemblyEnd(ZZ); 
949:   VecAssemblyBegin(bc); 
950:   VecAssemblyEnd(bc);   

952:   /* Compute true parameter function
953:      ut = 0.5 + exp(-10*((x-0.5)^2+(y-0.5)^2+(z-0.5)^2)) */
954:   VecCopy(XX,XXwork); 
955:   VecCopy(YY,YYwork); 
956:   VecCopy(ZZ,ZZwork); 

958:   VecShift(XXwork,-0.5); 
959:   VecShift(YYwork,-0.5); 
960:   VecShift(ZZwork,-0.5); 

962:   VecPointwiseMult(XXwork,XXwork,XXwork); 
963:   VecPointwiseMult(YYwork,YYwork,YYwork); 
964:   VecPointwiseMult(ZZwork,ZZwork,ZZwork); 

966:   VecCopy(XXwork,user->utrue); 
967:   VecAXPY(user->utrue,1.0,YYwork); 
968:   VecAXPY(user->utrue,1.0,ZZwork); 
969:   VecScale(user->utrue,-10.0); 
970:   VecExp(user->utrue); 
971:   VecShift(user->utrue,0.5); 

973:   VecDestroy(&XX); 
974:   VecDestroy(&YY); 
975:   VecDestroy(&ZZ); 
976:   VecDestroy(&XXwork); 
977:   VecDestroy(&YYwork); 
978:   VecDestroy(&ZZwork); 
979:   VecDestroy(&UTwork); 
980:  
981:   /* Initial guess and reference model */
982:   VecDuplicate(user->utrue,&user->ur); 
983:   VecSet(user->ur,0.0); 

985:   /* Generate Grad matrix */
986:   MatCreate(PETSC_COMM_WORLD,&user->Grad); 
987:   MatSetSizes(user->Grad,PETSC_DECIDE,PETSC_DECIDE,m,n); 
988:   MatSetFromOptions(user->Grad); 
989:   MatMPIAIJSetPreallocation(user->Grad,2,PETSC_NULL,2,PETSC_NULL); 
990:   MatSeqAIJSetPreallocation(user->Grad,2,PETSC_NULL); 
991:   MatGetOwnershipRange(user->Grad,&istart,&iend); 

993:   for (i=istart; i<iend; i++){
994:     if (i<m/3){
995:       iblock = i / (user->mx-1);
996:       j = iblock*user->mx + (i % (user->mx-1));
997:       MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES); 
998:       j = j+1;
999:       MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES); 
1000:     }
1001:     if (i>=m/3 && i<2*m/3){
1002:       iblock = (i-m/3) / (user->mx*(user->mx-1));
1003:       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
1004:       MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES); 
1005:       j = j + user->mx;
1006:       MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES); 
1007:     }
1008:     if (i>=2*m/3){
1009:       j = i-2*m/3;
1010:       MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES); 
1011:       j = j + user->mx*user->mx;
1012:       MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES); 
1013:     }
1014:   }

1016:   MatAssemblyBegin(user->Grad,MAT_FINAL_ASSEMBLY); 
1017:   MatAssemblyEnd(user->Grad,MAT_FINAL_ASSEMBLY); 


1020:   /* Generate arithmetic averaging matrix Av */
1021:   MatCreate(PETSC_COMM_WORLD,&user->Av); 
1022:   MatSetSizes(user->Av,PETSC_DECIDE,PETSC_DECIDE,m,n); 
1023:   MatSetFromOptions(user->Av); 
1024:   MatMPIAIJSetPreallocation(user->Av,2,PETSC_NULL,2,PETSC_NULL); 
1025:   MatSeqAIJSetPreallocation(user->Av,2,PETSC_NULL); 
1026:   MatGetOwnershipRange(user->Av,&istart,&iend); 

1028:   for (i=istart; i<iend; i++){
1029:     if (i<m/3){
1030:       iblock = i / (user->mx-1);
1031:       j = iblock*user->mx + (i % (user->mx-1));
1032:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES); 
1033:       j = j+1;
1034:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES); 
1035:     }
1036:     if (i>=m/3 && i<2*m/3){
1037:       iblock = (i-m/3) / (user->mx*(user->mx-1));
1038:       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
1039:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES); 
1040:       j = j + user->mx;
1041:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES); 
1042:     }
1043:     if (i>=2*m/3){
1044:       j = i-2*m/3;
1045:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES); 
1046:       j = j + user->mx*user->mx;
1047:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES); 
1048:     }
1049:   }

1051:   MatAssemblyBegin(user->Av,MAT_FINAL_ASSEMBLY); 
1052:   MatAssemblyEnd(user->Av,MAT_FINAL_ASSEMBLY); 

1054:   /* Generate transpose of averaging matrix Av */
1055:   MatTranspose(user->Av,MAT_INITIAL_MATRIX,&user->AvT); 



1059:   MatCreate(PETSC_COMM_WORLD,&user->L); 
1060:   MatSetSizes(user->L,PETSC_DECIDE,PETSC_DECIDE,m+n,n); 
1061:   MatSetFromOptions(user->L); 
1062:   MatMPIAIJSetPreallocation(user->L,2,PETSC_NULL,2,PETSC_NULL); 
1063:   MatSeqAIJSetPreallocation(user->L,2,PETSC_NULL); 
1064:   MatGetOwnershipRange(user->L,&istart,&iend);

1066:   for (i=istart; i<iend; i++){
1067:     if (i<m/3){
1068:       iblock = i / (user->mx-1);
1069:       j = iblock*user->mx + (i % (user->mx-1));
1070:       MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES); 
1071:       j = j+1;
1072:       MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES); 
1073:     }
1074:     if (i>=m/3 && i<2*m/3){
1075:       iblock = (i-m/3) / (user->mx*(user->mx-1));
1076:       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
1077:       MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES); 
1078:       j = j + user->mx;
1079:       MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES); 
1080:     }
1081:     if (i>=2*m/3 && i<m){
1082:       j = i-2*m/3;
1083:       MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES); 
1084:       j = j + user->mx*user->mx;
1085:       MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES); 
1086:     }
1087:     if (i>=m){
1088:       j = i - m;
1089:       MatSetValues(user->L,1,&i,1,&j,&sqrt_beta,INSERT_VALUES); 
1090:     }
1091:   }

1093:   MatAssemblyBegin(user->L,MAT_FINAL_ASSEMBLY); 
1094:   MatAssemblyEnd(user->L,MAT_FINAL_ASSEMBLY); 

1096:   MatScale(user->L,PetscPowScalar(h,1.5)); 

1098:   /* Generate Div matrix */
1099:   MatTranspose(user->Grad,MAT_INITIAL_MATRIX,&user->Div);


1102:   /* Build work vectors and matrices */
1103:   VecCreate(PETSC_COMM_WORLD,&user->S); 
1104:   VecSetSizes(user->S, PETSC_DECIDE, user->mx*user->mx*(user->mx-1)*3); 
1105:   VecSetFromOptions(user->S); 

1107:   VecCreate(PETSC_COMM_WORLD,&user->lwork); 
1108:   VecSetSizes(user->lwork,PETSC_DECIDE,m+user->mx*user->mx*user->mx);  
1109:   VecSetFromOptions(user->lwork); 

1111:   MatDuplicate(user->Div,MAT_SHARE_NONZERO_PATTERN,&user->Divwork); 

1113:   MatDuplicate(user->Av,MAT_SHARE_NONZERO_PATTERN,&user->Avwork); 

1115:   VecCreate(PETSC_COMM_WORLD,&user->d); 
1116:   VecSetSizes(user->d,PETSC_DECIDE,user->ndata*user->nt); 
1117:   VecSetFromOptions(user->d); 

1119:   VecDuplicate(user->S,&user->Swork); 
1120:   VecDuplicate(user->S,&user->Av_u); 
1121:   VecDuplicate(user->S,&user->Twork); 
1122:   VecDuplicate(user->S,&user->Rwork); 
1123:   VecDuplicate(user->y,&user->ywork); 
1124:   VecDuplicate(user->u,&user->uwork); 
1125:   VecDuplicate(user->u,&user->js_diag); 
1126:   VecDuplicate(user->c,&user->cwork); 
1127:   VecDuplicate(user->d,&user->dwork); 

1129:   /* Create matrix-free shell user->Js for computing A*x */
1130:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m*user->nt,user->m*user->nt,user,&user->Js); 
1131:   MatShellSetOperation(user->Js,MATOP_MULT,(void(*)(void))StateMatMult); 
1132:   MatShellSetOperation(user->Js,MATOP_DUPLICATE,(void(*)(void))StateMatDuplicate); 
1133:   MatShellSetOperation(user->Js,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatMultTranspose); 
1134:   MatShellSetOperation(user->Js,MATOP_GET_DIAGONAL,(void(*)(void))StateMatGetDiagonal); 

1136:   /* Diagonal blocks of user->Js */
1137:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->m,user,&user->JsBlock); 
1138:   MatShellSetOperation(user->JsBlock,MATOP_MULT,(void(*)(void))StateMatBlockMult); 
1139:   /* Blocks are symmetric */
1140:   MatShellSetOperation(user->JsBlock,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatBlockMult); 

1142:   /* Create a matrix-free shell user->JsBlockPrec for computing (U+D)\D*(L+D)\x, where JsBlock = L+D+U,
1143:      D is diagonal, L is strictly lower triangular, and U is strictly upper triangular.
1144:      This is an SSOR preconditioner for user->JsBlock. */
1145:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m,user->m,user,&user->JsBlockPrec); 
1146:   MatShellSetOperation(user->JsBlockPrec,MATOP_MULT,(void(*)(void))StateMatBlockPrecMult);  
1147:   /* JsBlockPrec is symmetric */
1148:   MatShellSetOperation(user->JsBlockPrec,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatBlockPrecMult); 
1149:   MatSetOption(user->JsBlockPrec,MAT_SYMMETRY_ETERNAL,PETSC_TRUE); 
1150:   
1151:   /* Create a matrix-free shell user->Jd for computing B*x */
1152:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m*user->nt,user->m,user,&user->Jd); 
1153:   MatShellSetOperation(user->Jd,MATOP_MULT,(void(*)(void))DesignMatMult); 
1154:   MatShellSetOperation(user->Jd,MATOP_MULT_TRANSPOSE,(void(*)(void))DesignMatMultTranspose); 

1156:   /* User-defined routines for computing user->Js\x and user->Js^T\x*/
1157:   MatCreateShell(PETSC_COMM_WORLD,PETSC_DETERMINE,PETSC_DETERMINE,user->m*user->nt,user->m*user->nt,user,&user->JsInv); 
1158:   MatShellSetOperation(user->JsInv,MATOP_MULT,(void(*)(void))StateMatInvMult); 
1159:   MatShellSetOperation(user->JsInv,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatInvTransposeMult); 

1161:   /* Solver options and tolerances */
1162:   KSPCreate(PETSC_COMM_WORLD,&user->solver); 
1163:   KSPSetType(user->solver,KSPCG); 
1164:   KSPSetOperators(user->solver,user->JsBlock,user->JsBlockPrec,DIFFERENT_NONZERO_PATTERN); 
1165:   KSPSetInitialGuessNonzero(user->solver,PETSC_FALSE); 
1166:   KSPSetTolerances(user->solver,1e-4,1e-20,1e3,500); 
1167:   KSPGetPC(user->solver,&user->prec); 
1168:   PCSetType(user->prec,PCSHELL); 

1170:   PCShellSetApply(user->prec,StateMatBlockPrecMult); 
1171:   PCShellSetApplyTranspose(user->prec,StateMatBlockPrecMult); 
1172:   PCShellSetContext(user->prec,user); 


1175:   /* Create scatter from y to y_1,y_2,...,y_nt */
1176:   PetscMalloc(user->nt*user->m*sizeof(PetscInt),&user->yi_scatter);
1177:   VecCreate(PETSC_COMM_WORLD,&yi); 
1178:   VecSetSizes(yi,PETSC_DECIDE,user->mx*user->mx*user->mx); 
1179:   VecSetFromOptions(yi); 
1180:   VecDuplicateVecs(yi,user->nt,&user->yi); 
1181:   VecDuplicateVecs(yi,user->nt,&user->yiwork); 

1183:   VecGetOwnershipRange(user->y,&lo2,&hi2); 
1184:   istart = 0;
1185:   for (i=0; i<user->nt; i++){
1186:     VecGetOwnershipRange(user->yi[i],&lo,&hi); 
1187:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_yi); 
1188:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo2+istart,1,&is_from_y); 
1189:     VecScatterCreate(user->y,is_from_y,user->yi[i],is_to_yi,&user->yi_scatter[i]); 
1190:     istart = istart + hi-lo;
1191:     ISDestroy(&is_to_yi); 
1192:     ISDestroy(&is_from_y); 
1193:   }
1194:   VecDestroy(&yi); 

1196:   /* Create scatter from d to d_1,d_2,...,d_ns */
1197:   PetscMalloc(user->ns*user->ndata*sizeof(PetscInt),&user->di_scatter);
1198:   VecCreate(PETSC_COMM_WORLD,&di); 
1199:   VecSetSizes(di,PETSC_DECIDE,user->ndata); 
1200:   VecSetFromOptions(di); 
1201:   VecDuplicateVecs(di,user->ns,&user->di); 
1202:   VecGetOwnershipRange(user->d,&lo2,&hi2); 
1203:   istart = 0;
1204:   for (i=0; i<user->ns; i++){
1205:     VecGetOwnershipRange(user->di[i],&lo,&hi); 
1206:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_to_di); 
1207:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo2+istart,1,&is_from_d); 
1208:     VecScatterCreate(user->d,is_from_d,user->di[i],is_to_di,&user->di_scatter[i]); 
1209:     istart = istart + hi-lo;
1210:     ISDestroy(&is_to_di); 
1211:     ISDestroy(&is_from_d); 
1212:   }
1213:   VecDestroy(&di); 

1215:   /* Assemble RHS of forward problem */
1216:   VecCopy(bc,user->yiwork[0]);
1217:   for (i=1; i<user->nt; i++){
1218:     VecSet(user->yiwork[i],0.0); 
1219:   }
1220:   Gather_i(user->q,user->yiwork,user->yi_scatter,user->nt); 
1221:   VecDestroy(&bc); 

1223:   /* Compute true state function yt given ut */
1224:   VecCreate(PETSC_COMM_WORLD,&user->ytrue); 
1225:   VecSetSizes(user->ytrue,PETSC_DECIDE,n*user->nt); 
1226:   VecSetFromOptions(user->ytrue); 

1228:   /* First compute Av_u = Av*exp(-u) */
1229:   VecSet(user->uwork,0);
1230:   VecAXPY(user->uwork,-1.0,user->utrue); /*  Note: user->utrue */
1231:   VecExp(user->uwork); 
1232:   MatMult(user->Av,user->uwork,user->Av_u); 

1234:   MatMatMultSymbolic(user->Div,user->Grad,PETSC_DEFAULT,&user->DSG); 
1235:   user->dsg_formed = PETSC_TRUE;

1237:   /* Next form DSG = Div*S*Grad */
1238:   MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN); 
1239:   MatDiagonalScale(user->Divwork,PETSC_NULL,user->Av_u); 
1240:   if (user->dsg_formed) {
1241:     MatMatMultNumeric(user->Divwork,user->Grad,user->DSG); 
1242:   } else {
1243:     MatMatMultSymbolic(user->Div,user->Grad,PETSC_DEFAULT,&user->DSG); 
1244:     MatMatMultNumeric(user->Div,user->Grad,user->DSG); 
1245:     user->dsg_formed = PETSC_TRUE;
1246:   }
1247:   /* B = speye(nx^3) + ht*DSG; */
1248:   MatScale(user->DSG,user->ht); 
1249:   MatShift(user->DSG,1.0); 

1251:   /* Now solve for ytrue */
1252:   StateMatInvMult(user->Js,user->q,user->ytrue); 

1254:   /* Initial guess y0 for state given u0 */

1256:   /* First compute Av_u = Av*exp(-u) */
1257:   VecSet(user->uwork,0);
1258:   VecAXPY(user->uwork,-1.0,user->u); /*  Note: user->u */
1259:   VecExp(user->uwork); 
1260:   MatMult(user->Av,user->uwork,user->Av_u); 

1262:   /* Next form DSG = Div*S*Grad */
1263:   MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN); 
1264:   MatDiagonalScale(user->Divwork,PETSC_NULL,user->Av_u); 
1265:   if (user->dsg_formed) {
1266:     MatMatMultNumeric(user->Divwork,user->Grad,user->DSG); 
1267:   } else {
1268:     MatMatMultSymbolic(user->Div,user->Grad,PETSC_DEFAULT,&user->DSG); 
1269:     MatMatMultNumeric(user->Div,user->Grad,user->DSG); 
1270:     user->dsg_formed = PETSC_TRUE;
1271:   }
1272:   /* B = speye(nx^3) + ht*DSG; */
1273:   MatScale(user->DSG,user->ht); 
1274:   MatShift(user->DSG,1.0); 

1276:   /* Now solve for y */
1277:   user->lcl->solve_type = LCL_FORWARD1;
1278:   StateMatInvMult(user->Js,user->q,user->y); 
1279:   
1280:   /* Construct projection matrix Q, a block diagonal matrix consisting of nt copies of Qblock along the diagonal */
1281:   MatCreate(PETSC_COMM_WORLD,&user->Qblock); 
1282:   MatSetSizes(user->Qblock,PETSC_DECIDE,PETSC_DECIDE,user->ndata,n); 
1283:   MatSetFromOptions(user->Qblock); 
1284:   MatMPIAIJSetPreallocation(user->Qblock,8,PETSC_NULL,8,PETSC_NULL); 
1285:   MatSeqAIJSetPreallocation(user->Qblock,8,PETSC_NULL); 

1287:  
1288:   for (i=0; i<user->mx; i++){
1289:     x[i] = h*(i+0.5);
1290:     y[i] = h*(i+0.5);
1291:     z[i] = h*(i+0.5);
1292:   }
1293:   
1294:   MatGetOwnershipRange(user->Qblock,&istart,&iend);

1296:   nx = user->mx; ny = user->mx; nz = user->mx;
1297:   for (i=istart; i<iend; i++){
1298:     
1299:     xri = xr[i];
1300:     im = 0;
1301:     xim = x[im];
1302:     while (xri>xim && im<nx){
1303:       im = im+1;
1304:       xim = x[im];
1305:     }
1306:     indx1 = im-1;
1307:     indx2 = im;
1308:     dx1 = xri - x[indx1];
1309:     dx2 = x[indx2] - xri;

1311:     yri = yr[i];
1312:     im = 0;
1313:     yim = y[im];
1314:     while (yri>yim && im<ny){
1315:       im = im+1;
1316:       yim = y[im];
1317:     }
1318:     indy1 = im-1;
1319:     indy2 = im;
1320:     dy1 = yri - y[indy1];
1321:     dy2 = y[indy2] - yri;
1322:     
1323:     zri = zr[i];
1324:     im = 0;
1325:     zim = z[im];
1326:     while (zri>zim && im<nz){
1327:       im = im+1;
1328:       zim = z[im];
1329:     }
1330:     indz1 = im-1;
1331:     indz2 = im;
1332:     dz1 = zri - z[indz1];
1333:     dz2 = z[indz2] - zri;

1335:     Dx = x[indx2] - x[indx1];
1336:     Dy = y[indy2] - y[indy1];
1337:     Dz = z[indz2] - z[indz1];

1339:     j = indx1 + indy1*nx + indz1*nx*ny;
1340:     v = (1-dx1/Dx)*(1-dy1/Dy)*(1-dz1/Dz);
1341:     MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES); 

1343:     j = indx1 + indy1*nx + indz2*nx*ny;
1344:     v = (1-dx1/Dx)*(1-dy1/Dy)*(1-dz2/Dz);
1345:     MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES); 

1347:     j = indx1 + indy2*nx + indz1*nx*ny;
1348:     v = (1-dx1/Dx)*(1-dy2/Dy)*(1-dz1/Dz);
1349:     MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES); 

1351:     j = indx1 + indy2*nx + indz2*nx*ny;
1352:     v = (1-dx1/Dx)*(1-dy2/Dy)*(1-dz2/Dz);
1353:     MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES); 

1355:     j = indx2 + indy1*nx + indz1*nx*ny;
1356:     v = (1-dx2/Dx)*(1-dy1/Dy)*(1-dz1/Dz);
1357:     MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES); 

1359:     j = indx2 + indy1*nx + indz2*nx*ny;
1360:     v = (1-dx2/Dx)*(1-dy1/Dy)*(1-dz2/Dz);
1361:     MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES); 

1363:     j = indx2 + indy2*nx + indz1*nx*ny;
1364:     v = (1-dx2/Dx)*(1-dy2/Dy)*(1-dz1/Dz);
1365:     MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES); 

1367:     j = indx2 + indy2*nx + indz2*nx*ny;
1368:     v = (1-dx2/Dx)*(1-dy2/Dy)*(1-dz2/Dz);
1369:     MatSetValues(user->Qblock,1,&i,1,&j,&v,INSERT_VALUES); 

1371:   }

1373:   MatAssemblyBegin(user->Qblock,MAT_FINAL_ASSEMBLY); 
1374:   MatAssemblyEnd(user->Qblock,MAT_FINAL_ASSEMBLY); 


1377:   MatTranspose(user->Qblock,MAT_INITIAL_MATRIX,&user->QblockT); 
1378:  

1380:   MatTranspose(user->L,MAT_INITIAL_MATRIX,&user->LT); 

1382:   /* Add noise to the measurement data */
1383:   VecSet(user->ywork,1.0); 
1384:   VecAYPX(user->ywork,user->noise,user->ytrue); 
1385:   Scatter_i(user->ywork,user->yiwork,user->yi_scatter,user->nt); 
1386:   for (j=0; j<user->ns; j++){
1387:     i = user->sample_times[j];
1388:     MatMult(user->Qblock,user->yiwork[i],user->di[j]);
1389:   }
1390:   Gather_i(user->d,user->di,user->di_scatter,user->ns); 

1392:   /* Now that initial conditions have been set, let the user pass tolerance options to the KSP solver */
1393:   KSPSetFromOptions(user->solver); 
1394:   PetscFree(x); 
1395:   PetscFree(y); 
1396:   PetscFree(z); 


1399:   return(0);
1400: }

1404: PetscErrorCode ParabolicDestroy(AppCtx *user)
1405: {
1407:   PetscInt i;
1409:   MatDestroy(&user->Qblock); 
1410:   MatDestroy(&user->QblockT); 
1411:   MatDestroy(&user->Div); 
1412:   MatDestroy(&user->Divwork); 
1413:   MatDestroy(&user->Grad); 
1414:   MatDestroy(&user->Av); 
1415:   MatDestroy(&user->Avwork); 
1416:   MatDestroy(&user->AvT); 
1417:   MatDestroy(&user->DSG); 
1418:   MatDestroy(&user->L); 
1419:   MatDestroy(&user->LT); 
1420:   KSPDestroy(&user->solver); 
1421:   MatDestroy(&user->Js); 
1422:   MatDestroy(&user->Jd); 
1423:   MatDestroy(&user->JsInv); 
1424:   MatDestroy(&user->JsBlock); 
1425:   MatDestroy(&user->JsBlockPrec); 
1426:   VecDestroy(&user->u); 
1427:   VecDestroy(&user->uwork); 
1428:   VecDestroy(&user->utrue); 
1429:   VecDestroy(&user->y); 
1430:   VecDestroy(&user->ywork); 
1431:   VecDestroy(&user->ytrue); 
1432:   VecDestroyVecs(user->nt,&user->yi); 
1433:   VecDestroyVecs(user->nt,&user->yiwork); 
1434:   VecDestroyVecs(user->ns,&user->di); 
1435:   PetscFree(user->yi); 
1436:   PetscFree(user->yiwork); 
1437:   PetscFree(user->di); 
1438:   VecDestroy(&user->c); 
1439:   VecDestroy(&user->cwork); 
1440:   VecDestroy(&user->ur); 
1441:   VecDestroy(&user->q); 
1442:   VecDestroy(&user->d); 
1443:   VecDestroy(&user->dwork); 
1444:   VecDestroy(&user->lwork); 
1445:   VecDestroy(&user->S); 
1446:   VecDestroy(&user->Swork); 
1447:   VecDestroy(&user->Av_u); 
1448:   VecDestroy(&user->Twork); 
1449:   VecDestroy(&user->Rwork); 
1450:   VecDestroy(&user->js_diag); 
1451:   ISDestroy(&user->s_is); 
1452:   ISDestroy(&user->d_is); 
1453:   VecScatterDestroy(&user->state_scatter); 
1454:   VecScatterDestroy(&user->design_scatter); 
1455:   for (i=0; i<user->nt; i++){
1456:     VecScatterDestroy(&user->yi_scatter[i]); 
1457:   }
1458:   for (i=0; i<user->ns; i++){
1459:     VecScatterDestroy(&user->di_scatter[i]); 
1460:   }
1461:   PetscFree(user->yi_scatter); 
1462:   PetscFree(user->di_scatter); 
1463:   PetscFree(user->sample_times); 
1464:   return(0);
1465: }

1469: PetscErrorCode ParabolicMonitor(TaoSolver tao, void *ptr)
1470: {
1472:   Vec X;
1473:   PetscReal unorm,ynorm;
1474:   AppCtx *user = (AppCtx*)ptr;
1476:   TaoGetSolutionVector(tao,&X); 
1477:   Scatter(X,user->ywork,user->state_scatter,user->uwork,user->design_scatter); 
1478:   VecAXPY(user->ywork,-1.0,user->ytrue); 
1479:   VecAXPY(user->uwork,-1.0,user->utrue); 
1480:   VecNorm(user->uwork,NORM_2,&unorm); 
1481:   VecNorm(user->ywork,NORM_2,&ynorm); 
1482:   PetscPrintf(MPI_COMM_WORLD, "||u-ut||=%G ||y-yt||=%G\n",unorm,ynorm); 
1483:   return(0);
1484: }