Actual source code: elliptic.c

tao-2.1-p0 2012-07-24
  1: #include "tao-private/taosolver_impl.h"
  2: #include "src/pde_constrained/impls/lcl/lcl.h"
  3: #include "petsctime.h"


  6: /*T
  7:    Concepts: TAO - Solving a system of nonlinear equations, nonlinear ;east squares
  8:    Routines: TaoInitialize(); TaoFinalize(); 
  9:    Routines: TaoCreate();
 10:    Routines: TaoSetType(); 
 11:    Routines: TaoSetInitialVector();
 12:    Routines: TaoSetObjectiveRoutine();
 13:    Routines: TaoSetGradientRoutine();
 14:    Routines: TaoSetConstraintsRoutine();
 15:    Routines: TaoSetJacobianStateRoutine();
 16:    Routines: TaoSetJacobianDesignRoutine();
 17:    Routines: TaoSetStateDesignIS();
 18:    Routines: TaoSetFromOptions();
 19:    Routines: TaoSetHistory(); TaoGetHistory();
 20:    Routines: TaoSolve();
 21:    Routines: TaoGetTerminationReason(); TaoDestroy(); 
 22:    Processors: n
 23: T*/

 25: typedef struct {
 26:   PetscInt n; /* Number of total variables */
 27:   PetscInt m; /* Number of constraints */
 28:   PetscInt nstate;
 29:   PetscInt ndesign;
 30:   PetscInt mx; /* grid points in each direction */
 31:   PetscInt ns; /* Number of data samples (1<=ns<=8) 
 32:                   Currently only ns=1 is supported */
 33:   PetscInt ndata; /* Number of data points per sample */
 34:   IS s_is;
 35:   IS d_is;

 37:   VecScatter state_scatter;
 38:   VecScatter design_scatter;
 39:   VecScatter *yi_scatter, *di_scatter;
 40:   Vec suby,subq,subd;
 41:   Mat Js,Jd,JsPrec,JsInv,JsBlock;

 43:   PetscReal alpha; /* Regularization parameter */
 44:   PetscReal beta; /* Weight attributed to ||u||^2 in regularization functional */
 45:   PetscReal noise; /* Amount of noise to add to data */
 46:   PetscReal *ones;
 47:   Mat Q;
 48:   Mat MQ; 
 49:   Mat L;

 51:   Mat Grad;
 52:   Mat Av,Avwork;
 53:   Mat Div, Divwork;
 54:   Mat DSG;
 55:   Mat Diag,Ones;
 56:   

 58:   Vec q;
 59:   Vec ur; /* reference */

 61:   Vec d;
 62:   Vec dwork;

 64:   Vec x; /* super vec of y,u */

 66:   Vec y; /* state variables */
 67:   Vec ywork;

 69:   Vec ytrue;

 71:   Vec u; /* design variables */
 72:   Vec uwork;

 74:   Vec utrue;
 75:  
 76:   Vec js_diag;
 77:   
 78:   Vec c; /* constraint vector */
 79:   Vec cwork;
 80:   
 81:   Vec lwork;
 82:   Vec S;
 83:   Vec Swork,Twork,Sdiag,Ywork;
 84:   Vec Av_u;

 86:   KSP solver;
 87:   PC prec;

 89:   PetscReal tola,tolb,tolc,told;
 90:   PetscInt ksp_its;
 91:   PetscInt ksp_its_initial;
 92:   PetscInt stages[10];
 93:   PetscBool use_ptap;
 94:   PetscBool use_lrc;
 95:   TAO_LCL* lcl;

 97: } AppCtx;

 99: PetscErrorCode FormFunction(TaoSolver, Vec, PetscReal*, void*);
100: PetscErrorCode FormGradient(TaoSolver, Vec, Vec, void*);
101: PetscErrorCode FormFunctionGradient(TaoSolver, Vec, PetscReal*, Vec, void*);
102: PetscErrorCode FormJacobianState(TaoSolver, Vec, Mat*, Mat*, Mat*, MatStructure*,void*);
103: PetscErrorCode FormJacobianDesign(TaoSolver, Vec, Mat*,void*);
104: PetscErrorCode FormConstraints(TaoSolver, Vec, Vec, void*);
105: PetscErrorCode FormHessian(TaoSolver, Vec, Mat*, Mat*, MatStructure*, void*);
106: PetscErrorCode Gather(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat);
107: PetscErrorCode Scatter(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat);
108: PetscErrorCode EllipticInitialize(AppCtx *user);
109: PetscErrorCode EllipticDestroy(AppCtx *user);
110: PetscErrorCode EllipticMonitor(TaoSolver, void*);

112: PetscErrorCode StateBlockMatMult(Mat,Vec,Vec);
113: PetscErrorCode StateMatMult(Mat,Vec,Vec);

115: PetscErrorCode StateInvMatMult(Mat,Vec,Vec);
116: PetscErrorCode DesignMatMult(Mat,Vec,Vec);
117: PetscErrorCode DesignMatMultTranspose(Mat,Vec,Vec);

119: PetscErrorCode QMatMult(Mat,Vec,Vec);
120: PetscErrorCode QMatMultTranspose(Mat,Vec,Vec);

122: static  char help[]="";

126: int main(int argc, char **argv)
127: {
129:   Vec x0;
130:   
131:   TaoSolver tao;
132:   TaoSolverTerminationReason reason;
133:   AppCtx user;
134:   PetscBool flag,showtime;
135:   PetscInt ntests = 1;
136:   PetscLogDouble v1,v2;
137:   PetscInt i;


140:   PetscInitialize(&argc, &argv, (char*)0,help);
141:   TaoInitialize(&argc, &argv, (char*)0,help);
142:   showtime = PETSC_FALSE;
143:   PetscOptionsBool("-showtime","Display time elapsed","",showtime,&showtime,&flag); 
144:   user.mx = 8;
145:   PetscOptionsInt("-mx","Number of grid points in each direction","",user.mx,&user.mx,&flag); 
146:   user.ns = 6;
147:   PetscOptionsInt("-ns","Number of data samples (1<=ns<=8)","",user.ns,&user.ns,&flag); 
148:   user.ndata = 64;
149:   PetscOptionsInt("-ndata","Numbers of data points per sample","",user.ndata,&user.ndata,&flag); 
150:   user.alpha = 0.1;
151:   PetscOptionsReal("-alpha","Regularization parameter","",user.alpha,&user.alpha,&flag); 
152:   user.beta = 0.00001;
153:   PetscOptionsReal("-beta","Weight attributed to ||u||^2 in regularization functional","",user.beta,&user.beta,&flag); 
154:   user.noise = 0.01;
155:   PetscOptionsReal("-noise","Amount of noise to add to data","",user.noise,&user.noise,&flag); 

157:   PetscOptionsBool("-use_ptap","Use ptap matrix for DSG","",PETSC_FALSE,&user.use_ptap,&flag); 
158:   PetscOptionsBool("-use_lrc","Use lrc matrix for Js","",PETSC_FALSE,&user.use_lrc,&flag); 

160:   user.m = user.ns*user.mx*user.mx*user.mx; /* number of constraints */
161:   user.nstate =  user.m;
162:   user.ndesign = user.mx*user.mx*user.mx;
163:   user.n = user.nstate + user.ndesign; /* number of variables */




168:   /* Create TAO solver and set desired solution method */
169:   TaoCreate(PETSC_COMM_WORLD,&tao); 
170:   TaoSetType(tao,"tao_lcl"); 
171:   user.lcl = (TAO_LCL*)(tao->data); 

173:   /* Set up initial vectors and matrices */
174:   EllipticInitialize(&user); 


177:   Gather(user.x,user.y,user.state_scatter,user.u,user.design_scatter); 
178:   VecDuplicate(user.x,&x0); 
179:   VecCopy(user.x,x0); 
180:   


183:   /* Set solution vector with an initial guess */
184:   TaoSetInitialVector(tao,user.x); 
185:   TaoSetObjectiveRoutine(tao, FormFunction, (void *)&user); 
186:   TaoSetGradientRoutine(tao, FormGradient, (void *)&user); 
187:   TaoSetConstraintsRoutine(tao, user.c, FormConstraints, (void *)&user); 
188:  
189:   TaoSetJacobianStateRoutine(tao, user.Js, PETSC_NULL, user.JsInv, FormJacobianState, (void *)&user);  
190:   TaoSetJacobianDesignRoutine(tao, user.Jd, FormJacobianDesign, (void *)&user); 

192:   TaoSetStateDesignIS(tao,user.s_is,user.d_is); 
193:   TaoSetFromOptions(tao); 
194:   


197:   /* SOLVE THE APPLICATION */
198:   PetscOptionsInt("-ntests","Number of times to repeat TaoSolve","",ntests,&ntests,&flag); 
199:   PetscLogStageRegister("Trials",&user.stages[1]); 
200:   PetscLogStagePush(user.stages[1]); 
201:   for (i=0; i<ntests; i++){
202:     PetscGetTime(&v1); 
203:     TaoSolve(tao);  
204:     PetscGetTime(&v2); 
205:     if (showtime) {
206:       PetscPrintf(PETSC_COMM_WORLD,"Elapsed time = %G\n",v2-v1); 
207:     }
208:     PetscPrintf(PETSC_COMM_WORLD,"KSP Iterations = %D\n",user.ksp_its); 
209:     VecCopy(x0,user.x); 
210:   }
211:   PetscLogStagePop(); 
212:   PetscBarrier((PetscObject)user.x); 
213:   PetscPrintf(PETSC_COMM_WORLD,"KSP iterations within initialization: ");
214:   PetscPrintf(PETSC_COMM_WORLD,"%D\n",user.ksp_its_initial);

216:   TaoGetTerminationReason(tao,&reason); 

218:   if (reason < 0)
219:   {
220:     PetscPrintf(MPI_COMM_WORLD, "TAO failed to converge.\n");
221:   }
222:   else
223:   {
224:     PetscPrintf(MPI_COMM_WORLD, "Optimization terminated with status %D.\n", reason);
225:   }


228:   /* Free TAO data structures */
229:   TaoDestroy(&tao); 

231:   /* Free PETSc data structures */
232:   VecDestroy(&x0); 

234:   EllipticDestroy(&user); 

236:   /* Finalize TAO, PETSc */
237:   TaoFinalize();
238:   PetscFinalize(); 

240:   return 0;
241: }
242: /* ------------------------------------------------------------------- */
245: /* 
246:    dwork = Qy - d  
247:    lwork = L*(u-ur)
248:    f = 1/2 * (dwork.dwork + alpha*lwork.lwork)
249: */
250: PetscErrorCode FormFunction(TaoSolver tao,Vec X,PetscReal *f,void *ptr)
251: {
253:   PetscReal d1=0,d2=0;
254:   AppCtx *user = (AppCtx*)ptr;
256:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 
257:   MatMult(user->MQ,user->y,user->dwork); 
258:   VecAXPY(user->dwork,-1.0,user->d); 
259:   VecDot(user->dwork,user->dwork,&d1); 

261:   VecWAXPY(user->uwork,-1.0,user->ur,user->u); 
262:   MatMult(user->L,user->uwork,user->lwork); 
263:   VecDot(user->lwork,user->lwork,&d2); 
264:   
265:   *f = 0.5 * (d1 + user->alpha*d2); 
266:   return(0);
267: }

269: /* ------------------------------------------------------------------- */
272: /*  
273:     state: g_s = Q' *(Qy - d)
274:     design: g_d = alpha*L'*L*(u-ur)
275: */
276: PetscErrorCode FormGradient(TaoSolver tao,Vec X,Vec G,void *ptr)
277: {
279:   AppCtx *user = (AppCtx*)ptr;
281:   CHKMEMQ;
282:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 
283:   MatMult(user->MQ,user->y,user->dwork); 
284:   VecAXPY(user->dwork,-1.0,user->d); 

286:   MatMultTranspose(user->MQ,user->dwork,user->ywork); 
287:   
288:   VecWAXPY(user->uwork,-1.0,user->ur,user->u); 
289:   MatMult(user->L,user->uwork,user->lwork); 
290:   MatMultTranspose(user->L,user->lwork,user->uwork); 
291:   VecScale(user->uwork, user->alpha); 

293:                       
294:   Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter); 
295:   CHKMEMQ;
296:   return(0);
297: }

301: PetscErrorCode FormFunctionGradient(TaoSolver tao, Vec X, PetscReal *f, Vec G, void *ptr)
302: {
304:   PetscReal d1,d2;
305:   AppCtx *user = (AppCtx*)ptr;
307:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 

309:   MatMult(user->MQ,user->y,user->dwork); 
310:   VecAXPY(user->dwork,-1.0,user->d); 
311:   VecDot(user->dwork,user->dwork,&d1); 
312:   MatMultTranspose(user->MQ,user->dwork,user->ywork); 

314:   VecWAXPY(user->uwork,-1.0,user->ur,user->u); 
315:   MatMult(user->L,user->uwork,user->lwork); 
316:   VecDot(user->lwork,user->lwork,&d2); 
317:   MatMultTranspose(user->L,user->lwork,user->uwork); 
318:   VecScale(user->uwork, user->alpha); 
319:   *f = 0.5 * (d1 + user->alpha*d2); 

321:   
322:   
323:   Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter); 
324:   return(0);

326: }


329: /* ------------------------------------------------------------------- */
332: /* A 
333: MatShell object
334: */
335: PetscErrorCode FormJacobianState(TaoSolver tao, Vec X, Mat *J, Mat* JPre, Mat* JInv, MatStructure* flag, void *ptr)
336: {
338:   AppCtx *user = (AppCtx*)ptr;

341:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 
342:   /* DSG = Div * (1/Av_u) * Grad */
343:   VecSet(user->uwork,0); 
344:   VecAXPY(user->uwork,-1.0,user->u); 
345:   VecExp(user->uwork); 
346:   MatMult(user->Av,user->uwork,user->Av_u); 
347:   VecCopy(user->Av_u,user->Swork);  
348:   VecReciprocal(user->Swork); 
349:   if (user->use_ptap) {
350:     MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES); 
351:     MatPtAP(user->Diag,user->Grad,MAT_REUSE_MATRIX,1.0,&user->DSG);
352:   } else {
353:     MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN); 
354:     MatDiagonalScale(user->Divwork,PETSC_NULL,user->Swork); 
355:     MatMatMultNumeric(user->Divwork,user->Grad,user->DSG); 
356:   }
357:   *flag = SAME_NONZERO_PATTERN;
358:   return(0);
359: }
360: /* ------------------------------------------------------------------- */
363: /* B */
364: PetscErrorCode FormJacobianDesign(TaoSolver tao, Vec X, Mat *J, void *ptr)
365: {
367:   AppCtx *user = (AppCtx*)ptr;

370:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 
371:   *J = user->Jd;

373:   return(0);
374: }

378: PetscErrorCode StateBlockMatMult(Mat J_shell, Vec X, Vec Y) 
379: {
381:   PetscReal sum;
382:   void *ptr;
383:   AppCtx *user;
385:   MatShellGetContext(J_shell,&ptr); 
386:   user = (AppCtx*)ptr;
387:   MatMult(user->DSG,X,Y);
388:   VecSum(X,&sum); 
389:   sum /= user->ndesign;
390:   VecShift(Y,sum); 
391:   return(0);
392: }



398: PetscErrorCode StateMatMult(Mat J_shell, Vec X, Vec Y) 
399: {
401:   void *ptr;
402:   PetscInt i;
403:   AppCtx *user;
405:   MatShellGetContext(J_shell,&ptr); 
406:   user = (AppCtx*)ptr;
407:   if (user->ns == 1) {
408:     MatMult(user->JsBlock,X,Y); 
409:   } else {
410:     for (i=0;i<user->ns;i++) {
411:       Scatter(X,user->subq,user->yi_scatter[i],0,0); 
412:       Scatter(Y,user->suby,user->yi_scatter[i],0,0); 
413:       MatMult(user->JsBlock,user->subq,user->suby); 
414:       Gather(X,user->subq,user->yi_scatter[i],0,0); 
415:       Gather(Y,user->suby,user->yi_scatter[i],0,0); 

417:     }
418:   }
419:   return(0);
420: }

424: PetscErrorCode StateInvMatMult(Mat J_shell, Vec X, Vec Y) 
425: {
427:   PetscInt its,i;
428:   PetscReal tau;
429:   void *ptr;
430:   AppCtx *user;
432:   MatShellGetContext(J_shell,&ptr); 
433:   user = (AppCtx*)ptr;
434:   KSPSetOperators(user->solver,user->JsBlock,user->DSG,SAME_NONZERO_PATTERN); 

436:   if (Y == user->ytrue) {
437:     KSPSetTolerances(user->solver,1e-8,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);
438:   } else if (user->lcl) {
439:     tau = user->lcl->tau[user->lcl->solve_type];
440:     KSPSetTolerances(user->solver,tau,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);
441:   }

443:   if (user->ns == 1) {
444:     KSPSolve(user->solver,X,Y); 
445:     KSPGetIterationNumber(user->solver,&its); 
446:     user->ksp_its+=its;
447:   } else {
448:     for (i=0;i<user->ns;i++) {
449:       Scatter(X,user->subq,user->yi_scatter[i],0,0); 
450:       Scatter(Y,user->suby,user->yi_scatter[i],0,0); 
451:       KSPSolve(user->solver,user->subq,user->suby); 
452:       KSPGetIterationNumber(user->solver,&its); 
453:       user->ksp_its+=its;
454:       Gather(X,user->subq,user->yi_scatter[i],0,0); 
455:       Gather(Y,user->suby,user->yi_scatter[i],0,0); 
456:     }    
457:   }
458:   

460:   return(0);
461: }
464: PetscErrorCode QMatMult(Mat J_shell, Vec X, Vec Y)
465: {
467:   void *ptr;
468:   AppCtx *user;
469:   PetscInt i;
471:   MatShellGetContext(J_shell,&ptr); 
472:   user = (AppCtx*)ptr;
473:   if (user->ns == 1) {
474:     MatMult(user->Q,X,Y); 
475:   } else {
476:     for (i=0;i<user->ns;i++) {
477:       Scatter(X,user->subq,user->yi_scatter[i],0,0); 
478:       Scatter(Y,user->subd,user->di_scatter[i],0,0); 
479:       MatMult(user->Q,user->subq,user->subd); 
480:       Gather(X,user->subq,user->yi_scatter[i],0,0); 
481:       Gather(Y,user->subd,user->di_scatter[i],0,0); 
482:     }
483:   }
484:     
485:   return(0);

487: }

491: PetscErrorCode QMatMultTranspose(Mat J_shell, Vec X, Vec Y)
492: {
494:   void *ptr;
495:   AppCtx *user;
496:   PetscInt i;
498:   MatShellGetContext(J_shell,&ptr); 
499:   user = (AppCtx*)ptr;
500:   if (user->ns == 1) {
501:       MatMultTranspose(user->Q,X,Y); 
502:   } else {
503:     for (i=0;i<user->ns;i++) {
504:       Scatter(X,user->subd,user->di_scatter[i],0,0); 
505:       Scatter(Y,user->suby,user->yi_scatter[i],0,0); 
506:       MatMultTranspose(user->Q,user->subd,user->suby); 
507:       Gather(X,user->subd,user->di_scatter[i],0,0); 
508:       Gather(Y,user->suby,user->yi_scatter[i],0,0); 
509:     }
510:   }
511:   return(0);

513: }

517: PetscErrorCode DesignMatMult(Mat J_shell, Vec X, Vec Y) 
518: {
520:   void *ptr;
521:   PetscInt i;
522:   AppCtx *user;
524:   MatShellGetContext(J_shell,&ptr); 
525:   user = (AppCtx*)ptr;

527:   /* sdiag(1./v) */ 
528:   VecSet(user->uwork,0); 
529:   VecAXPY(user->uwork,-1.0,user->u); 
530:   VecExp(user->uwork);   

532:   /* sdiag(1./((Av*(1./v)).^2)) */
533:   MatMult(user->Av,user->uwork,user->Swork); 
534:   VecPointwiseMult(user->Swork,user->Swork,user->Swork); 
535:   VecReciprocal(user->Swork);  

537:   /* (Av * (sdiag(1./v) * b)) */ 
538:   VecPointwiseMult(user->uwork,user->uwork,X); 
539:   MatMult(user->Av,user->uwork,user->Twork); 

541:   /* (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b))) */
542:   VecPointwiseMult(user->Swork,user->Twork,user->Swork);  

544:   if (user->ns == 1) {
545:     /* (sdiag(Grad*y(:,i)) */
546:     MatMult(user->Grad,user->y,user->Twork); 
547:   
548:     /* Div * (sdiag(Grad*y(:,i)) * (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b)))) */
549:     VecPointwiseMult(user->Swork,user->Twork,user->Swork);  
550:     MatMultTranspose(user->Grad,user->Swork,Y); 
551:   } else {
552:     for (i=0;i<user->ns;i++) {
553:       Scatter(user->y,user->suby,user->yi_scatter[i],0,0); 
554:       Scatter(Y,user->subq,user->yi_scatter[i],0,0); 
555:   
556:       MatMult(user->Grad,user->suby,user->Twork); 
557:       VecPointwiseMult(user->Twork,user->Twork,user->Swork); 
558:       MatMultTranspose(user->Grad,user->Twork,user->subq); 
559:       Gather(user->y,user->suby,user->yi_scatter[i],0,0); 
560:       Gather(Y,user->subq,user->yi_scatter[i],0,0); 


563:     }
564:   }
565:   return(0);
566: }

570: PetscErrorCode DesignMatMultTranspose(Mat J_shell, Vec X, Vec Y) 
571: {
573:   void *ptr;
574:   PetscInt i;
575:   AppCtx *user;
577:   MatShellGetContext(J_shell,&ptr); 
578:   user = (AppCtx*)ptr;

580:   VecZeroEntries(Y); 

582:   /* Sdiag = 1./((Av*(1./v)).^2) */
583:   VecSet(user->uwork,0); 
584:   VecAXPY(user->uwork,-1.0,user->u); 
585:   VecExp(user->uwork); 
586:   MatMult(user->Av,user->uwork,user->Swork); 
587:   VecPointwiseMult(user->Sdiag,user->Swork,user->Swork); 
588:   VecReciprocal(user->Sdiag); 
589:   
590:   for (i=0;i<user->ns;i++) {
591:     Scatter(X,user->subq,user->yi_scatter[i],0,0); 
592:     Scatter(user->y,user->suby,user->yi_scatter[i],0,0); 

594:     /* Swork = (Div' * b(:,i)) */
595:     MatMult(user->Grad,user->subq,user->Swork); 

597:     /* Twork = Grad*y(:,i) */
598:     MatMult(user->Grad,user->suby,user->Twork); 

600:     /* Twork = sdiag(Twork) * Swork */
601:     VecPointwiseMult(user->Twork,user->Swork,user->Twork); 
602:   

604:     /* Swork = pointwisemult(Sdiag,Twork) */
605:     VecPointwiseMult(user->Swork,user->Twork,user->Sdiag); 

607:     /* Ywork = Av' * Swork */
608:     MatMultTranspose(user->Av,user->Swork,user->Ywork); 
609:   
610:     /* Ywork = pointwisemult(uwork,Ywork) */
611:     VecPointwiseMult(user->Ywork,user->uwork,user->Ywork); 
612:     
613:     VecAXPY(Y,1.0,user->Ywork); 
614:     
615:     Gather(X,user->subq,user->yi_scatter[i],0,0); 
616:     Gather(user->y,user->suby,user->yi_scatter[i],0,0); 
617:   }  

619:   return(0);
620: }

624: PetscErrorCode FormConstraints(TaoSolver tao, Vec X, Vec C, void *ptr)
625: {
626:    /* C=Ay - q      A = Div * Sigma * Grad + hx*hx*hx*ones(n,n) */
628:    PetscReal sum;
629:    PetscInt i;
630:    AppCtx *user = (AppCtx*)ptr;
632:    
633:    Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter); 
634:    
635:    if (user->ns == 1) {
636:      MatMult(user->Grad,user->y,user->Swork);  
637:      VecPointwiseDivide(user->Swork,user->Swork,user->Av_u); 
638:      MatMultTranspose(user->Grad,user->Swork,C);  
639:  
640:      VecSum(user->y,&sum); 
641:      sum /= user->ndesign;
642:      VecShift(C,sum); 

644:    } else {
645:      for (i=0;i<user->ns;i++) {
646:       Scatter(user->y,user->suby,user->yi_scatter[i],0,0); 
647:       Scatter(C,user->subq,user->yi_scatter[i],0,0); 
648:       MatMult(user->Grad,user->suby,user->Swork);  
649:       VecPointwiseDivide(user->Swork,user->Swork,user->Av_u); 
650:       MatMultTranspose(user->Grad,user->Swork,user->subq);  
651:  

653:       VecSum(user->suby,&sum); 
654:       sum /= user->ndesign;
655:       VecShift(user->subq,sum); 

657:       Gather(user->y,user->suby,user->yi_scatter[i],0,0); 
658:       Gather(C,user->subq,user->yi_scatter[i],0,0); 
659:      }
660:    }
661:    VecAXPY(C,-1.0,user->q); 
662:    return(0);
663: }

667: PetscErrorCode Scatter(Vec x, Vec sub1, VecScatter scat1, Vec sub2, VecScatter scat2)
668: {
671:   VecScatterBegin(scat1,x,sub1,INSERT_VALUES,SCATTER_FORWARD); 
672:   VecScatterEnd(scat1,x,sub1,INSERT_VALUES,SCATTER_FORWARD); 
673:   if (sub2) {
674:     VecScatterBegin(scat2,x,sub2,INSERT_VALUES,SCATTER_FORWARD); 
675:     VecScatterEnd(scat2,x,sub2,INSERT_VALUES,SCATTER_FORWARD); 
676:   }
677:   return(0);
678: }

682: PetscErrorCode Gather(Vec x, Vec sub1, VecScatter scat1, Vec sub2, VecScatter scat2)
683: {
686:   VecScatterBegin(scat1,sub1,x,INSERT_VALUES,SCATTER_REVERSE); 
687:   VecScatterEnd(scat1,sub1,x,INSERT_VALUES,SCATTER_REVERSE); 
688:   if (sub2) {
689:     VecScatterBegin(scat2,sub2,x,INSERT_VALUES,SCATTER_REVERSE); 
690:     VecScatterEnd(scat2,sub2,x,INSERT_VALUES,SCATTER_REVERSE); 
691:   }
692:   return(0);
693: }
694:   
695:     
698: PetscErrorCode EllipticInitialize(AppCtx *user)
699: {
701:   PetscInt m,n,i,j,k,l,linear_index,is,js,ks,ls,istart,iend,iblock;
702:   Vec XX,YY,ZZ,XXwork,YYwork,ZZwork,UTwork;
703:   PetscReal *x,*y,*z;
704:   PetscReal h,meanut;
705:   PetscScalar PI = 3.141592653589793238,hinv,neg_hinv,half = 0.5,sqrt_beta;
706:   PetscInt im,indx1,indx2,indy1,indy2,indz1,indz2,nx,ny,nz;
707:   IS is_alldesign,is_allstate;
708:   IS is_from_d;
709:   IS is_from_y;
710:   PetscInt lo,hi,hi2,lo2,ysubnlocal,dsubnlocal;
711:   const PetscInt *ranges, *subranges;
712:   PetscMPIInt mpisize,mpirank;
713:   PetscReal xri,yri,zri,xim,yim,zim,dx1,dx2,dy1,dy2,dz1,dz2,Dx,Dy,Dz;
714:   PetscScalar v,vx,vy,vz;
715:   PetscInt offset,subindex,subvec,nrank,kk;
716:   /* Data locations */
717:   PetscScalar xr[64] = {0.4970,     0.8498,     0.7814,     0.6268,     0.7782,     0.6402,     0.3617,     0.3160,     
718:                         0.3610,     0.5298,     0.6987,     0.3331,     0.7962,     0.5596,     0.3866,     0.6774,     
719:                         0.5407,     0.4518,     0.6702,     0.6061,     0.7580,     0.8997,     0.5198,     0.8326,     
720:                         0.2138,     0.9198,     0.3000,     0.2833,     0.8288,     0.7076,     0.1820,     0.0728,     
721:                         0.8447,     0.2367,     0.3239,     0.6413,     0.3114,     0.4731,     0.1192,     0.9273,     
722:                         0.5724,     0.4331,     0.5136,     0.3547,     0.4413,     0.2602,     0.5698,     0.7278,     
723:                         0.5261,     0.6230,     0.2454,     0.3948,     0.7479,     0.6582,     0.4660,     0.5594,     
724:                         0.7574,     0.1143,     0.5900,     0.1065,     0.4260,     0.3294,     0.8276,     0.0756};
725:   
726:   PetscScalar yr[64] = {0.7345,     0.9120,     0.9288,     0.7528,     0.4463,     0.4985,     0.2497,     0.6256,     
727:                         0.3425,     0.9026,     0.6983,     0.4230,     0.7140,     0.2970,     0.4474,     0.8792,     
728:                         0.6604,     0.2485,     0.7968,     0.6127,     0.1796,     0.2437,     0.5938,     0.6137,     
729:                         0.3867,     0.5658,     0.4575,     0.1009,     0.0863,     0.3361,     0.0738,     0.3985,     
730:                         0.6602,     0.1437,     0.0934,     0.5983,     0.5950,     0.0763,     0.0768,     0.2288,     
731:                         0.5761,     0.1129,     0.3841,     0.6150,     0.6904,     0.6686,     0.1361,     0.4601,     
732:                         0.4491,     0.3716,     0.1969,     0.6537,     0.6743,     0.6991,     0.4811,     0.5480,     
733:                         0.1684,     0.4569,     0.6889,     0.8437,     0.3015,     0.2854,     0.8199,     0.2658};
734:   
735:   PetscScalar zr[64] = {0.7668,     0.8573,     0.2654,     0.2719,     0.1060,     0.1311,     0.6232,     0.2295,     
736:                         0.8009,     0.2147,     0.2119,     0.9325,     0.4473,     0.3600,     0.3374,     0.3819,     
737:                         0.4066,     0.5801,     0.1673,     0.0959,     0.4638,     0.8236,     0.8800,     0.2939,     
738:                         0.2028,     0.8262,     0.2706,     0.6276,     0.9085,     0.6443,     0.8241,     0.0712,     
739:                         0.1824,     0.7789,     0.4389,     0.8415,     0.7055,     0.6639,     0.3653,     0.2078,     
740:                         0.1987,     0.2297,     0.4321,     0.8115,     0.4915,     0.7764,     0.4657,     0.4627,     
741:                         0.4569,     0.4232,     0.8514,     0.0674,     0.3227,     0.1055,     0.6690,     0.6313,     
742:                         0.9226,     0.5461,     0.4126,     0.2364,     0.6096,     0.7042,     0.3914,     0.0711};

745:   MPI_Comm_size(PETSC_COMM_WORLD,&mpisize); 
746:   MPI_Comm_rank(PETSC_COMM_WORLD,&mpirank); 
747:   PetscLogStageRegister("Elliptic Setup",&user->stages[0]); 
748:   PetscLogStagePush(user->stages[0]); 

750:   /* Create u,y,c,x */
751:   VecCreate(PETSC_COMM_WORLD,&user->u); 
752:   VecCreate(PETSC_COMM_WORLD,&user->y); 
753:   VecCreate(PETSC_COMM_WORLD,&user->c); 
754:   VecSetSizes(user->u,PETSC_DECIDE,user->ndesign); 
755:   VecSetFromOptions(user->u); 
756:   VecGetLocalSize(user->u,&ysubnlocal); 
757:   VecSetSizes(user->y,ysubnlocal*user->ns,user->nstate); 
758:   VecSetSizes(user->c,ysubnlocal*user->ns,user->m); 
759:   VecSetFromOptions(user->y); 
760:   VecSetFromOptions(user->c); 

762:   /* 
763:      *******************************
764:      Create scatters for x <-> y,u
765:      *******************************

767:      If the state vector y and design vector u are partitioned as 
768:      [y_1; y_2; ...; y_np] and [u_1; u_2; ...; u_np] (with np = # of processors),
769:      then the solution vector x is organized as
770:      [y_1; u_1; y_2; u_2; ...; y_np; u_np]. 
771:      The index sets user->s_is and user->d_is correspond to the indices of the
772:      state and design variables owned by the current processor.
773:   */
774:   VecCreate(PETSC_COMM_WORLD,&user->x); 

776:   VecGetOwnershipRange(user->y,&lo,&hi); 
777:   VecGetOwnershipRange(user->u,&lo2,&hi2);  

779:   ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_allstate); 
780:   ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+lo2,1,&user->s_is); 
781:   ISCreateStride(PETSC_COMM_SELF,hi2-lo2,lo2,1,&is_alldesign); 
782:   ISCreateStride(PETSC_COMM_SELF,hi2-lo2,hi+lo2,1,&user->d_is); 
783:   
784:   VecSetSizes(user->x,hi-lo+hi2-lo2,user->n); 
785:   VecSetFromOptions(user->x); 

787:   VecScatterCreate(user->x,user->s_is,user->y,is_allstate,&user->state_scatter); 
788:   VecScatterCreate(user->x,user->d_is,user->u,is_alldesign,&user->design_scatter); 
789:   ISDestroy(&is_alldesign); 
790:   ISDestroy(&is_allstate); 
791:   /* 
792:      *******************************
793:      Create scatter from y to y_1,y_2,...,y_ns 
794:      *******************************
795:   */
796:   PetscMalloc(user->ns*sizeof(VecScatter),&user->yi_scatter);
797:   VecDuplicate(user->u,&user->suby); 
798:   VecDuplicate(user->u,&user->subq); 

800:   VecGetOwnershipRange(user->y,&lo2,&hi2); 
801:   istart = 0;
802:   for (i=0; i<user->ns; i++){
803:     VecGetOwnershipRange(user->suby,&lo,&hi); 
804:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo2+istart,1,&is_from_y); 
805:     VecScatterCreate(user->y,is_from_y,user->suby,PETSC_NULL,&user->yi_scatter[i]); 
806:     istart = istart + hi-lo;
807:     ISDestroy(&is_from_y); 
808:   }
809:   /* 
810:      *******************************
811:      Create scatter from d to d_1,d_2,...,d_ns 
812:      *******************************
813:   */
814:   VecCreate(PETSC_COMM_WORLD,&user->subd); 
815:   VecSetSizes(user->subd,PETSC_DECIDE,user->ndata); 
816:   VecSetFromOptions(user->subd); 
817:   VecCreate(PETSC_COMM_WORLD,&user->d); 
818:   VecGetLocalSize(user->subd,&dsubnlocal); 
819:   VecSetSizes(user->d,dsubnlocal*user->ns,user->ndata*user->ns); 
820:   VecSetFromOptions(user->d); 
821:   PetscMalloc(user->ns*sizeof(VecScatter),&user->di_scatter);

823:   VecGetOwnershipRange(user->d,&lo2,&hi2); 
824:   istart = 0;
825:   for (i=0; i<user->ns; i++){
826:     VecGetOwnershipRange(user->subd,&lo,&hi); 
827:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo2+istart,1,&is_from_d); 
828:     VecScatterCreate(user->d,is_from_d,user->subd,PETSC_NULL,&user->di_scatter[i]); 
829:     istart = istart + hi-lo;
830:     ISDestroy(&is_from_d); 
831:   }
832:   
833:   PetscMalloc(user->mx*sizeof(PetscReal),&x); 
834:   PetscMalloc(user->mx*sizeof(PetscReal),&y); 
835:   PetscMalloc(user->mx*sizeof(PetscReal),&z); 

837:   user->ksp_its = 0;
838:   user->ksp_its_initial = 0;

840:   n = user->mx * user->mx * user->mx;
841:   m = 3 * user->mx * user->mx * (user->mx-1);
842:   sqrt_beta = PetscSqrtScalar(user->beta);


845:   VecCreate(PETSC_COMM_WORLD,&XX); 
846:   VecCreate(PETSC_COMM_WORLD,&user->q); 
847:   VecSetSizes(XX,ysubnlocal,n); 
848:   VecSetSizes(user->q,ysubnlocal*user->ns,user->m); 
849:   VecSetFromOptions(XX); 
850:   VecSetFromOptions(user->q); 

852:   VecDuplicate(XX,&YY); 
853:   VecDuplicate(XX,&ZZ); 
854:   VecDuplicate(XX,&XXwork); 
855:   VecDuplicate(XX,&YYwork); 
856:   VecDuplicate(XX,&ZZwork); 
857:   VecDuplicate(XX,&UTwork); 
858:   VecDuplicate(XX,&user->utrue); 

860:   /* map for striding q */
861:   /*
862:   PetscMalloc((mpisize+1)*sizeof(PetscInt),&ranges); 
863:   PetscMalloc((mpisize+1)*sizeof(PetscInt),&subranges);  */
864:   VecGetOwnershipRanges(user->q,&ranges); 
865:   VecGetOwnershipRanges(user->u,&subranges); 
866:   
867:   VecGetOwnershipRange(user->q,&lo2,&hi2); 
868:   VecGetOwnershipRange(user->u,&lo,&hi); 
869:   /* Generate 3D grid, and collect ns (1<=ns<=8) right-hand-side vectors into user->q */
870:   h = 1.0/user->mx;
871:   hinv = user->mx;
872:   neg_hinv = -hinv;

874:   VecGetOwnershipRange(XX,&istart,&iend); 
875:   for (linear_index=istart; linear_index<iend; linear_index++){
876:     i = linear_index % user->mx;
877:     j = ((linear_index-i)/user->mx) % user->mx;
878:     k = ((linear_index-i)/user->mx-j) / user->mx;
879:     vx = h*(i+0.5);
880:     vy = h*(j+0.5);
881:     vz = h*(k+0.5);        
882:     VecSetValues(XX,1,&linear_index,&vx,INSERT_VALUES); 
883:     VecSetValues(YY,1,&linear_index,&vy,INSERT_VALUES); 
884:     VecSetValues(ZZ,1,&linear_index,&vz,INSERT_VALUES); 
885:     for (is=0; is<2; is++){
886:       for (js=0; js<2; js++){
887:         for (ks=0; ks<2; ks++){
888:           ls = is*4 + js*2 + ks;
889:           if (ls<user->ns){
890:             l =ls*n + linear_index;
891:             /* remap */
892:             subindex = l%n;
893:             subvec = l/n;
894:             nrank=0;
895:             while (subindex >= subranges[nrank+1]) nrank++;
896:             offset = subindex - subranges[nrank];
897:             istart=0;
898:             for (kk=0;kk<nrank;kk++) istart+=user->ns*(subranges[kk+1]-subranges[kk]);
899:             istart += (subranges[nrank+1]-subranges[nrank])*subvec;
900:             l = istart+offset;
901:             v = 100*PetscSinScalar(2*PI*(vx+0.25*is))*sin(2*PI*(vy+0.25*js))*sin(2*PI*(vz+0.25*ks));
902:             VecSetValues(user->q,1,&l,&v,INSERT_VALUES); 
903:           }
904:         }
905:       }
906:     }
907:   }

909:   VecAssemblyBegin(XX); 
910:   VecAssemblyEnd(XX); 
911:   VecAssemblyBegin(YY); 
912:   VecAssemblyEnd(YY); 
913:   VecAssemblyBegin(ZZ); 
914:   VecAssemblyEnd(ZZ); 
915:   VecAssemblyBegin(user->q); 
916:   VecAssemblyEnd(user->q);   
917:   
918:   /* Compute true parameter function
919:      ut = exp(-((x-0.25)^2+(y-0.25)^2+(z-0.25)^2)/0.05) - exp((x-0.75)^2-(y-0.75)^2-(z-0.75))^2/0.05) */
920:   VecCopy(XX,XXwork); 
921:   VecCopy(YY,YYwork); 
922:   VecCopy(ZZ,ZZwork); 

924:   VecShift(XXwork,-0.25); 
925:   VecShift(YYwork,-0.25); 
926:   VecShift(ZZwork,-0.25); 

928:   VecPointwiseMult(XXwork,XXwork,XXwork); 
929:   VecPointwiseMult(YYwork,YYwork,YYwork); 
930:   VecPointwiseMult(ZZwork,ZZwork,ZZwork); 

932:   VecCopy(XXwork,UTwork); 
933:   VecAXPY(UTwork,1.0,YYwork); 
934:   VecAXPY(UTwork,1.0,ZZwork); 
935:   VecScale(UTwork,-20.0); 
936:   VecExp(UTwork); 
937:   VecCopy(UTwork,user->utrue); 

939:   VecCopy(XX,XXwork); 
940:   VecCopy(YY,YYwork); 
941:   VecCopy(ZZ,ZZwork); 

943:   VecShift(XXwork,-0.75); 
944:   VecShift(YYwork,-0.75); 
945:   VecShift(ZZwork,-0.75); 

947:   VecPointwiseMult(XXwork,XXwork,XXwork); 
948:   VecPointwiseMult(YYwork,YYwork,YYwork); 
949:   VecPointwiseMult(ZZwork,ZZwork,ZZwork); 

951:   VecCopy(XXwork,UTwork); 
952:   VecAXPY(UTwork,1.0,YYwork); 
953:   VecAXPY(UTwork,1.0,ZZwork); 
954:   VecScale(UTwork,-20.0); 
955:   VecExp(UTwork); 

957:   VecAXPY(user->utrue,-1.0,UTwork); 

959:   VecDestroy(&XX); 
960:   VecDestroy(&YY); 
961:   VecDestroy(&ZZ); 
962:   VecDestroy(&XXwork); 
963:   VecDestroy(&YYwork); 
964:   VecDestroy(&ZZwork); 
965:   VecDestroy(&UTwork); 
966:  
967:   /* Initial guess and reference model */
968:   VecDuplicate(user->utrue,&user->ur); 
969:   VecSum(user->utrue,&meanut); 
970:   meanut = meanut / n;
971:   VecSet(user->ur,meanut); 
972:   VecCopy(user->ur,user->u); 

974:   /* Generate Grad matrix */
975:   MatCreate(PETSC_COMM_WORLD,&user->Grad); 
976:   MatSetSizes(user->Grad,PETSC_DECIDE,ysubnlocal,m,n); 
977:   MatSetFromOptions(user->Grad); 
978:   MatMPIAIJSetPreallocation(user->Grad,2,PETSC_NULL,2,PETSC_NULL); 
979:   MatSeqAIJSetPreallocation(user->Grad,2,PETSC_NULL); 
980:   MatGetOwnershipRange(user->Grad,&istart,&iend);

982:   for (i=istart; i<iend; i++){
983:     if (i<m/3){
984:       iblock = i / (user->mx-1);
985:       j = iblock*user->mx + (i % (user->mx-1));
986:       MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES); 
987:       j = j+1;
988:       MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES); 
989:     }
990:     if (i>=m/3 && i<2*m/3){
991:       iblock = (i-m/3) / (user->mx*(user->mx-1));
992:       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
993:       MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES); 
994:       j = j + user->mx;
995:       MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES); 
996:     }
997:     if (i>=2*m/3){
998:       j = i-2*m/3;
999:       MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES); 
1000:       j = j + user->mx*user->mx;
1001:       MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES); 
1002:     }
1003:   }

1005:   MatAssemblyBegin(user->Grad,MAT_FINAL_ASSEMBLY); 
1006:   MatAssemblyEnd(user->Grad,MAT_FINAL_ASSEMBLY); 



1010:   /* Generate arithmetic averaging matrix Av */
1011:   MatCreate(PETSC_COMM_WORLD,&user->Av); 
1012:   MatSetSizes(user->Av,PETSC_DECIDE,ysubnlocal,m,n); 
1013:   MatSetFromOptions(user->Av); 
1014:   MatMPIAIJSetPreallocation(user->Av,2,PETSC_NULL,2,PETSC_NULL); 
1015:   MatSeqAIJSetPreallocation(user->Av,2,PETSC_NULL); 
1016:   MatGetOwnershipRange(user->Av,&istart,&iend);

1018:   for (i=istart; i<iend; i++){
1019:     if (i<m/3){
1020:       iblock = i / (user->mx-1);
1021:       j = iblock*user->mx + (i % (user->mx-1));
1022:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES); 
1023:       j = j+1;
1024:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES); 
1025:     }
1026:     if (i>=m/3 && i<2*m/3){
1027:       iblock = (i-m/3) / (user->mx*(user->mx-1));
1028:       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
1029:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES); 
1030:       j = j + user->mx;
1031:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES); 
1032:     }
1033:     if (i>=2*m/3){
1034:       j = i-2*m/3;
1035:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES); 
1036:       j = j + user->mx*user->mx;
1037:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES); 
1038:     }
1039:   }

1041:   MatAssemblyBegin(user->Av,MAT_FINAL_ASSEMBLY); 
1042:   MatAssemblyEnd(user->Av,MAT_FINAL_ASSEMBLY); 


1045:   MatCreate(PETSC_COMM_WORLD,&user->L); 
1046:   MatSetSizes(user->L,PETSC_DECIDE,ysubnlocal,m+n,n); 
1047:   MatSetFromOptions(user->L); 
1048:   MatMPIAIJSetPreallocation(user->L,2,PETSC_NULL,2,PETSC_NULL); 
1049:   MatSeqAIJSetPreallocation(user->L,2,PETSC_NULL); 
1050:   MatGetOwnershipRange(user->L,&istart,&iend);

1052:   for (i=istart; i<iend; i++){
1053:     if (i<m/3){
1054:       iblock = i / (user->mx-1);
1055:       j = iblock*user->mx + (i % (user->mx-1));
1056:       MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES); 
1057:       j = j+1;
1058:       MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES); 
1059:     }
1060:     if (i>=m/3 && i<2*m/3){
1061:       iblock = (i-m/3) / (user->mx*(user->mx-1));
1062:       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
1063:       MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES); 
1064:       j = j + user->mx;
1065:       MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES); 
1066:     }
1067:     if (i>=2*m/3 && i<m){
1068:       j = i-2*m/3;
1069:       MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES); 
1070:       j = j + user->mx*user->mx;
1071:       MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES); 
1072:     }
1073:     if (i>=m){
1074:       j = i - m;
1075:       MatSetValues(user->L,1,&i,1,&j,&sqrt_beta,INSERT_VALUES); 
1076:     }
1077:   }

1079:   MatAssemblyBegin(user->L,MAT_FINAL_ASSEMBLY); 
1080:   MatAssemblyEnd(user->L,MAT_FINAL_ASSEMBLY); 

1082:   MatScale(user->L,PetscPowScalar(h,1.5)); 

1084:   /* Generate Div matrix */
1085:   if (!user->use_ptap) {
1086:     /* Generate Div matrix */
1087:     MatCreate(PETSC_COMM_WORLD,&user->Div); 
1088:     MatSetSizes(user->Div,ysubnlocal,PETSC_DECIDE,n,m); 
1089:     MatSetFromOptions(user->Div); 
1090:     MatMPIAIJSetPreallocation(user->Div,4,PETSC_NULL,4,PETSC_NULL); 
1091:     MatSeqAIJSetPreallocation(user->Div,6,PETSC_NULL); 
1092:     MatGetOwnershipRange(user->Grad,&istart,&iend);

1094:     for (i=istart; i<iend; i++){
1095:       if (i<m/3){
1096:         iblock = i / (user->mx-1);
1097:         j = iblock*user->mx + (i % (user->mx-1));
1098:         MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES); 
1099:         j = j+1;
1100:         MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES); 
1101:       }
1102:       if (i>=m/3 && i<2*m/3){
1103:         iblock = (i-m/3) / (user->mx*(user->mx-1));
1104:         j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
1105:         MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES); 
1106:         j = j + user->mx;
1107:         MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES); 
1108:       }
1109:       if (i>=2*m/3){
1110:         j = i-2*m/3;
1111:         MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES); 
1112:         j = j + user->mx*user->mx;
1113:         MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES); 
1114:       }
1115:     }

1117:     MatAssemblyBegin(user->Div,MAT_FINAL_ASSEMBLY); 
1118:     MatAssemblyEnd(user->Div,MAT_FINAL_ASSEMBLY); 

1120:     MatDuplicate(user->Div,MAT_SHARE_NONZERO_PATTERN,&user->Divwork); 
1121:   } else {
1122:     MatCreate(PETSC_COMM_WORLD,&user->Diag); 
1123:     MatSetSizes(user->Diag,PETSC_DECIDE,PETSC_DECIDE,m,m);
1124:     MatSetFromOptions(user->Diag); 
1125:     MatMPIAIJSetPreallocation(user->Diag,1,PETSC_NULL,0,PETSC_NULL); 
1126:     MatSeqAIJSetPreallocation(user->Diag,1,PETSC_NULL); 
1127:   

1129:   }


1132:   /* Build work vectors and matrices */
1133:   VecCreate(PETSC_COMM_WORLD,&user->S); 
1134:   VecSetSizes(user->S, PETSC_DECIDE, m); 
1135:   VecSetFromOptions(user->S); 

1137:   VecCreate(PETSC_COMM_WORLD,&user->lwork); 
1138:   VecSetSizes(user->lwork,PETSC_DECIDE,m+user->mx*user->mx*user->mx);  
1139:   VecSetFromOptions(user->lwork); 

1141:   MatDuplicate(user->Av,MAT_SHARE_NONZERO_PATTERN,&user->Avwork); 


1144:   VecDuplicate(user->S,&user->Swork); 
1145:   VecDuplicate(user->S,&user->Sdiag); 
1146:   VecDuplicate(user->S,&user->Av_u); 
1147:   VecDuplicate(user->S,&user->Twork); 
1148:   VecDuplicate(user->y,&user->ywork); 
1149:   VecDuplicate(user->u,&user->Ywork); 
1150:   VecDuplicate(user->u,&user->uwork); 

1152:   VecDuplicate(user->u,&user->js_diag); 

1154:   VecDuplicate(user->c,&user->cwork); 
1155:   VecDuplicate(user->d,&user->dwork); 

1157:   
1158:   /* Create a matrix-free shell user->Jd for computing B*x */
1159:   MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal,user->nstate,user->ndesign,user,&user->Jd); 
1160:   MatShellSetOperation(user->Jd,MATOP_MULT,(void(*)(void))DesignMatMult); 
1161:   MatShellSetOperation(user->Jd,MATOP_MULT_TRANSPOSE,(void(*)(void))DesignMatMultTranspose); 


1164:   /* Compute true state function ytrue given utrue */
1165:   VecDuplicate(user->y,&user->ytrue); 

1167:   /* First compute Av_u = Av*exp(-u) */
1168:   VecSet(user->uwork, 0); 
1169:   VecAXPY(user->uwork,-1.0,user->utrue); /* Note: user->utrue */
1170:   VecExp(user->uwork); 
1171:   MatMult(user->Av,user->uwork,user->Av_u); 

1173:   /* Next form DSG = Div*S*Grad */
1174:   VecCopy(user->Av_u,user->Swork);  
1175:   VecReciprocal(user->Swork); 
1176:   if (user->use_ptap) {
1177:     MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES); 
1178:     MatPtAP(user->Diag,user->Grad,MAT_INITIAL_MATRIX,1.0,&user->DSG);
1179:   } else {
1180:     MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN); 
1181:     MatDiagonalScale(user->Divwork,PETSC_NULL,user->Swork); 
1182:     MatMatMultSymbolic(user->Divwork,user->Grad,1.0,&user->DSG); 
1183:     MatMatMultNumeric(user->Divwork,user->Grad,user->DSG); 
1184:   }
1185:     
1186:   MatSetOption(user->DSG,MAT_SYMMETRIC,PETSC_TRUE); 
1187:   MatSetOption(user->DSG,MAT_SYMMETRY_ETERNAL,PETSC_TRUE); 

1189:   if (user->use_lrc == PETSC_TRUE) {
1190:     v=PetscSqrtReal(1.0 /user->ndesign);
1191:     PetscMalloc(user->ndesign*sizeof(PetscReal),&user->ones); 
1192:     
1193:     for (i=0;i<user->ndesign;i++) {
1194:       user->ones[i]=v;
1195:     }
1196:     MatCreateDense(PETSC_COMM_WORLD,ysubnlocal,PETSC_DECIDE,user->ndesign,1,user->ones,&user->Ones); 
1197:     MatAssemblyBegin(user->Ones, MAT_FINAL_ASSEMBLY); 
1198:     MatAssemblyEnd(user->Ones, MAT_FINAL_ASSEMBLY); 
1199:     MatCreateLRC(user->DSG,user->Ones,user->Ones,&user->JsBlock); 
1200:     MatSetUp(user->JsBlock); 
1201:   } else {
1202:     /* Create matrix-free shell user->Js for computing (A + h^3*e*e^T)*x */
1203:     MatCreateShell(PETSC_COMM_WORLD,ysubnlocal,ysubnlocal,user->ndesign,user->ndesign,user,&user->JsBlock); 
1204:     MatShellSetOperation(user->JsBlock,MATOP_MULT,(void(*)(void))StateBlockMatMult); 
1205:     MatShellSetOperation(user->JsBlock,MATOP_MULT_TRANSPOSE,(void(*)(void))StateBlockMatMult); 

1207:   }
1208:   MatSetOption(user->JsBlock,MAT_SYMMETRIC,PETSC_TRUE); 
1209:   MatSetOption(user->JsBlock,MAT_SYMMETRY_ETERNAL,PETSC_TRUE); 
1210:   MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal*user->ns,user->nstate,user->nstate,user,&user->Js); 
1211:   MatShellSetOperation(user->Js,MATOP_MULT,(void(*)(void))StateMatMult); 
1212:   MatShellSetOperation(user->Js,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatMult); 
1213:   MatSetOption(user->Js,MAT_SYMMETRIC,PETSC_TRUE); 
1214:   MatSetOption(user->Js,MAT_SYMMETRY_ETERNAL,PETSC_TRUE); 


1217:   MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal*user->ns,user->nstate,user->nstate,user,&user->JsInv); 
1218:   MatShellSetOperation(user->JsInv,MATOP_MULT,(void(*)(void))StateInvMatMult); 
1219:   MatShellSetOperation(user->JsInv,MATOP_MULT_TRANSPOSE,(void(*)(void))StateInvMatMult); 
1220:   MatSetOption(user->JsInv,MAT_SYMMETRIC,PETSC_TRUE); 
1221:   MatSetOption(user->JsInv,MAT_SYMMETRY_ETERNAL,PETSC_TRUE); 




1226:     
1227:   MatSetOption(user->DSG,MAT_SYMMETRIC,PETSC_TRUE); 
1228:   MatSetOption(user->DSG,MAT_SYMMETRY_ETERNAL,PETSC_TRUE); 
1229:   /* Now solve for ytrue */
1230:   KSPCreate(PETSC_COMM_WORLD,&user->solver); 
1231:   KSPSetFromOptions(user->solver); 


1234:   KSPSetOperators(user->solver,user->JsBlock,user->DSG,SAME_NONZERO_PATTERN); 
1235:   user->lcl->solve_type = LCL_FORWARD1;
1236:   MatMult(user->JsInv,user->q,user->ytrue); 
1237:   /* First compute Av_u = Av*exp(-u) */
1238:   VecSet(user->uwork,0);
1239:   VecAXPY(user->uwork,-1.0,user->u); /* Note: user->u */
1240:   VecExp(user->uwork); 
1241:   MatMult(user->Av,user->uwork,user->Av_u); 
1242:  
1243:   /* Next update DSG = Div*S*Grad  with user->u */
1244:   VecCopy(user->Av_u,user->Swork);  
1245:   VecReciprocal(user->Swork); 
1246:   if (user->use_ptap) {
1247:     MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES); 
1248:     MatPtAP(user->Diag,user->Grad,MAT_REUSE_MATRIX,1.0,&user->DSG);
1249:   } else {
1250:     MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN); 
1251:     MatDiagonalScale(user->Divwork,PETSC_NULL,user->Av_u); 
1252:     MatMatMultNumeric(user->Divwork,user->Grad,user->DSG); 
1253:   }


1256:   /* Now solve for y */
1257:   user->lcl->solve_type = LCL_FORWARD1;
1258:   MatMult(user->JsInv,user->q,user->y); 

1260:   user->ksp_its_initial = user->ksp_its;
1261:   user->ksp_its = 0;
1262:   /* Construct projection matrix Q (blocks) */
1263:   MatCreate(PETSC_COMM_WORLD,&user->Q); 
1264:   MatSetSizes(user->Q,dsubnlocal,ysubnlocal,user->ndata,user->ndesign); 
1265:   MatSetFromOptions(user->Q); 
1266:   MatMPIAIJSetPreallocation(user->Q,8,PETSC_NULL,8,PETSC_NULL); 
1267:   MatSeqAIJSetPreallocation(user->Q,8,PETSC_NULL); 

1269:   for (i=0; i<user->mx; i++){
1270:     x[i] = h*(i+0.5);
1271:     y[i] = h*(i+0.5);
1272:     z[i] = h*(i+0.5);
1273:   }
1274:   
1275:   MatGetOwnershipRange(user->Q,&istart,&iend);

1277:   nx = user->mx; ny = user->mx; nz = user->mx;
1278:   for (i=istart; i<iend; i++){
1279:     
1280:     xri = xr[i];
1281:     im = 0;
1282:     xim = x[im];
1283:     while (xri>xim && im<nx){
1284:       im = im+1;
1285:       xim = x[im];
1286:     }
1287:     indx1 = im-1;
1288:     indx2 = im;
1289:     dx1 = xri - x[indx1];
1290:     dx2 = x[indx2] - xri;

1292:     yri = yr[i];
1293:     im = 0;
1294:     yim = y[im];
1295:     while (yri>yim && im<ny){
1296:       im = im+1;
1297:       yim = y[im];
1298:     }
1299:     indy1 = im-1;
1300:     indy2 = im;
1301:     dy1 = yri - y[indy1];
1302:     dy2 = y[indy2] - yri;
1303:     
1304:     zri = zr[i];
1305:     im = 0;
1306:     zim = z[im];
1307:     while (zri>zim && im<nz){
1308:       im = im+1;
1309:       zim = z[im];
1310:     }
1311:     indz1 = im-1;
1312:     indz2 = im;
1313:     dz1 = zri - z[indz1];
1314:     dz2 = z[indz2] - zri;

1316:     Dx = x[indx2] - x[indx1];
1317:     Dy = y[indy2] - y[indy1];
1318:     Dz = z[indz2] - z[indz1];

1320:     j = indx1 + indy1*nx + indz1*nx*ny;
1321:     v = (1-dx1/Dx)*(1-dy1/Dy)*(1-dz1/Dz);
1322:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES); 

1324:     j = indx1 + indy1*nx + indz2*nx*ny;
1325:     v = (1-dx1/Dx)*(1-dy1/Dy)*(1-dz2/Dz);
1326:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES); 

1328:     j = indx1 + indy2*nx + indz1*nx*ny;
1329:     v = (1-dx1/Dx)*(1-dy2/Dy)*(1-dz1/Dz);
1330:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES); 

1332:     j = indx1 + indy2*nx + indz2*nx*ny;
1333:     v = (1-dx1/Dx)*(1-dy2/Dy)*(1-dz2/Dz);
1334:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES); 

1336:     j = indx2 + indy1*nx + indz1*nx*ny;
1337:     v = (1-dx2/Dx)*(1-dy1/Dy)*(1-dz1/Dz);
1338:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES); 

1340:     j = indx2 + indy1*nx + indz2*nx*ny;
1341:     v = (1-dx2/Dx)*(1-dy1/Dy)*(1-dz2/Dz);
1342:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES); 

1344:     j = indx2 + indy2*nx + indz1*nx*ny;
1345:     v = (1-dx2/Dx)*(1-dy2/Dy)*(1-dz1/Dz);
1346:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES); 

1348:     j = indx2 + indy2*nx + indz2*nx*ny;
1349:     v = (1-dx2/Dx)*(1-dy2/Dy)*(1-dz2/Dz);
1350:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES); 

1352:   }

1354:   MatAssemblyBegin(user->Q,MAT_FINAL_ASSEMBLY); 
1355:   MatAssemblyEnd(user->Q,MAT_FINAL_ASSEMBLY);   
1356:   /* Create MQ (composed of blocks of Q */
1357:   MatCreateShell(PETSC_COMM_WORLD,dsubnlocal*user->ns,PETSC_DECIDE,user->ndata*user->ns,user->nstate,user,&user->MQ); 
1358:   MatShellSetOperation(user->MQ,MATOP_MULT,(void(*)(void))QMatMult); 
1359:   MatShellSetOperation(user->MQ,MATOP_MULT_TRANSPOSE,(void(*)(void))QMatMultTranspose); 


1362:   /* Add noise to the measurement data */
1363:   VecSet(user->ywork,1.0); 
1364:   VecAYPX(user->ywork,user->noise,user->ytrue); 
1365:   MatMult(user->MQ,user->ywork,user->d); 

1367:   /* Now that initial conditions have been set, let the user pass tolerance options to the KSP solver */
1368:   PetscFree(x); 
1369:   PetscFree(y); 
1370:   PetscFree(z); 
1371:   PetscLogStagePop(); 

1373:   return(0);
1374: }

1378: PetscErrorCode EllipticDestroy(AppCtx *user)
1379: {
1381:   PetscInt i;

1384:   MatDestroy(&user->DSG); 
1385:   KSPDestroy(&user->solver); 
1386:   MatDestroy(&user->Q); 
1387:   MatDestroy(&user->MQ); 
1388:   if (!user->use_ptap) {
1389:     MatDestroy(&user->Div); 
1390:     MatDestroy(&user->Divwork); 
1391:   } else {
1392:     MatDestroy(&user->Diag); 
1393:   }
1394:   if (user->use_lrc) {
1395:     MatDestroy(&user->Ones); 
1396:   }

1398:   MatDestroy(&user->Grad); 
1399:   MatDestroy(&user->Av); 
1400:   MatDestroy(&user->Avwork); 
1401:   MatDestroy(&user->L); 
1402:   
1403:   MatDestroy(&user->Js); 
1404:   MatDestroy(&user->Jd); 
1405:   MatDestroy(&user->JsBlock); 
1406:   MatDestroy(&user->JsInv); 

1408:   VecDestroy(&user->x); 
1409:   VecDestroy(&user->u); 
1410:   VecDestroy(&user->uwork); 
1411:   VecDestroy(&user->utrue); 
1412:   VecDestroy(&user->y); 
1413:   VecDestroy(&user->ywork); 
1414:   VecDestroy(&user->ytrue); 
1415:   VecDestroy(&user->c); 
1416:   VecDestroy(&user->cwork); 
1417:   VecDestroy(&user->ur); 
1418:   VecDestroy(&user->q); 
1419:   VecDestroy(&user->d); 
1420:   VecDestroy(&user->dwork); 
1421:   VecDestroy(&user->lwork); 
1422:   VecDestroy(&user->S); 
1423:   VecDestroy(&user->Swork); 
1424:   VecDestroy(&user->Sdiag); 
1425:   VecDestroy(&user->Ywork); 
1426:   VecDestroy(&user->Twork); 
1427:   VecDestroy(&user->Av_u); 
1428:   VecDestroy(&user->js_diag); 
1429:   ISDestroy(&user->s_is); 
1430:   ISDestroy(&user->d_is); 
1431:   VecDestroy(&user->suby); 
1432:   VecDestroy(&user->subd); 
1433:   VecDestroy(&user->subq); 
1434:   VecScatterDestroy(&user->state_scatter); 
1435:   VecScatterDestroy(&user->design_scatter); 
1436:   for (i=0;i<user->ns;i++) {
1437:     VecScatterDestroy(&user->yi_scatter[i]); 
1438:     VecScatterDestroy(&user->di_scatter[i]); 
1439:   }
1440:   PetscFree(user->yi_scatter); 
1441:   PetscFree(user->di_scatter); 
1442:   if (user->use_lrc) {
1443:     PetscFree(user->ones); 
1444:     MatDestroy(&user->Ones); 
1445:   }

1447:   return(0);
1448: }

1452: PetscErrorCode EllipticMonitor(TaoSolver tao, void *ptr)
1453: {
1455:   Vec X;
1456:   PetscReal unorm,ynorm;
1457:   AppCtx *user = (AppCtx*)ptr;
1459:   TaoGetSolutionVector(tao,&X); 
1460:   Scatter(X,user->ywork,user->state_scatter,user->uwork,user->design_scatter); 
1461:   VecAXPY(user->ywork,-1.0,user->ytrue); 
1462:   VecAXPY(user->uwork,-1.0,user->utrue); 
1463:   VecNorm(user->uwork,NORM_2,&unorm); 
1464:   VecNorm(user->ywork,NORM_2,&ynorm); 
1465:   PetscPrintf(MPI_COMM_WORLD, "||u-ut||=%G ||y-yt||=%G\n",unorm,ynorm); 
1466:   return(0);
1467: }