Actual source code: elliptic.c

petsc-3.10.2 2018-10-09
Report Typos and Errors
  1: #include <petsc/private/taoimpl.h>

  3: /*T
  4:    Concepts: TAO^Solving a system of nonlinear equations, nonlinear least squares
  5:    Routines: TaoCreate();
  6:    Routines: TaoSetType();
  7:    Routines: TaoSetInitialVector();
  8:    Routines: TaoSetObjectiveRoutine();
  9:    Routines: TaoSetGradientRoutine();
 10:    Routines: TaoSetConstraintsRoutine();
 11:    Routines: TaoSetJacobianStateRoutine();
 12:    Routines: TaoSetJacobianDesignRoutine();
 13:    Routines: TaoSetStateDesignIS();
 14:    Routines: TaoSetFromOptions();
 15:    Routines: TaoSetHistory(); TaoGetHistory();
 16:    Routines: TaoSolve();
 17:    Routines: TaoDestroy();
 18:    Processors: n
 19: T*/



 23: typedef struct {
 24:   PetscInt n; /* Number of total variables */
 25:   PetscInt m; /* Number of constraints */
 26:   PetscInt nstate;
 27:   PetscInt ndesign;
 28:   PetscInt mx; /* grid points in each direction */
 29:   PetscInt ns; /* Number of data samples (1<=ns<=8)
 30:                   Currently only ns=1 is supported */
 31:   PetscInt ndata; /* Number of data points per sample */
 32:   IS       s_is;
 33:   IS       d_is;

 35:   VecScatter state_scatter;
 36:   VecScatter design_scatter;
 37:   VecScatter *yi_scatter, *di_scatter;
 38:   Vec        suby,subq,subd;
 39:   Mat        Js,Jd,JsPrec,JsInv,JsBlock;

 41:   PetscReal alpha; /* Regularization parameter */
 42:   PetscReal beta; /* Weight attributed to ||u||^2 in regularization functional */
 43:   PetscReal noise; /* Amount of noise to add to data */
 44:   PetscReal *ones;
 45:   Mat       Q;
 46:   Mat       MQ;
 47:   Mat       L;

 49:   Mat Grad;
 50:   Mat Av,Avwork;
 51:   Mat Div, Divwork;
 52:   Mat DSG;
 53:   Mat Diag,Ones;


 56:   Vec q;
 57:   Vec ur; /* reference */

 59:   Vec d;
 60:   Vec dwork;

 62:   Vec x; /* super vec of y,u */

 64:   Vec y; /* state variables */
 65:   Vec ywork;

 67:   Vec ytrue;

 69:   Vec u; /* design variables */
 70:   Vec uwork;

 72:   Vec utrue;

 74:   Vec js_diag;

 76:   Vec c; /* constraint vector */
 77:   Vec cwork;

 79:   Vec lwork;
 80:   Vec S;
 81:   Vec Swork,Twork,Sdiag,Ywork;
 82:   Vec Av_u;

 84:   KSP solver;
 85:   PC  prec;

 87:   PetscReal tola,tolb,tolc,told;
 88:   PetscInt  ksp_its;
 89:   PetscInt  ksp_its_initial;
 90:   PetscLogStage stages[10];
 91:   PetscBool use_ptap;
 92:   PetscBool use_lrc;
 93: } AppCtx;

 95: PetscErrorCode FormFunction(Tao, Vec, PetscReal*, void*);
 96: PetscErrorCode FormGradient(Tao, Vec, Vec, void*);
 97: PetscErrorCode FormFunctionGradient(Tao, Vec, PetscReal*, Vec, void*);
 98: PetscErrorCode FormJacobianState(Tao, Vec, Mat, Mat, Mat, void*);
 99: PetscErrorCode FormJacobianDesign(Tao, Vec, Mat,void*);
100: PetscErrorCode FormConstraints(Tao, Vec, Vec, void*);
101: PetscErrorCode FormHessian(Tao, Vec, Mat, Mat, void*);
102: PetscErrorCode Gather(Vec, Vec, VecScatter, Vec, VecScatter);
103: PetscErrorCode Scatter(Vec, Vec, VecScatter, Vec, VecScatter);
104: PetscErrorCode EllipticInitialize(AppCtx*);
105: PetscErrorCode EllipticDestroy(AppCtx*);
106: PetscErrorCode EllipticMonitor(Tao, void*);

108: PetscErrorCode StateBlockMatMult(Mat,Vec,Vec);
109: PetscErrorCode StateMatMult(Mat,Vec,Vec);

111: PetscErrorCode StateInvMatMult(Mat,Vec,Vec);
112: PetscErrorCode DesignMatMult(Mat,Vec,Vec);
113: PetscErrorCode DesignMatMultTranspose(Mat,Vec,Vec);

115: PetscErrorCode QMatMult(Mat,Vec,Vec);
116: PetscErrorCode QMatMultTranspose(Mat,Vec,Vec);

118: static  char help[]="";

120: int main(int argc, char **argv)
121: {
122:   PetscErrorCode     ierr;
123:   Vec                x0;
124:   Tao                tao;
125:   AppCtx             user;
126:   PetscInt           ntests = 1;
127:   PetscInt           i;

129:   PetscInitialize(&argc, &argv, (char*)0,help);if (ierr) return ierr;
130:   user.mx = 8;
131:   PetscOptionsBegin(PETSC_COMM_WORLD,NULL,NULL,NULL);
132:   PetscOptionsInt("-mx","Number of grid points in each direction","",user.mx,&user.mx,NULL);
133:   user.ns = 6;
134:   PetscOptionsInt("-ns","Number of data samples (1<=ns<=8)","",user.ns,&user.ns,NULL);
135:   user.ndata = 64;
136:   PetscOptionsInt("-ndata","Numbers of data points per sample","",user.ndata,&user.ndata,NULL);
137:   user.alpha = 0.1;
138:   PetscOptionsReal("-alpha","Regularization parameter","",user.alpha,&user.alpha,NULL);
139:   user.beta = 0.00001;
140:   PetscOptionsReal("-beta","Weight attributed to ||u||^2 in regularization functional","",user.beta,&user.beta,NULL);
141:   user.noise = 0.01;
142:   PetscOptionsReal("-noise","Amount of noise to add to data","",user.noise,&user.noise,NULL);

144:   user.use_ptap = PETSC_FALSE;
145:   PetscOptionsBool("-use_ptap","Use ptap matrix for DSG","",user.use_ptap,&user.use_ptap,NULL);
146:   user.use_lrc = PETSC_FALSE;
147:   PetscOptionsBool("-use_lrc","Use lrc matrix for Js","",user.use_lrc,&user.use_lrc,NULL);
148:   user.m = user.ns*user.mx*user.mx*user.mx; /* number of constraints */
149:   user.nstate =  user.m;
150:   user.ndesign = user.mx*user.mx*user.mx;
151:   user.n = user.nstate + user.ndesign; /* number of variables */

153:   /* Create TAO solver and set desired solution method */
154:   TaoCreate(PETSC_COMM_WORLD,&tao);
155:   TaoSetType(tao,TAOLCL);

157:   /* Set up initial vectors and matrices */
158:   EllipticInitialize(&user);

160:   Gather(user.x,user.y,user.state_scatter,user.u,user.design_scatter);
161:   VecDuplicate(user.x,&x0);
162:   VecCopy(user.x,x0);

164:   /* Set solution vector with an initial guess */
165:   TaoSetInitialVector(tao,user.x);
166:   TaoSetObjectiveRoutine(tao, FormFunction, (void *)&user);
167:   TaoSetGradientRoutine(tao, FormGradient, (void *)&user);
168:   TaoSetConstraintsRoutine(tao, user.c, FormConstraints, (void *)&user);

170:   TaoSetJacobianStateRoutine(tao, user.Js, NULL, user.JsInv, FormJacobianState, (void *)&user);
171:   TaoSetJacobianDesignRoutine(tao, user.Jd, FormJacobianDesign, (void *)&user);

173:   TaoSetStateDesignIS(tao,user.s_is,user.d_is);
174:   TaoSetFromOptions(tao);

176:   /* SOLVE THE APPLICATION */
177:   PetscOptionsInt("-ntests","Number of times to repeat TaoSolve","",ntests,&ntests,NULL);
178:   PetscOptionsEnd();
179:   PetscLogStageRegister("Trials",&user.stages[1]);
180:   PetscLogStagePush(user.stages[1]);
181:   for (i=0; i<ntests; i++){
182:     TaoSolve(tao);
183:     PetscPrintf(PETSC_COMM_WORLD,"KSP Iterations = %D\n",user.ksp_its);
184:     VecCopy(x0,user.x);
185:   }
186:   PetscLogStagePop();
187:   PetscBarrier((PetscObject)user.x);
188:   PetscPrintf(PETSC_COMM_WORLD,"KSP iterations within initialization: ");
189:   PetscPrintf(PETSC_COMM_WORLD,"%D\n",user.ksp_its_initial);

191:   TaoDestroy(&tao);
192:   VecDestroy(&x0);
193:   EllipticDestroy(&user);
194:   PetscFinalize();
195:   return ierr;
196: }
197: /* ------------------------------------------------------------------- */
198: /*
199:    dwork = Qy - d
200:    lwork = L*(u-ur)
201:    f = 1/2 * (dwork.dwork + alpha*lwork.lwork)
202: */
203: PetscErrorCode FormFunction(Tao tao,Vec X,PetscReal *f,void *ptr)
204: {
206:   PetscReal      d1=0,d2=0;
207:   AppCtx         *user = (AppCtx*)ptr;

210:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);
211:   MatMult(user->MQ,user->y,user->dwork);
212:   VecAXPY(user->dwork,-1.0,user->d);
213:   VecDot(user->dwork,user->dwork,&d1);
214:   VecWAXPY(user->uwork,-1.0,user->ur,user->u);
215:   MatMult(user->L,user->uwork,user->lwork);
216:   VecDot(user->lwork,user->lwork,&d2);
217:   *f = 0.5 * (d1 + user->alpha*d2);
218:   return(0);
219: }

221: /* ------------------------------------------------------------------- */
222: /*
223:     state: g_s = Q' *(Qy - d)
224:     design: g_d = alpha*L'*L*(u-ur)
225: */
226: PetscErrorCode FormGradient(Tao tao,Vec X,Vec G,void *ptr)
227: {
229:   AppCtx         *user = (AppCtx*)ptr;

232:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);
233:   MatMult(user->MQ,user->y,user->dwork);
234:   VecAXPY(user->dwork,-1.0,user->d);
235:   MatMultTranspose(user->MQ,user->dwork,user->ywork);
236:   VecWAXPY(user->uwork,-1.0,user->ur,user->u);
237:   MatMult(user->L,user->uwork,user->lwork);
238:   MatMultTranspose(user->L,user->lwork,user->uwork);
239:   VecScale(user->uwork, user->alpha);
240:   Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter);
241:   return(0);
242: }

244: PetscErrorCode FormFunctionGradient(Tao tao, Vec X, PetscReal *f, Vec G, void *ptr)
245: {
247:   PetscReal      d1,d2;
248:   AppCtx         *user = (AppCtx*)ptr;

251:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);
252:   MatMult(user->MQ,user->y,user->dwork);
253:   VecAXPY(user->dwork,-1.0,user->d);
254:   VecDot(user->dwork,user->dwork,&d1);
255:   MatMultTranspose(user->MQ,user->dwork,user->ywork);

257:   VecWAXPY(user->uwork,-1.0,user->ur,user->u);
258:   MatMult(user->L,user->uwork,user->lwork);
259:   VecDot(user->lwork,user->lwork,&d2);
260:   MatMultTranspose(user->L,user->lwork,user->uwork);
261:   VecScale(user->uwork, user->alpha);
262:   *f = 0.5 * (d1 + user->alpha*d2);
263:   Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter);
264:   return(0);
265: }

267: /* ------------------------------------------------------------------- */
268: /* A
269: MatShell object
270: */
271: PetscErrorCode FormJacobianState(Tao tao, Vec X, Mat J, Mat JPre, Mat JInv, void *ptr)
272: {
274:   AppCtx         *user = (AppCtx*)ptr;

277:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);
278:   /* DSG = Div * (1/Av_u) * Grad */
279:   VecSet(user->uwork,0);
280:   VecAXPY(user->uwork,-1.0,user->u);
281:   VecExp(user->uwork);
282:   MatMult(user->Av,user->uwork,user->Av_u);
283:   VecCopy(user->Av_u,user->Swork);
284:   VecReciprocal(user->Swork);
285:   if (user->use_ptap) {
286:     MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES);
287:     MatPtAP(user->Diag,user->Grad,MAT_REUSE_MATRIX,1.0,&user->DSG);
288:   } else {
289:     MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN);
290:     MatDiagonalScale(user->Divwork,NULL,user->Swork);
291:     MatMatMultNumeric(user->Divwork,user->Grad,user->DSG);
292:   }
293:   return(0);
294: }
295: /* ------------------------------------------------------------------- */
296: /* B */
297: PetscErrorCode FormJacobianDesign(Tao tao, Vec X, Mat J, void *ptr)
298: {
300:   AppCtx         *user = (AppCtx*)ptr;

303:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);
304:   return(0);
305: }

307: PetscErrorCode StateBlockMatMult(Mat J_shell, Vec X, Vec Y)
308: {
310:   PetscReal      sum;
311:   AppCtx         *user;

314:   MatShellGetContext(J_shell,(void**)&user);
315:   MatMult(user->DSG,X,Y);
316:   VecSum(X,&sum);
317:   sum /= user->ndesign;
318:   VecShift(Y,sum);
319:   return(0);
320: }

322: PetscErrorCode StateMatMult(Mat J_shell, Vec X, Vec Y)
323: {
325:   PetscInt       i;
326:   AppCtx         *user;

329:   MatShellGetContext(J_shell,(void**)&user);
330:   if (user->ns == 1) {
331:     MatMult(user->JsBlock,X,Y);
332:   } else {
333:     for (i=0;i<user->ns;i++) {
334:       Scatter(X,user->subq,user->yi_scatter[i],0,0);
335:       Scatter(Y,user->suby,user->yi_scatter[i],0,0);
336:       MatMult(user->JsBlock,user->subq,user->suby);
337:       Gather(Y,user->suby,user->yi_scatter[i],0,0);
338:     }
339:   }
340:   return(0);
341: }

343: PetscErrorCode StateInvMatMult(Mat J_shell, Vec X, Vec Y)
344: {
346:   PetscInt       its,i;
347:   AppCtx         *user;

350:   MatShellGetContext(J_shell,(void**)&user);
351:   KSPSetOperators(user->solver,user->JsBlock,user->DSG);
352:   if (Y == user->ytrue) {
353:     /* First solve is done using true solution to set up problem */
354:     KSPSetTolerances(user->solver,1e-8,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);
355:   } else {
356:     KSPSetTolerances(user->solver,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);
357:   }
358:   if (user->ns == 1) {
359:     KSPSolve(user->solver,X,Y);
360:     KSPGetIterationNumber(user->solver,&its);
361:     user->ksp_its+=its;
362:   } else {
363:     for (i=0;i<user->ns;i++) {
364:       Scatter(X,user->subq,user->yi_scatter[i],0,0);
365:       Scatter(Y,user->suby,user->yi_scatter[i],0,0);
366:       KSPSolve(user->solver,user->subq,user->suby);
367:       KSPGetIterationNumber(user->solver,&its);
368:       user->ksp_its+=its;
369:       Gather(Y,user->suby,user->yi_scatter[i],0,0);
370:     }
371:   }
372:   return(0);
373: }
374: PetscErrorCode QMatMult(Mat J_shell, Vec X, Vec Y)
375: {
377:   AppCtx         *user;
378:   PetscInt       i;

381:   MatShellGetContext(J_shell,(void**)&user);
382:   if (user->ns == 1) {
383:     MatMult(user->Q,X,Y);
384:   } else {
385:     for (i=0;i<user->ns;i++) {
386:       Scatter(X,user->subq,user->yi_scatter[i],0,0);
387:       Scatter(Y,user->subd,user->di_scatter[i],0,0);
388:       MatMult(user->Q,user->subq,user->subd);
389:       Gather(Y,user->subd,user->di_scatter[i],0,0);
390:     }
391:   }
392:   return(0);
393: }

395: PetscErrorCode QMatMultTranspose(Mat J_shell, Vec X, Vec Y)
396: {
398:   AppCtx         *user;
399:   PetscInt       i;

402:   MatShellGetContext(J_shell,(void**)&user);
403:   if (user->ns == 1) {
404:     MatMultTranspose(user->Q,X,Y);
405:   } else {
406:     for (i=0;i<user->ns;i++) {
407:       Scatter(X,user->subd,user->di_scatter[i],0,0);
408:       Scatter(Y,user->suby,user->yi_scatter[i],0,0);
409:       MatMultTranspose(user->Q,user->subd,user->suby);
410:       Gather(Y,user->suby,user->yi_scatter[i],0,0);
411:     }
412:   }
413:   return(0);
414: }

416: PetscErrorCode DesignMatMult(Mat J_shell, Vec X, Vec Y)
417: {
419:   PetscInt       i;
420:   AppCtx         *user;

423:   MatShellGetContext(J_shell,(void**)&user);

425:   /* sdiag(1./v) */
426:   VecSet(user->uwork,0);
427:   VecAXPY(user->uwork,-1.0,user->u);
428:   VecExp(user->uwork);

430:   /* sdiag(1./((Av*(1./v)).^2)) */
431:   MatMult(user->Av,user->uwork,user->Swork);
432:   VecPointwiseMult(user->Swork,user->Swork,user->Swork);
433:   VecReciprocal(user->Swork);

435:   /* (Av * (sdiag(1./v) * b)) */
436:   VecPointwiseMult(user->uwork,user->uwork,X);
437:   MatMult(user->Av,user->uwork,user->Twork);

439:   /* (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b))) */
440:   VecPointwiseMult(user->Swork,user->Twork,user->Swork);

442:   if (user->ns == 1) {
443:     /* (sdiag(Grad*y(:,i)) */
444:     MatMult(user->Grad,user->y,user->Twork);

446:     /* Div * (sdiag(Grad*y(:,i)) * (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b)))) */
447:     VecPointwiseMult(user->Swork,user->Twork,user->Swork);
448:     MatMultTranspose(user->Grad,user->Swork,Y);
449:   } else {
450:     for (i=0;i<user->ns;i++) {
451:       Scatter(user->y,user->suby,user->yi_scatter[i],0,0);
452:       Scatter(Y,user->subq,user->yi_scatter[i],0,0);

454:       MatMult(user->Grad,user->suby,user->Twork);
455:       VecPointwiseMult(user->Twork,user->Twork,user->Swork);
456:       MatMultTranspose(user->Grad,user->Twork,user->subq);
457:       Gather(user->y,user->suby,user->yi_scatter[i],0,0);
458:       Gather(Y,user->subq,user->yi_scatter[i],0,0);
459:     }
460:   }
461:   return(0);
462: }

464: PetscErrorCode DesignMatMultTranspose(Mat J_shell, Vec X, Vec Y)
465: {
467:   PetscInt       i;
468:   AppCtx         *user;

471:   MatShellGetContext(J_shell,(void**)&user);
472:   VecZeroEntries(Y);

474:   /* Sdiag = 1./((Av*(1./v)).^2) */
475:   VecSet(user->uwork,0);
476:   VecAXPY(user->uwork,-1.0,user->u);
477:   VecExp(user->uwork);
478:   MatMult(user->Av,user->uwork,user->Swork);
479:   VecPointwiseMult(user->Sdiag,user->Swork,user->Swork);
480:   VecReciprocal(user->Sdiag);

482:   for (i=0;i<user->ns;i++) {
483:     Scatter(X,user->subq,user->yi_scatter[i],0,0);
484:     Scatter(user->y,user->suby,user->yi_scatter[i],0,0);

486:     /* Swork = (Div' * b(:,i)) */
487:     MatMult(user->Grad,user->subq,user->Swork);

489:     /* Twork = Grad*y(:,i) */
490:     MatMult(user->Grad,user->suby,user->Twork);

492:     /* Twork = sdiag(Twork) * Swork */
493:     VecPointwiseMult(user->Twork,user->Swork,user->Twork);


496:     /* Swork = pointwisemult(Sdiag,Twork) */
497:     VecPointwiseMult(user->Swork,user->Twork,user->Sdiag);

499:     /* Ywork = Av' * Swork */
500:     MatMultTranspose(user->Av,user->Swork,user->Ywork);

502:     /* Ywork = pointwisemult(uwork,Ywork) */
503:     VecPointwiseMult(user->Ywork,user->uwork,user->Ywork);
504:     VecAXPY(Y,1.0,user->Ywork);
505:     Gather(user->y,user->suby,user->yi_scatter[i],0,0);
506:   }
507:   return(0);
508: }

510: PetscErrorCode FormConstraints(Tao tao, Vec X, Vec C, void *ptr)
511: {
512:    /* C=Ay - q      A = Div * Sigma * Grad + hx*hx*hx*ones(n,n) */
514:    PetscReal      sum;
515:    PetscInt       i;
516:    AppCtx         *user = (AppCtx*)ptr;

519:    Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);
520:    if (user->ns == 1) {
521:      MatMult(user->Grad,user->y,user->Swork);
522:      VecPointwiseDivide(user->Swork,user->Swork,user->Av_u);
523:      MatMultTranspose(user->Grad,user->Swork,C);
524:      VecSum(user->y,&sum);
525:      sum /= user->ndesign;
526:      VecShift(C,sum);
527:    } else {
528:      for (i=0;i<user->ns;i++) {
529:       Scatter(user->y,user->suby,user->yi_scatter[i],0,0);
530:       Scatter(C,user->subq,user->yi_scatter[i],0,0);
531:       MatMult(user->Grad,user->suby,user->Swork);
532:       VecPointwiseDivide(user->Swork,user->Swork,user->Av_u);
533:       MatMultTranspose(user->Grad,user->Swork,user->subq);

535:       VecSum(user->suby,&sum);
536:       sum /= user->ndesign;
537:       VecShift(user->subq,sum);

539:       Gather(user->y,user->suby,user->yi_scatter[i],0,0);
540:       Gather(C,user->subq,user->yi_scatter[i],0,0);
541:      }
542:    }
543:    VecAXPY(C,-1.0,user->q);
544:    return(0);
545: }

547: PetscErrorCode Scatter(Vec x, Vec sub1, VecScatter scat1, Vec sub2, VecScatter scat2)
548: {

552:   VecScatterBegin(scat1,x,sub1,INSERT_VALUES,SCATTER_FORWARD);
553:   VecScatterEnd(scat1,x,sub1,INSERT_VALUES,SCATTER_FORWARD);
554:   if (sub2) {
555:     VecScatterBegin(scat2,x,sub2,INSERT_VALUES,SCATTER_FORWARD);
556:     VecScatterEnd(scat2,x,sub2,INSERT_VALUES,SCATTER_FORWARD);
557:   }
558:   return(0);
559: }

561: PetscErrorCode Gather(Vec x, Vec sub1, VecScatter scat1, Vec sub2, VecScatter scat2)
562: {

566:   VecScatterBegin(scat1,sub1,x,INSERT_VALUES,SCATTER_REVERSE);
567:   VecScatterEnd(scat1,sub1,x,INSERT_VALUES,SCATTER_REVERSE);
568:   if (sub2) {
569:     VecScatterBegin(scat2,sub2,x,INSERT_VALUES,SCATTER_REVERSE);
570:     VecScatterEnd(scat2,sub2,x,INSERT_VALUES,SCATTER_REVERSE);
571:   }
572:   return(0);
573: }

575: PetscErrorCode EllipticInitialize(AppCtx *user)
576: {
578:   PetscInt       m,n,i,j,k,l,linear_index,is,js,ks,ls,istart,iend,iblock;
579:   Vec            XX,YY,ZZ,XXwork,YYwork,ZZwork,UTwork;
580:   PetscReal      *x,*y,*z;
581:   PetscReal      h,meanut;
582:   PetscScalar    hinv,neg_hinv,half = 0.5,sqrt_beta;
583:   PetscInt       im,indx1,indx2,indy1,indy2,indz1,indz2,nx,ny,nz;
584:   IS             is_alldesign,is_allstate;
585:   IS             is_from_d;
586:   IS             is_from_y;
587:   PetscInt       lo,hi,hi2,lo2,ysubnlocal,dsubnlocal;
588:   const PetscInt *ranges, *subranges;
589:   PetscMPIInt    size;
590:   PetscReal      xri,yri,zri,xim,yim,zim,dx1,dx2,dy1,dy2,dz1,dz2,Dx,Dy,Dz;
591:   PetscScalar    v,vx,vy,vz;
592:   PetscInt       offset,subindex,subvec,nrank,kk;

594:   PetscScalar xr[64] = {0.4970,     0.8498,     0.7814,     0.6268,     0.7782,     0.6402,     0.3617,     0.3160,
595:                         0.3610,     0.5298,     0.6987,     0.3331,     0.7962,     0.5596,     0.3866,     0.6774,
596:                         0.5407,     0.4518,     0.6702,     0.6061,     0.7580,     0.8997,     0.5198,     0.8326,
597:                         0.2138,     0.9198,     0.3000,     0.2833,     0.8288,     0.7076,     0.1820,     0.0728,
598:                         0.8447,     0.2367,     0.3239,     0.6413,     0.3114,     0.4731,     0.1192,     0.9273,
599:                         0.5724,     0.4331,     0.5136,     0.3547,     0.4413,     0.2602,     0.5698,     0.7278,
600:                         0.5261,     0.6230,     0.2454,     0.3948,     0.7479,     0.6582,     0.4660,     0.5594,
601:                         0.7574,     0.1143,     0.5900,     0.1065,     0.4260,     0.3294,     0.8276,     0.0756};

603:   PetscScalar yr[64] = {0.7345,     0.9120,     0.9288,     0.7528,     0.4463,     0.4985,     0.2497,     0.6256,
604:                         0.3425,     0.9026,     0.6983,     0.4230,     0.7140,     0.2970,     0.4474,     0.8792,
605:                         0.6604,     0.2485,     0.7968,     0.6127,     0.1796,     0.2437,     0.5938,     0.6137,
606:                         0.3867,     0.5658,     0.4575,     0.1009,     0.0863,     0.3361,     0.0738,     0.3985,
607:                         0.6602,     0.1437,     0.0934,     0.5983,     0.5950,     0.0763,     0.0768,     0.2288,
608:                         0.5761,     0.1129,     0.3841,     0.6150,     0.6904,     0.6686,     0.1361,     0.4601,
609:                         0.4491,     0.3716,     0.1969,     0.6537,     0.6743,     0.6991,     0.4811,     0.5480,
610:                         0.1684,     0.4569,     0.6889,     0.8437,     0.3015,     0.2854,     0.8199,     0.2658};

612:   PetscScalar zr[64] = {0.7668,     0.8573,     0.2654,     0.2719,     0.1060,     0.1311,     0.6232,     0.2295,
613:                         0.8009,     0.2147,     0.2119,     0.9325,     0.4473,     0.3600,     0.3374,     0.3819,
614:                         0.4066,     0.5801,     0.1673,     0.0959,     0.4638,     0.8236,     0.8800,     0.2939,
615:                         0.2028,     0.8262,     0.2706,     0.6276,     0.9085,     0.6443,     0.8241,     0.0712,
616:                         0.1824,     0.7789,     0.4389,     0.8415,     0.7055,     0.6639,     0.3653,     0.2078,
617:                         0.1987,     0.2297,     0.4321,     0.8115,     0.4915,     0.7764,     0.4657,     0.4627,
618:                         0.4569,     0.4232,     0.8514,     0.0674,     0.3227,     0.1055,     0.6690,     0.6313,
619:                         0.9226,     0.5461,     0.4126,     0.2364,     0.6096,     0.7042,     0.3914,     0.0711};

622:   MPI_Comm_size(PETSC_COMM_WORLD,&size);
623:   PetscLogStageRegister("Elliptic Setup",&user->stages[0]);
624:   PetscLogStagePush(user->stages[0]);

626:   /* Create u,y,c,x */
627:   VecCreate(PETSC_COMM_WORLD,&user->u);
628:   VecCreate(PETSC_COMM_WORLD,&user->y);
629:   VecCreate(PETSC_COMM_WORLD,&user->c);
630:   VecSetSizes(user->u,PETSC_DECIDE,user->ndesign);
631:   VecSetFromOptions(user->u);
632:   VecGetLocalSize(user->u,&ysubnlocal);
633:   VecSetSizes(user->y,ysubnlocal*user->ns,user->nstate);
634:   VecSetSizes(user->c,ysubnlocal*user->ns,user->m);
635:   VecSetFromOptions(user->y);
636:   VecSetFromOptions(user->c);

638:   /*
639:      *******************************
640:      Create scatters for x <-> y,u
641:      *******************************

643:      If the state vector y and design vector u are partitioned as
644:      [y_1; y_2; ...; y_np] and [u_1; u_2; ...; u_np] (with np = # of processors),
645:      then the solution vector x is organized as
646:      [y_1; u_1; y_2; u_2; ...; y_np; u_np].
647:      The index sets user->s_is and user->d_is correspond to the indices of the
648:      state and design variables owned by the current processor.
649:   */
650:   VecCreate(PETSC_COMM_WORLD,&user->x);

652:   VecGetOwnershipRange(user->y,&lo,&hi);
653:   VecGetOwnershipRange(user->u,&lo2,&hi2);

655:   ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_allstate);
656:   ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+lo2,1,&user->s_is);
657:   ISCreateStride(PETSC_COMM_SELF,hi2-lo2,lo2,1,&is_alldesign);
658:   ISCreateStride(PETSC_COMM_SELF,hi2-lo2,hi+lo2,1,&user->d_is);

660:   VecSetSizes(user->x,hi-lo+hi2-lo2,user->n);
661:   VecSetFromOptions(user->x);

663:   VecScatterCreate(user->x,user->s_is,user->y,is_allstate,&user->state_scatter);
664:   VecScatterCreate(user->x,user->d_is,user->u,is_alldesign,&user->design_scatter);
665:   ISDestroy(&is_alldesign);
666:   ISDestroy(&is_allstate);
667:   /*
668:      *******************************
669:      Create scatter from y to y_1,y_2,...,y_ns
670:      *******************************
671:   */
672:   PetscMalloc1(user->ns,&user->yi_scatter);
673:   VecDuplicate(user->u,&user->suby);
674:   VecDuplicate(user->u,&user->subq);

676:   VecGetOwnershipRange(user->y,&lo2,&hi2);
677:   istart = 0;
678:   for (i=0; i<user->ns; i++){
679:     VecGetOwnershipRange(user->suby,&lo,&hi);
680:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo2+istart,1,&is_from_y);
681:     VecScatterCreate(user->y,is_from_y,user->suby,NULL,&user->yi_scatter[i]);
682:     istart = istart + hi-lo;
683:     ISDestroy(&is_from_y);
684:   }
685:   /*
686:      *******************************
687:      Create scatter from d to d_1,d_2,...,d_ns
688:      *******************************
689:   */
690:   VecCreate(PETSC_COMM_WORLD,&user->subd);
691:   VecSetSizes(user->subd,PETSC_DECIDE,user->ndata);
692:   VecSetFromOptions(user->subd);
693:   VecCreate(PETSC_COMM_WORLD,&user->d);
694:   VecGetLocalSize(user->subd,&dsubnlocal);
695:   VecSetSizes(user->d,dsubnlocal*user->ns,user->ndata*user->ns);
696:   VecSetFromOptions(user->d);
697:   PetscMalloc1(user->ns,&user->di_scatter);

699:   VecGetOwnershipRange(user->d,&lo2,&hi2);
700:   istart = 0;
701:   for (i=0; i<user->ns; i++){
702:     VecGetOwnershipRange(user->subd,&lo,&hi);
703:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo2+istart,1,&is_from_d);
704:     VecScatterCreate(user->d,is_from_d,user->subd,NULL,&user->di_scatter[i]);
705:     istart = istart + hi-lo;
706:     ISDestroy(&is_from_d);
707:   }

709:   PetscMalloc1(user->mx,&x);
710:   PetscMalloc1(user->mx,&y);
711:   PetscMalloc1(user->mx,&z);

713:   user->ksp_its = 0;
714:   user->ksp_its_initial = 0;

716:   n = user->mx * user->mx * user->mx;
717:   m = 3 * user->mx * user->mx * (user->mx-1);
718:   sqrt_beta = PetscSqrtScalar(user->beta);

720:   VecCreate(PETSC_COMM_WORLD,&XX);
721:   VecCreate(PETSC_COMM_WORLD,&user->q);
722:   VecSetSizes(XX,ysubnlocal,n);
723:   VecSetSizes(user->q,ysubnlocal*user->ns,user->m);
724:   VecSetFromOptions(XX);
725:   VecSetFromOptions(user->q);

727:   VecDuplicate(XX,&YY);
728:   VecDuplicate(XX,&ZZ);
729:   VecDuplicate(XX,&XXwork);
730:   VecDuplicate(XX,&YYwork);
731:   VecDuplicate(XX,&ZZwork);
732:   VecDuplicate(XX,&UTwork);
733:   VecDuplicate(XX,&user->utrue);

735:   /* map for striding q */
736:   VecGetOwnershipRanges(user->q,&ranges);
737:   VecGetOwnershipRanges(user->u,&subranges);

739:   VecGetOwnershipRange(user->q,&lo2,&hi2);
740:   VecGetOwnershipRange(user->u,&lo,&hi);
741:   /* Generate 3D grid, and collect ns (1<=ns<=8) right-hand-side vectors into user->q */
742:   h = 1.0/user->mx;
743:   hinv = user->mx;
744:   neg_hinv = -hinv;

746:   VecGetOwnershipRange(XX,&istart,&iend);
747:   for (linear_index=istart; linear_index<iend; linear_index++){
748:     i = linear_index % user->mx;
749:     j = ((linear_index-i)/user->mx) % user->mx;
750:     k = ((linear_index-i)/user->mx-j) / user->mx;
751:     vx = h*(i+0.5);
752:     vy = h*(j+0.5);
753:     vz = h*(k+0.5);
754:     VecSetValues(XX,1,&linear_index,&vx,INSERT_VALUES);
755:     VecSetValues(YY,1,&linear_index,&vy,INSERT_VALUES);
756:     VecSetValues(ZZ,1,&linear_index,&vz,INSERT_VALUES);
757:     for (is=0; is<2; is++){
758:       for (js=0; js<2; js++){
759:         for (ks=0; ks<2; ks++){
760:           ls = is*4 + js*2 + ks;
761:           if (ls<user->ns){
762:             l =ls*n + linear_index;
763:             /* remap */
764:             subindex = l%n;
765:             subvec = l/n;
766:             nrank=0;
767:             while (subindex >= subranges[nrank+1]) nrank++;
768:             offset = subindex - subranges[nrank];
769:             istart=0;
770:             for (kk=0;kk<nrank;kk++) istart+=user->ns*(subranges[kk+1]-subranges[kk]);
771:             istart += (subranges[nrank+1]-subranges[nrank])*subvec;
772:             l = istart+offset;
773:             v = 100*PetscSinScalar(2*PETSC_PI*(vx+0.25*is))*PetscSinScalar(2*PETSC_PI*(vy+0.25*js))*PetscSinScalar(2*PETSC_PI*(vz+0.25*ks));
774:             VecSetValues(user->q,1,&l,&v,INSERT_VALUES);
775:           }
776:         }
777:       }
778:     }
779:   }

781:   VecAssemblyBegin(XX);
782:   VecAssemblyEnd(XX);
783:   VecAssemblyBegin(YY);
784:   VecAssemblyEnd(YY);
785:   VecAssemblyBegin(ZZ);
786:   VecAssemblyEnd(ZZ);
787:   VecAssemblyBegin(user->q);
788:   VecAssemblyEnd(user->q);

790:   /* Compute true parameter function
791:      ut = exp(-((x-0.25)^2+(y-0.25)^2+(z-0.25)^2)/0.05) - exp((x-0.75)^2-(y-0.75)^2-(z-0.75))^2/0.05) */
792:   VecCopy(XX,XXwork);
793:   VecCopy(YY,YYwork);
794:   VecCopy(ZZ,ZZwork);

796:   VecShift(XXwork,-0.25);
797:   VecShift(YYwork,-0.25);
798:   VecShift(ZZwork,-0.25);

800:   VecPointwiseMult(XXwork,XXwork,XXwork);
801:   VecPointwiseMult(YYwork,YYwork,YYwork);
802:   VecPointwiseMult(ZZwork,ZZwork,ZZwork);

804:   VecCopy(XXwork,UTwork);
805:   VecAXPY(UTwork,1.0,YYwork);
806:   VecAXPY(UTwork,1.0,ZZwork);
807:   VecScale(UTwork,-20.0);
808:   VecExp(UTwork);
809:   VecCopy(UTwork,user->utrue);

811:   VecCopy(XX,XXwork);
812:   VecCopy(YY,YYwork);
813:   VecCopy(ZZ,ZZwork);

815:   VecShift(XXwork,-0.75);
816:   VecShift(YYwork,-0.75);
817:   VecShift(ZZwork,-0.75);

819:   VecPointwiseMult(XXwork,XXwork,XXwork);
820:   VecPointwiseMult(YYwork,YYwork,YYwork);
821:   VecPointwiseMult(ZZwork,ZZwork,ZZwork);

823:   VecCopy(XXwork,UTwork);
824:   VecAXPY(UTwork,1.0,YYwork);
825:   VecAXPY(UTwork,1.0,ZZwork);
826:   VecScale(UTwork,-20.0);
827:   VecExp(UTwork);

829:   VecAXPY(user->utrue,-1.0,UTwork);

831:   VecDestroy(&XX);
832:   VecDestroy(&YY);
833:   VecDestroy(&ZZ);
834:   VecDestroy(&XXwork);
835:   VecDestroy(&YYwork);
836:   VecDestroy(&ZZwork);
837:   VecDestroy(&UTwork);

839:   /* Initial guess and reference model */
840:   VecDuplicate(user->utrue,&user->ur);
841:   VecSum(user->utrue,&meanut);
842:   meanut = meanut / n;
843:   VecSet(user->ur,meanut);
844:   VecCopy(user->ur,user->u);

846:   /* Generate Grad matrix */
847:   MatCreate(PETSC_COMM_WORLD,&user->Grad);
848:   MatSetSizes(user->Grad,PETSC_DECIDE,ysubnlocal,m,n);
849:   MatSetFromOptions(user->Grad);
850:   MatMPIAIJSetPreallocation(user->Grad,2,NULL,2,NULL);
851:   MatSeqAIJSetPreallocation(user->Grad,2,NULL);
852:   MatGetOwnershipRange(user->Grad,&istart,&iend);

854:   for (i=istart; i<iend; i++){
855:     if (i<m/3){
856:       iblock = i / (user->mx-1);
857:       j = iblock*user->mx + (i % (user->mx-1));
858:       MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES);
859:       j = j+1;
860:       MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES);
861:     }
862:     if (i>=m/3 && i<2*m/3){
863:       iblock = (i-m/3) / (user->mx*(user->mx-1));
864:       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
865:       MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES);
866:       j = j + user->mx;
867:       MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES);
868:     }
869:     if (i>=2*m/3){
870:       j = i-2*m/3;
871:       MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES);
872:       j = j + user->mx*user->mx;
873:       MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES);
874:     }
875:   }

877:   MatAssemblyBegin(user->Grad,MAT_FINAL_ASSEMBLY);
878:   MatAssemblyEnd(user->Grad,MAT_FINAL_ASSEMBLY);

880:   /* Generate arithmetic averaging matrix Av */
881:   MatCreate(PETSC_COMM_WORLD,&user->Av);
882:   MatSetSizes(user->Av,PETSC_DECIDE,ysubnlocal,m,n);
883:   MatSetFromOptions(user->Av);
884:   MatMPIAIJSetPreallocation(user->Av,2,NULL,2,NULL);
885:   MatSeqAIJSetPreallocation(user->Av,2,NULL);
886:   MatGetOwnershipRange(user->Av,&istart,&iend);

888:   for (i=istart; i<iend; i++){
889:     if (i<m/3){
890:       iblock = i / (user->mx-1);
891:       j = iblock*user->mx + (i % (user->mx-1));
892:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES);
893:       j = j+1;
894:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES);
895:     }
896:     if (i>=m/3 && i<2*m/3){
897:       iblock = (i-m/3) / (user->mx*(user->mx-1));
898:       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
899:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES);
900:       j = j + user->mx;
901:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES);
902:     }
903:     if (i>=2*m/3){
904:       j = i-2*m/3;
905:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES);
906:       j = j + user->mx*user->mx;
907:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES);
908:     }
909:   }

911:   MatAssemblyBegin(user->Av,MAT_FINAL_ASSEMBLY);
912:   MatAssemblyEnd(user->Av,MAT_FINAL_ASSEMBLY);

914:   MatCreate(PETSC_COMM_WORLD,&user->L);
915:   MatSetSizes(user->L,PETSC_DECIDE,ysubnlocal,m+n,n);
916:   MatSetFromOptions(user->L);
917:   MatMPIAIJSetPreallocation(user->L,2,NULL,2,NULL);
918:   MatSeqAIJSetPreallocation(user->L,2,NULL);
919:   MatGetOwnershipRange(user->L,&istart,&iend);

921:   for (i=istart; i<iend; i++){
922:     if (i<m/3){
923:       iblock = i / (user->mx-1);
924:       j = iblock*user->mx + (i % (user->mx-1));
925:       MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES);
926:       j = j+1;
927:       MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES);
928:     }
929:     if (i>=m/3 && i<2*m/3){
930:       iblock = (i-m/3) / (user->mx*(user->mx-1));
931:       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
932:       MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES);
933:       j = j + user->mx;
934:       MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES);
935:     }
936:     if (i>=2*m/3 && i<m){
937:       j = i-2*m/3;
938:       MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES);
939:       j = j + user->mx*user->mx;
940:       MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES);
941:     }
942:     if (i>=m){
943:       j = i - m;
944:       MatSetValues(user->L,1,&i,1,&j,&sqrt_beta,INSERT_VALUES);
945:     }
946:   }
947:   MatAssemblyBegin(user->L,MAT_FINAL_ASSEMBLY);
948:   MatAssemblyEnd(user->L,MAT_FINAL_ASSEMBLY);
949:   MatScale(user->L,PetscPowScalar(h,1.5));

951:   /* Generate Div matrix */
952:   if (!user->use_ptap) {
953:     /* Generate Div matrix */
954:     MatCreate(PETSC_COMM_WORLD,&user->Div);
955:     MatSetSizes(user->Div,ysubnlocal,PETSC_DECIDE,n,m);
956:     MatSetFromOptions(user->Div);
957:     MatMPIAIJSetPreallocation(user->Div,4,NULL,4,NULL);
958:     MatSeqAIJSetPreallocation(user->Div,6,NULL);
959:     MatGetOwnershipRange(user->Grad,&istart,&iend);

961:     for (i=istart; i<iend; i++){
962:       if (i<m/3){
963:         iblock = i / (user->mx-1);
964:         j = iblock*user->mx + (i % (user->mx-1));
965:         MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES);
966:         j = j+1;
967:         MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES);
968:       }
969:       if (i>=m/3 && i<2*m/3){
970:         iblock = (i-m/3) / (user->mx*(user->mx-1));
971:         j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
972:         MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES);
973:         j = j + user->mx;
974:         MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES);
975:       }
976:       if (i>=2*m/3){
977:         j = i-2*m/3;
978:         MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES);
979:         j = j + user->mx*user->mx;
980:         MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES);
981:       }
982:     }

984:     MatAssemblyBegin(user->Div,MAT_FINAL_ASSEMBLY);
985:     MatAssemblyEnd(user->Div,MAT_FINAL_ASSEMBLY);
986:     MatDuplicate(user->Div,MAT_SHARE_NONZERO_PATTERN,&user->Divwork);
987:   } else {
988:     MatCreate(PETSC_COMM_WORLD,&user->Diag);
989:     MatSetSizes(user->Diag,PETSC_DECIDE,PETSC_DECIDE,m,m);
990:     MatSetFromOptions(user->Diag);
991:     MatMPIAIJSetPreallocation(user->Diag,1,NULL,0,NULL);
992:     MatSeqAIJSetPreallocation(user->Diag,1,NULL);
993:   }

995:   /* Build work vectors and matrices */
996:   VecCreate(PETSC_COMM_WORLD,&user->S);
997:   VecSetSizes(user->S, PETSC_DECIDE, m);
998:   VecSetFromOptions(user->S);

1000:   VecCreate(PETSC_COMM_WORLD,&user->lwork);
1001:   VecSetSizes(user->lwork,PETSC_DECIDE,m+user->mx*user->mx*user->mx);
1002:   VecSetFromOptions(user->lwork);

1004:   MatDuplicate(user->Av,MAT_SHARE_NONZERO_PATTERN,&user->Avwork);

1006:   VecDuplicate(user->S,&user->Swork);
1007:   VecDuplicate(user->S,&user->Sdiag);
1008:   VecDuplicate(user->S,&user->Av_u);
1009:   VecDuplicate(user->S,&user->Twork);
1010:   VecDuplicate(user->y,&user->ywork);
1011:   VecDuplicate(user->u,&user->Ywork);
1012:   VecDuplicate(user->u,&user->uwork);
1013:   VecDuplicate(user->u,&user->js_diag);
1014:   VecDuplicate(user->c,&user->cwork);
1015:   VecDuplicate(user->d,&user->dwork);

1017:   /* Create a matrix-free shell user->Jd for computing B*x */
1018:   MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal,user->nstate,user->ndesign,user,&user->Jd);
1019:   MatShellSetOperation(user->Jd,MATOP_MULT,(void(*)(void))DesignMatMult);
1020:   MatShellSetOperation(user->Jd,MATOP_MULT_TRANSPOSE,(void(*)(void))DesignMatMultTranspose);

1022:   /* Compute true state function ytrue given utrue */
1023:   VecDuplicate(user->y,&user->ytrue);

1025:   /* First compute Av_u = Av*exp(-u) */
1026:   VecSet(user->uwork, 0);
1027:   VecAXPY(user->uwork,-1.0,user->utrue); /* Note: user->utrue */
1028:   VecExp(user->uwork);
1029:   MatMult(user->Av,user->uwork,user->Av_u);

1031:   /* Next form DSG = Div*S*Grad */
1032:   VecCopy(user->Av_u,user->Swork);
1033:   VecReciprocal(user->Swork);
1034:   if (user->use_ptap) {
1035:     MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES);
1036:     MatPtAP(user->Diag,user->Grad,MAT_INITIAL_MATRIX,1.0,&user->DSG);
1037:   } else {
1038:     MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN);
1039:     MatDiagonalScale(user->Divwork,NULL,user->Swork);
1040:     MatMatMultSymbolic(user->Divwork,user->Grad,1.0,&user->DSG);
1041:     MatMatMultNumeric(user->Divwork,user->Grad,user->DSG);
1042:   }

1044:   MatSetOption(user->DSG,MAT_SYMMETRIC,PETSC_TRUE);
1045:   MatSetOption(user->DSG,MAT_SYMMETRY_ETERNAL,PETSC_TRUE);

1047:   if (user->use_lrc == PETSC_TRUE) {
1048:     v=PetscSqrtReal(1.0 /user->ndesign);
1049:     PetscMalloc1(user->ndesign,&user->ones);

1051:     for (i=0;i<user->ndesign;i++) {
1052:       user->ones[i]=v;
1053:     }
1054:     MatCreateDense(PETSC_COMM_WORLD,ysubnlocal,PETSC_DECIDE,user->ndesign,1,user->ones,&user->Ones);
1055:     MatAssemblyBegin(user->Ones, MAT_FINAL_ASSEMBLY);
1056:     MatAssemblyEnd(user->Ones, MAT_FINAL_ASSEMBLY);
1057:     MatCreateLRC(user->DSG,user->Ones,NULL,user->Ones,&user->JsBlock);
1058:     MatSetUp(user->JsBlock);
1059:   } else {
1060:     /* Create matrix-free shell user->Js for computing (A + h^3*e*e^T)*x */
1061:     MatCreateShell(PETSC_COMM_WORLD,ysubnlocal,ysubnlocal,user->ndesign,user->ndesign,user,&user->JsBlock);
1062:     MatShellSetOperation(user->JsBlock,MATOP_MULT,(void(*)(void))StateBlockMatMult);
1063:     MatShellSetOperation(user->JsBlock,MATOP_MULT_TRANSPOSE,(void(*)(void))StateBlockMatMult);
1064:   }
1065:   MatSetOption(user->JsBlock,MAT_SYMMETRIC,PETSC_TRUE);
1066:   MatSetOption(user->JsBlock,MAT_SYMMETRY_ETERNAL,PETSC_TRUE);
1067:   MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal*user->ns,user->nstate,user->nstate,user,&user->Js);
1068:   MatShellSetOperation(user->Js,MATOP_MULT,(void(*)(void))StateMatMult);
1069:   MatShellSetOperation(user->Js,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatMult);
1070:   MatSetOption(user->Js,MAT_SYMMETRIC,PETSC_TRUE);
1071:   MatSetOption(user->Js,MAT_SYMMETRY_ETERNAL,PETSC_TRUE);

1073:   MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal*user->ns,user->nstate,user->nstate,user,&user->JsInv);
1074:   MatShellSetOperation(user->JsInv,MATOP_MULT,(void(*)(void))StateInvMatMult);
1075:   MatShellSetOperation(user->JsInv,MATOP_MULT_TRANSPOSE,(void(*)(void))StateInvMatMult);
1076:   MatSetOption(user->JsInv,MAT_SYMMETRIC,PETSC_TRUE);
1077:   MatSetOption(user->JsInv,MAT_SYMMETRY_ETERNAL,PETSC_TRUE);

1079:   MatSetOption(user->DSG,MAT_SYMMETRIC,PETSC_TRUE);
1080:   MatSetOption(user->DSG,MAT_SYMMETRY_ETERNAL,PETSC_TRUE);
1081:   /* Now solve for ytrue */
1082:   KSPCreate(PETSC_COMM_WORLD,&user->solver);
1083:   KSPSetFromOptions(user->solver);

1085:   KSPSetOperators(user->solver,user->JsBlock,user->DSG);

1087:   MatMult(user->JsInv,user->q,user->ytrue);
1088:   /* First compute Av_u = Av*exp(-u) */
1089:   VecSet(user->uwork,0);
1090:   VecAXPY(user->uwork,-1.0,user->u); /* Note: user->u */
1091:   VecExp(user->uwork);
1092:   MatMult(user->Av,user->uwork,user->Av_u);

1094:   /* Next update DSG = Div*S*Grad  with user->u */
1095:   VecCopy(user->Av_u,user->Swork);
1096:   VecReciprocal(user->Swork);
1097:   if (user->use_ptap) {
1098:     MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES);
1099:     MatPtAP(user->Diag,user->Grad,MAT_REUSE_MATRIX,1.0,&user->DSG);
1100:   } else {
1101:     MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN);
1102:     MatDiagonalScale(user->Divwork,NULL,user->Av_u);
1103:     MatMatMultNumeric(user->Divwork,user->Grad,user->DSG);
1104:   }

1106:   /* Now solve for y */

1108:   MatMult(user->JsInv,user->q,user->y);

1110:   user->ksp_its_initial = user->ksp_its;
1111:   user->ksp_its = 0;
1112:   /* Construct projection matrix Q (blocks) */
1113:   MatCreate(PETSC_COMM_WORLD,&user->Q);
1114:   MatSetSizes(user->Q,dsubnlocal,ysubnlocal,user->ndata,user->ndesign);
1115:   MatSetFromOptions(user->Q);
1116:   MatMPIAIJSetPreallocation(user->Q,8,NULL,8,NULL);
1117:   MatSeqAIJSetPreallocation(user->Q,8,NULL);

1119:   for (i=0; i<user->mx; i++){
1120:     x[i] = h*(i+0.5);
1121:     y[i] = h*(i+0.5);
1122:     z[i] = h*(i+0.5);
1123:   }
1124:   MatGetOwnershipRange(user->Q,&istart,&iend);

1126:   nx = user->mx; ny = user->mx; nz = user->mx;
1127:   for (i=istart; i<iend; i++){

1129:     xri = xr[i];
1130:     im = 0;
1131:     xim = x[im];
1132:     while (xri>xim && im<nx){
1133:       im = im+1;
1134:       xim = x[im];
1135:     }
1136:     indx1 = im-1;
1137:     indx2 = im;
1138:     dx1 = xri - x[indx1];
1139:     dx2 = x[indx2] - xri;

1141:     yri = yr[i];
1142:     im = 0;
1143:     yim = y[im];
1144:     while (yri>yim && im<ny){
1145:       im = im+1;
1146:       yim = y[im];
1147:     }
1148:     indy1 = im-1;
1149:     indy2 = im;
1150:     dy1 = yri - y[indy1];
1151:     dy2 = y[indy2] - yri;

1153:     zri = zr[i];
1154:     im = 0;
1155:     zim = z[im];
1156:     while (zri>zim && im<nz){
1157:       im = im+1;
1158:       zim = z[im];
1159:     }
1160:     indz1 = im-1;
1161:     indz2 = im;
1162:     dz1 = zri - z[indz1];
1163:     dz2 = z[indz2] - zri;

1165:     Dx = x[indx2] - x[indx1];
1166:     Dy = y[indy2] - y[indy1];
1167:     Dz = z[indz2] - z[indz1];

1169:     j = indx1 + indy1*nx + indz1*nx*ny;
1170:     v = (1-dx1/Dx)*(1-dy1/Dy)*(1-dz1/Dz);
1171:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES);

1173:     j = indx1 + indy1*nx + indz2*nx*ny;
1174:     v = (1-dx1/Dx)*(1-dy1/Dy)*(1-dz2/Dz);
1175:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES);

1177:     j = indx1 + indy2*nx + indz1*nx*ny;
1178:     v = (1-dx1/Dx)*(1-dy2/Dy)*(1-dz1/Dz);
1179:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES);

1181:     j = indx1 + indy2*nx + indz2*nx*ny;
1182:     v = (1-dx1/Dx)*(1-dy2/Dy)*(1-dz2/Dz);
1183:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES);

1185:     j = indx2 + indy1*nx + indz1*nx*ny;
1186:     v = (1-dx2/Dx)*(1-dy1/Dy)*(1-dz1/Dz);
1187:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES);

1189:     j = indx2 + indy1*nx + indz2*nx*ny;
1190:     v = (1-dx2/Dx)*(1-dy1/Dy)*(1-dz2/Dz);
1191:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES);

1193:     j = indx2 + indy2*nx + indz1*nx*ny;
1194:     v = (1-dx2/Dx)*(1-dy2/Dy)*(1-dz1/Dz);
1195:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES);

1197:     j = indx2 + indy2*nx + indz2*nx*ny;
1198:     v = (1-dx2/Dx)*(1-dy2/Dy)*(1-dz2/Dz);
1199:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES);
1200:   }

1202:   MatAssemblyBegin(user->Q,MAT_FINAL_ASSEMBLY);
1203:   MatAssemblyEnd(user->Q,MAT_FINAL_ASSEMBLY);
1204:   /* Create MQ (composed of blocks of Q */
1205:   MatCreateShell(PETSC_COMM_WORLD,dsubnlocal*user->ns,PETSC_DECIDE,user->ndata*user->ns,user->nstate,user,&user->MQ);
1206:   MatShellSetOperation(user->MQ,MATOP_MULT,(void(*)(void))QMatMult);
1207:   MatShellSetOperation(user->MQ,MATOP_MULT_TRANSPOSE,(void(*)(void))QMatMultTranspose);

1209:   /* Add noise to the measurement data */
1210:   VecSet(user->ywork,1.0);
1211:   VecAYPX(user->ywork,user->noise,user->ytrue);
1212:   MatMult(user->MQ,user->ywork,user->d);

1214:   /* Now that initial conditions have been set, let the user pass tolerance options to the KSP solver */
1215:   PetscFree(x);
1216:   PetscFree(y);
1217:   PetscFree(z);
1218:   PetscLogStagePop();
1219:   return(0);
1220: }

1222: PetscErrorCode EllipticDestroy(AppCtx *user)
1223: {
1225:   PetscInt       i;

1228:   MatDestroy(&user->DSG);
1229:   KSPDestroy(&user->solver);
1230:   MatDestroy(&user->Q);
1231:   MatDestroy(&user->MQ);
1232:   if (!user->use_ptap) {
1233:     MatDestroy(&user->Div);
1234:     MatDestroy(&user->Divwork);
1235:   } else {
1236:     MatDestroy(&user->Diag);
1237:   }
1238:   if (user->use_lrc) {
1239:     MatDestroy(&user->Ones);
1240:   }

1242:   MatDestroy(&user->Grad);
1243:   MatDestroy(&user->Av);
1244:   MatDestroy(&user->Avwork);
1245:   MatDestroy(&user->L);
1246:   MatDestroy(&user->Js);
1247:   MatDestroy(&user->Jd);
1248:   MatDestroy(&user->JsBlock);
1249:   MatDestroy(&user->JsInv);

1251:   VecDestroy(&user->x);
1252:   VecDestroy(&user->u);
1253:   VecDestroy(&user->uwork);
1254:   VecDestroy(&user->utrue);
1255:   VecDestroy(&user->y);
1256:   VecDestroy(&user->ywork);
1257:   VecDestroy(&user->ytrue);
1258:   VecDestroy(&user->c);
1259:   VecDestroy(&user->cwork);
1260:   VecDestroy(&user->ur);
1261:   VecDestroy(&user->q);
1262:   VecDestroy(&user->d);
1263:   VecDestroy(&user->dwork);
1264:   VecDestroy(&user->lwork);
1265:   VecDestroy(&user->S);
1266:   VecDestroy(&user->Swork);
1267:   VecDestroy(&user->Sdiag);
1268:   VecDestroy(&user->Ywork);
1269:   VecDestroy(&user->Twork);
1270:   VecDestroy(&user->Av_u);
1271:   VecDestroy(&user->js_diag);
1272:   ISDestroy(&user->s_is);
1273:   ISDestroy(&user->d_is);
1274:   VecDestroy(&user->suby);
1275:   VecDestroy(&user->subd);
1276:   VecDestroy(&user->subq);
1277:   VecScatterDestroy(&user->state_scatter);
1278:   VecScatterDestroy(&user->design_scatter);
1279:   for (i=0;i<user->ns;i++) {
1280:     VecScatterDestroy(&user->yi_scatter[i]);
1281:     VecScatterDestroy(&user->di_scatter[i]);
1282:   }
1283:   PetscFree(user->yi_scatter);
1284:   PetscFree(user->di_scatter);
1285:   if (user->use_lrc) {
1286:     PetscFree(user->ones);
1287:     MatDestroy(&user->Ones);
1288:   }
1289:   return(0);
1290: }

1292: PetscErrorCode EllipticMonitor(Tao tao, void *ptr)
1293: {
1295:   Vec            X;
1296:   PetscReal      unorm,ynorm;
1297:   AppCtx         *user = (AppCtx*)ptr;

1300:   TaoGetSolutionVector(tao,&X);
1301:   Scatter(X,user->ywork,user->state_scatter,user->uwork,user->design_scatter);
1302:   VecAXPY(user->ywork,-1.0,user->ytrue);
1303:   VecAXPY(user->uwork,-1.0,user->utrue);
1304:   VecNorm(user->uwork,NORM_2,&unorm);
1305:   VecNorm(user->ywork,NORM_2,&ynorm);
1306:   PetscPrintf(MPI_COMM_WORLD, "||u-ut||=%g ||y-yt||=%g\n",(double)unorm,(double)ynorm);
1307:   return(0);
1308: }




1313: /*TEST

1315:    build:
1316:       requires: !complex

1318:    test:
1319:       args: -tao_cmonitor -ns 1 -tao_type lcl -tao_gatol 1.e-3 -tao_max_it 11
1320:       requires: !single

1322:    test:
1323:       suffix: 2
1324:       args: -tao_cmonitor -tao_type lcl -tao_max_it 11 -use_ptap -use_lrc -ns 1 -tao_gatol 1.e-3
1325:       requires: !single

1327: TEST*/