Actual source code: plog.c

petsc-main 2021-04-16
Report Typos and Errors

  2: /*
  3:       PETSc code to log object creation and destruction and PETSc events.

  5:       This provides the public API used by the rest of PETSc and by users.

  7:       These routines use a private API that is not used elsewhere in PETSc and is not
  8:       accessible to users. The private API is defined in logimpl.h and the utils directory.

 10: */
 11: #include <petsc/private/logimpl.h>
 12: #include <petsctime.h>
 13: #include <petscviewer.h>

 15: PetscErrorCode PetscLogObjectParent(PetscObject p,PetscObject c)
 16: {
 17:   if (!c || !p) return 0;
 18:   c->parent   = p;
 19:   c->parentid = p->id;
 20:   return 0;
 21: }

 23: /*@C
 24:    PetscLogObjectMemory - Adds to an object a count of additional amount of memory that is used by the object.

 26:    Not collective.

 28:    Input Parameters:
 29: +  obj  - the PETSc object
 30: -  mem  - the amount of memory that is being added to the object

 32:    Level: developer

 34:    Developer Notes:
 35:     Currently we do not always do a good job of associating all memory allocations with an object.

 37: .seealso: PetscFinalize(), PetscInitializeFortran(), PetscGetArgs(), PetscInitializeNoArguments()

 39: @*/
 40: PetscErrorCode PetscLogObjectMemory(PetscObject p,PetscLogDouble m)
 41: {
 42:   if (!p) return 0;
 43:   p->mem += m;
 44:   return 0;
 45: }

 47: PetscLogEvent PETSC_LARGEST_EVENT = PETSC_EVENT;

 49: #if defined(PETSC_USE_LOG)
 50: #include <petscmachineinfo.h>
 51: #include <petscconfiginfo.h>

 53: /* used in the MPI_XXX() count macros in petsclog.h */

 55: /* Action and object logging variables */
 56: Action    *petsc_actions            = NULL;
 57: Object    *petsc_objects            = NULL;
 58: PetscBool petsc_logActions          = PETSC_FALSE;
 59: PetscBool petsc_logObjects          = PETSC_FALSE;
 60: int       petsc_numActions          = 0, petsc_maxActions = 100;
 61: int       petsc_numObjects          = 0, petsc_maxObjects = 100;
 62: int       petsc_numObjectsDestroyed = 0;

 64: /* Global counters */
 65: PetscLogDouble petsc_BaseTime        = 0.0;
 66: PetscLogDouble petsc_TotalFlops      = 0.0;  /* The number of flops */
 67: PetscLogDouble petsc_tmp_flops       = 0.0;  /* The incremental number of flops */
 68: PetscLogDouble petsc_send_ct         = 0.0;  /* The number of sends */
 69: PetscLogDouble petsc_recv_ct         = 0.0;  /* The number of receives */
 70: PetscLogDouble petsc_send_len        = 0.0;  /* The total length of all sent messages */
 71: PetscLogDouble petsc_recv_len        = 0.0;  /* The total length of all received messages */
 72: PetscLogDouble petsc_isend_ct        = 0.0;  /* The number of immediate sends */
 73: PetscLogDouble petsc_irecv_ct        = 0.0;  /* The number of immediate receives */
 74: PetscLogDouble petsc_isend_len       = 0.0;  /* The total length of all immediate send messages */
 75: PetscLogDouble petsc_irecv_len       = 0.0;  /* The total length of all immediate receive messages */
 76: PetscLogDouble petsc_wait_ct         = 0.0;  /* The number of waits */
 77: PetscLogDouble petsc_wait_any_ct     = 0.0;  /* The number of anywaits */
 78: PetscLogDouble petsc_wait_all_ct     = 0.0;  /* The number of waitalls */
 79: PetscLogDouble petsc_sum_of_waits_ct = 0.0;  /* The total number of waits */
 80: PetscLogDouble petsc_allreduce_ct    = 0.0;  /* The number of reductions */
 81: PetscLogDouble petsc_gather_ct       = 0.0;  /* The number of gathers and gathervs */
 82: PetscLogDouble petsc_scatter_ct      = 0.0;  /* The number of scatters and scattervs */
 83: #if defined(PETSC_HAVE_DEVICE)
 84: PetscLogDouble petsc_ctog_ct         = 0.0;  /* The total number of CPU to GPU copies */
 85: PetscLogDouble petsc_gtoc_ct         = 0.0;  /* The total number of GPU to CPU copies */
 86: PetscLogDouble petsc_ctog_sz         = 0.0;  /* The total size of CPU to GPU copies */
 87: PetscLogDouble petsc_gtoc_sz         = 0.0;  /* The total size of GPU to CPU copies */
 88: PetscLogDouble petsc_gflops          = 0.0;  /* The flops done on a GPU */
 89: PetscLogDouble petsc_gtime           = 0.0;  /* The time spent on a GPU */
 90: #if defined(PETSC_USE_DEBUG)
 91: PetscBool petsc_gtime_inuse = PETSC_FALSE;
 92: #endif
 93: #endif

 95: /* Logging functions */
 96: PetscErrorCode (*PetscLogPHC)(PetscObject) = NULL;
 97: PetscErrorCode (*PetscLogPHD)(PetscObject) = NULL;
 98: PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL;
 99: PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL;

101: /* Tracing event logging variables */
102: FILE             *petsc_tracefile            = NULL;
103: int              petsc_tracelevel            = 0;
104: const char       *petsc_traceblanks          = "                                                                                                    ";
105: char             petsc_tracespace[128]       = " ";
106: PetscLogDouble   petsc_tracetime             = 0.0;
107: static PetscBool PetscLogInitializeCalled = PETSC_FALSE;

109: PETSC_INTERN PetscErrorCode PetscLogInitialize(void)
110: {
111:   int            stage;
112:   PetscBool      opt;

116:   if (PetscLogInitializeCalled) return(0);
117:   PetscLogInitializeCalled = PETSC_TRUE;

119:   PetscOptionsHasName(NULL,NULL, "-log_exclude_actions", &opt);
120:   if (opt) petsc_logActions = PETSC_FALSE;
121:   PetscOptionsHasName(NULL,NULL, "-log_exclude_objects", &opt);
122:   if (opt) petsc_logObjects = PETSC_FALSE;
123:   if (petsc_logActions) {
124:     PetscMalloc1(petsc_maxActions, &petsc_actions);
125:   }
126:   if (petsc_logObjects) {
127:     PetscMalloc1(petsc_maxObjects, &petsc_objects);
128:   }
129:   PetscLogPHC = PetscLogObjCreateDefault;
130:   PetscLogPHD = PetscLogObjDestroyDefault;
131:   /* Setup default logging structures */
132:   PetscStageLogCreate(&petsc_stageLog);
133:   PetscStageLogRegister(petsc_stageLog, "Main Stage", &stage);

135:   /* All processors sync here for more consistent logging */
136:   MPI_Barrier(PETSC_COMM_WORLD);
137:   PetscTime(&petsc_BaseTime);
138:   PetscLogStagePush(stage);
139:   return(0);
140: }

142: PETSC_INTERN PetscErrorCode PetscLogFinalize(void)
143: {
144:   PetscStageLog  stageLog;

148:   PetscFree(petsc_actions);
149:   PetscFree(petsc_objects);
150:   PetscLogNestedEnd();
151:   PetscLogSet(NULL, NULL);

153:   /* Resetting phase */
154:   PetscLogGetStageLog(&stageLog);
155:   PetscStageLogDestroy(stageLog);

157:   petsc_TotalFlops            = 0.0;
158:   petsc_numActions            = 0;
159:   petsc_numObjects            = 0;
160:   petsc_numObjectsDestroyed   = 0;
161:   petsc_maxActions            = 100;
162:   petsc_maxObjects            = 100;
163:   petsc_actions               = NULL;
164:   petsc_objects               = NULL;
165:   petsc_logActions            = PETSC_FALSE;
166:   petsc_logObjects            = PETSC_FALSE;
167:   petsc_BaseTime              = 0.0;
168:   petsc_TotalFlops            = 0.0;
169:   petsc_tmp_flops             = 0.0;
170:   petsc_send_ct               = 0.0;
171:   petsc_recv_ct               = 0.0;
172:   petsc_send_len              = 0.0;
173:   petsc_recv_len              = 0.0;
174:   petsc_isend_ct              = 0.0;
175:   petsc_irecv_ct              = 0.0;
176:   petsc_isend_len             = 0.0;
177:   petsc_irecv_len             = 0.0;
178:   petsc_wait_ct               = 0.0;
179:   petsc_wait_any_ct           = 0.0;
180:   petsc_wait_all_ct           = 0.0;
181:   petsc_sum_of_waits_ct       = 0.0;
182:   petsc_allreduce_ct          = 0.0;
183:   petsc_gather_ct             = 0.0;
184:   petsc_scatter_ct            = 0.0;
185:   #if defined(PETSC_HAVE_DEVICE)
186:   petsc_ctog_ct               = 0.0;
187:   petsc_gtoc_ct               = 0.0;
188:   petsc_ctog_sz               = 0.0;
189:   petsc_gtoc_sz               = 0.0;
190:   petsc_gflops                = 0.0;
191:   petsc_gtime                 = 0.0;
192:   #endif
193:   PETSC_LARGEST_EVENT         = PETSC_EVENT;
194:   PetscLogPHC                 = NULL;
195:   PetscLogPHD                 = NULL;
196:   petsc_tracefile             = NULL;
197:   petsc_tracelevel            = 0;
198:   petsc_traceblanks           = "                                                                                                    ";
199:   petsc_tracespace[0]         = ' '; petsc_tracespace[1] = 0;
200:   petsc_tracetime             = 0.0;
201:   PETSC_LARGEST_CLASSID       = PETSC_SMALLEST_CLASSID;
202:   PETSC_OBJECT_CLASSID        = 0;
203:   petsc_stageLog              = NULL;
204:   PetscLogInitializeCalled    = PETSC_FALSE;
205:   return(0);
206: }

208: /*@C
209:   PetscLogSet - Sets the logging functions called at the beginning and ending of every event.

211:   Not Collective

213:   Input Parameters:
214: + b - The function called at beginning of event
215: - e - The function called at end of event

217:   Level: developer

219: .seealso: PetscLogDump(), PetscLogDefaultBegin(), PetscLogAllBegin(), PetscLogTraceBegin()
220: @*/
221: PetscErrorCode  PetscLogSet(PetscErrorCode (*b)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject),
222:                             PetscErrorCode (*e)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject))
223: {
225:   PetscLogPLB = b;
226:   PetscLogPLE = e;
227:   return(0);
228: }

230: /*@C
231:   PetscLogDefaultBegin - Turns on logging of objects and events. This logs flop
232:   rates and object creation and should not slow programs down too much.
233:   This routine may be called more than once.

235:   Logically Collective over PETSC_COMM_WORLD

237:   Options Database Keys:
238: . -log_view [viewertype:filename:viewerformat] - Prints summary of flop and timing information to the
239:                   screen (for code configured with --with-log=1 (which is the default))

241:   Usage:
242: .vb
243:       PetscInitialize(...);
244:       PetscLogDefaultBegin();
245:        ... code ...
246:       PetscLogView(viewer); or PetscLogDump();
247:       PetscFinalize();
248: .ve

250:   Notes:
251:   PetscLogView(viewer) or PetscLogDump() actually cause the printing of
252:   the logging information.

254:   Level: advanced

256: .seealso: PetscLogDump(), PetscLogAllBegin(), PetscLogView(), PetscLogTraceBegin()
257: @*/
258: PetscErrorCode  PetscLogDefaultBegin(void)
259: {

263:   PetscLogSet(PetscLogEventBeginDefault, PetscLogEventEndDefault);
264:   return(0);
265: }

267: /*@C
268:   PetscLogAllBegin - Turns on extensive logging of objects and events. Logs
269:   all events. This creates large log files and slows the program down.

271:   Logically Collective on PETSC_COMM_WORLD

273:   Options Database Keys:
274: . -log_all - Prints extensive log information

276:   Usage:
277: .vb
278:      PetscInitialize(...);
279:      PetscLogAllBegin();
280:      ... code ...
281:      PetscLogDump(filename);
282:      PetscFinalize();
283: .ve

285:   Notes:
286:   A related routine is PetscLogDefaultBegin() (with the options key -log), which is
287:   intended for production runs since it logs only flop rates and object
288:   creation (and shouldn't significantly slow the programs).

290:   Level: advanced

292: .seealso: PetscLogDump(), PetscLogDefaultBegin(), PetscLogTraceBegin()
293: @*/
294: PetscErrorCode  PetscLogAllBegin(void)
295: {

299:   PetscLogSet(PetscLogEventBeginComplete, PetscLogEventEndComplete);
300:   return(0);
301: }

303: /*@C
304:   PetscLogTraceBegin - Activates trace logging.  Every time a PETSc event
305:   begins or ends, the event name is printed.

307:   Logically Collective on PETSC_COMM_WORLD

309:   Input Parameter:
310: . file - The file to print trace in (e.g. stdout)

312:   Options Database Key:
313: . -log_trace [filename] - Activates PetscLogTraceBegin()

315:   Notes:
316:   PetscLogTraceBegin() prints the processor number, the execution time (sec),
317:   then "Event begin:" or "Event end:" followed by the event name.

319:   PetscLogTraceBegin() allows tracing of all PETSc calls, which is useful
320:   to determine where a program is hanging without running in the
321:   debugger.  Can be used in conjunction with the -info option.

323:   Level: intermediate

325: .seealso: PetscLogDump(), PetscLogAllBegin(), PetscLogView(), PetscLogDefaultBegin()
326: @*/
327: PetscErrorCode  PetscLogTraceBegin(FILE *file)
328: {

332:   petsc_tracefile = file;

334:   PetscLogSet(PetscLogEventBeginTrace, PetscLogEventEndTrace);
335:   return(0);
336: }

338: /*@
339:   PetscLogActions - Determines whether actions are logged for the graphical viewer.

341:   Not Collective

343:   Input Parameter:
344: . flag - PETSC_TRUE if actions are to be logged

346:   Level: intermediate

348:   Note: Logging of actions continues to consume more memory as the program
349:   runs. Long running programs should consider turning this feature off.

351:   Options Database Keys:
352: . -log_exclude_actions - Turns off actions logging

354: .seealso: PetscLogStagePush(), PetscLogStagePop()
355: @*/
356: PetscErrorCode  PetscLogActions(PetscBool flag)
357: {
359:   petsc_logActions = flag;
360:   return(0);
361: }

363: /*@
364:   PetscLogObjects - Determines whether objects are logged for the graphical viewer.

366:   Not Collective

368:   Input Parameter:
369: . flag - PETSC_TRUE if objects are to be logged

371:   Level: intermediate

373:   Note: Logging of objects continues to consume more memory as the program
374:   runs. Long running programs should consider turning this feature off.

376:   Options Database Keys:
377: . -log_exclude_objects - Turns off objects logging

379: .seealso: PetscLogStagePush(), PetscLogStagePop()
380: @*/
381: PetscErrorCode  PetscLogObjects(PetscBool flag)
382: {
384:   petsc_logObjects = flag;
385:   return(0);
386: }

388: /*------------------------------------------------ Stage Functions --------------------------------------------------*/
389: /*@C
390:   PetscLogStageRegister - Attaches a character string name to a logging stage.

392:   Not Collective

394:   Input Parameter:
395: . sname - The name to associate with that stage

397:   Output Parameter:
398: . stage - The stage number

400:   Level: intermediate

402: .seealso: PetscLogStagePush(), PetscLogStagePop()
403: @*/
404: PetscErrorCode  PetscLogStageRegister(const char sname[],PetscLogStage *stage)
405: {
406:   PetscStageLog  stageLog;
407:   PetscLogEvent  event;

411:   PetscLogGetStageLog(&stageLog);
412:   PetscStageLogRegister(stageLog, sname, stage);
413:   /* Copy events already changed in the main stage, this sucks */
414:   PetscEventPerfLogEnsureSize(stageLog->stageInfo[*stage].eventLog, stageLog->eventLog->numEvents);
415:   for (event = 0; event < stageLog->eventLog->numEvents; event++) {
416:     PetscEventPerfInfoCopy(&stageLog->stageInfo[0].eventLog->eventInfo[event],&stageLog->stageInfo[*stage].eventLog->eventInfo[event]);
417:   }
418:   PetscClassPerfLogEnsureSize(stageLog->stageInfo[*stage].classLog, stageLog->classLog->numClasses);
419:   return(0);
420: }

422: /*@C
423:   PetscLogStagePush - This function pushes a stage on the stack.

425:   Not Collective

427:   Input Parameter:
428: . stage - The stage on which to log

430:   Usage:
431:   If the option -log_sumary is used to run the program containing the
432:   following code, then 2 sets of summary data will be printed during
433:   PetscFinalize().
434: .vb
435:       PetscInitialize(int *argc,char ***args,0,0);
436:       [stage 0 of code]
437:       PetscLogStagePush(1);
438:       [stage 1 of code]
439:       PetscLogStagePop();
440:       PetscBarrier(...);
441:       [more stage 0 of code]
442:       PetscFinalize();
443: .ve

445:   Notes:
446:   Use PetscLogStageRegister() to register a stage.

448:   Level: intermediate

450: .seealso: PetscLogStagePop(), PetscLogStageRegister(), PetscBarrier()
451: @*/
452: PetscErrorCode  PetscLogStagePush(PetscLogStage stage)
453: {
454:   PetscStageLog  stageLog;

458:   PetscLogGetStageLog(&stageLog);
459:   PetscStageLogPush(stageLog, stage);
460:   return(0);
461: }

463: /*@C
464:   PetscLogStagePop - This function pops a stage from the stack.

466:   Not Collective

468:   Usage:
469:   If the option -log_sumary is used to run the program containing the
470:   following code, then 2 sets of summary data will be printed during
471:   PetscFinalize().
472: .vb
473:       PetscInitialize(int *argc,char ***args,0,0);
474:       [stage 0 of code]
475:       PetscLogStagePush(1);
476:       [stage 1 of code]
477:       PetscLogStagePop();
478:       PetscBarrier(...);
479:       [more stage 0 of code]
480:       PetscFinalize();
481: .ve

483:   Notes:
484:   Use PetscLogStageRegister() to register a stage.

486:   Level: intermediate

488: .seealso: PetscLogStagePush(), PetscLogStageRegister(), PetscBarrier()
489: @*/
490: PetscErrorCode  PetscLogStagePop(void)
491: {
492:   PetscStageLog  stageLog;

496:   PetscLogGetStageLog(&stageLog);
497:   PetscStageLogPop(stageLog);
498:   return(0);
499: }

501: /*@
502:   PetscLogStageSetActive - Determines stage activity for PetscLogEventBegin() and PetscLogEventEnd().

504:   Not Collective

506:   Input Parameters:
507: + stage    - The stage
508: - isActive - The activity flag, PETSC_TRUE for logging, else PETSC_FALSE (defaults to PETSC_TRUE)

510:   Level: intermediate

512: .seealso: PetscLogStagePush(), PetscLogStagePop(), PetscLogEventBegin(), PetscLogEventEnd(), PetscPreLoadBegin(), PetscPreLoadEnd(), PetscPreLoadStage()
513: @*/
514: PetscErrorCode  PetscLogStageSetActive(PetscLogStage stage, PetscBool isActive)
515: {
516:   PetscStageLog  stageLog;

520:   PetscLogGetStageLog(&stageLog);
521:   PetscStageLogSetActive(stageLog, stage, isActive);
522:   return(0);
523: }

525: /*@
526:   PetscLogStageGetActive - Returns stage activity for PetscLogEventBegin() and PetscLogEventEnd().

528:   Not Collective

530:   Input Parameter:
531: . stage    - The stage

533:   Output Parameter:
534: . isActive - The activity flag, PETSC_TRUE for logging, else PETSC_FALSE (defaults to PETSC_TRUE)

536:   Level: intermediate

538: .seealso: PetscLogStagePush(), PetscLogStagePop(), PetscLogEventBegin(), PetscLogEventEnd(), PetscPreLoadBegin(), PetscPreLoadEnd(), PetscPreLoadStage()
539: @*/
540: PetscErrorCode  PetscLogStageGetActive(PetscLogStage stage, PetscBool  *isActive)
541: {
542:   PetscStageLog  stageLog;

546:   PetscLogGetStageLog(&stageLog);
547:   PetscStageLogGetActive(stageLog, stage, isActive);
548:   return(0);
549: }

551: /*@
552:   PetscLogStageSetVisible - Determines stage visibility in PetscLogView()

554:   Not Collective

556:   Input Parameters:
557: + stage     - The stage
558: - isVisible - The visibility flag, PETSC_TRUE to print, else PETSC_FALSE (defaults to PETSC_TRUE)

560:   Level: intermediate

562: .seealso: PetscLogStagePush(), PetscLogStagePop(), PetscLogView()
563: @*/
564: PetscErrorCode  PetscLogStageSetVisible(PetscLogStage stage, PetscBool isVisible)
565: {
566:   PetscStageLog  stageLog;

570:   PetscLogGetStageLog(&stageLog);
571:   PetscStageLogSetVisible(stageLog, stage, isVisible);
572:   return(0);
573: }

575: /*@
576:   PetscLogStageGetVisible - Returns stage visibility in PetscLogView()

578:   Not Collective

580:   Input Parameter:
581: . stage     - The stage

583:   Output Parameter:
584: . isVisible - The visibility flag, PETSC_TRUE to print, else PETSC_FALSE (defaults to PETSC_TRUE)

586:   Level: intermediate

588: .seealso: PetscLogStagePush(), PetscLogStagePop(), PetscLogView()
589: @*/
590: PetscErrorCode  PetscLogStageGetVisible(PetscLogStage stage, PetscBool  *isVisible)
591: {
592:   PetscStageLog  stageLog;

596:   PetscLogGetStageLog(&stageLog);
597:   PetscStageLogGetVisible(stageLog, stage, isVisible);
598:   return(0);
599: }

601: /*@C
602:   PetscLogStageGetId - Returns the stage id when given the stage name.

604:   Not Collective

606:   Input Parameter:
607: . name  - The stage name

609:   Output Parameter:
610: . stage - The stage, , or -1 if no stage with that name exists

612:   Level: intermediate

614: .seealso: PetscLogStagePush(), PetscLogStagePop(), PetscPreLoadBegin(), PetscPreLoadEnd(), PetscPreLoadStage()
615: @*/
616: PetscErrorCode  PetscLogStageGetId(const char name[], PetscLogStage *stage)
617: {
618:   PetscStageLog  stageLog;

622:   PetscLogGetStageLog(&stageLog);
623:   PetscStageLogGetStage(stageLog, name, stage);
624:   return(0);
625: }

627: /*------------------------------------------------ Event Functions --------------------------------------------------*/
628: /*@C
629:   PetscLogEventRegister - Registers an event name for logging operations in an application code.

631:   Not Collective

633:   Input Parameter:
634: + name   - The name associated with the event
635: - classid - The classid associated to the class for this event, obtain either with
636:            PetscClassIdRegister() or use a predefined one such as KSP_CLASSID, SNES_CLASSID, the predefined ones
637:            are only available in C code

639:   Output Parameter:
640: . event - The event id for use with PetscLogEventBegin() and PetscLogEventEnd().

642:   Example of Usage:
643: .vb
644:       PetscLogEvent USER_EVENT;
645:       PetscClassId classid;
646:       PetscLogDouble user_event_flops;
647:       PetscClassIdRegister("class name",&classid);
648:       PetscLogEventRegister("User event name",classid,&USER_EVENT);
649:       PetscLogEventBegin(USER_EVENT,0,0,0,0);
650:          [code segment to monitor]
651:          PetscLogFlops(user_event_flops);
652:       PetscLogEventEnd(USER_EVENT,0,0,0,0);
653: .ve

655:   Notes:
656:   PETSc automatically logs library events if the code has been
657:   configured with --with-log (which is the default) and
658:   -log_view or -log_all is specified.  PetscLogEventRegister() is
659:   intended for logging user events to supplement this PETSc
660:   information.

662:   PETSc can gather data for use with the utilities Jumpshot
663:   (part of the MPICH distribution).  If PETSc has been compiled
664:   with flag -DPETSC_HAVE_MPE (MPE is an additional utility within
665:   MPICH), the user can employ another command line option, -log_mpe,
666:   to create a logfile, "mpe.log", which can be visualized
667:   Jumpshot.

669:   The classid is associated with each event so that classes of events
670:   can be disabled simultaneously, such as all matrix events. The user
671:   can either use an existing classid, such as MAT_CLASSID, or create
672:   their own as shown in the example.

674:   If an existing event with the same name exists, its event handle is
675:   returned instead of creating a new event.

677:   Level: intermediate

679: .seealso: PetscLogEventBegin(), PetscLogEventEnd(), PetscLogFlops(),
680:           PetscLogEventActivate(), PetscLogEventDeactivate(), PetscClassIdRegister()
681: @*/
682: PetscErrorCode  PetscLogEventRegister(const char name[],PetscClassId classid,PetscLogEvent *event)
683: {
684:   PetscStageLog  stageLog;
685:   int            stage;

689:   *event = PETSC_DECIDE;
690:   PetscLogGetStageLog(&stageLog);
691:   PetscEventRegLogGetEvent(stageLog->eventLog, name, event);
692:   if (*event > 0) return(0);
693:   PetscEventRegLogRegister(stageLog->eventLog, name, classid, event);
694:   for (stage = 0; stage < stageLog->numStages; stage++) {
695:     PetscEventPerfLogEnsureSize(stageLog->stageInfo[stage].eventLog, stageLog->eventLog->numEvents);
696:     PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses);
697:   }
698:   return(0);
699: }

701: /*@
702:   PetscLogEventSetCollective - Indicates that a particular event is collective.

704:   Not Collective

706:   Input Parameter:
707: + event - The event id
708: - collective - Bolean flag indicating whether a particular event is collective

710:   Note:
711:   New events returned from PetscLogEventRegister() are collective by default.

713:   Level: developer

715: .seealso: PetscLogEventRegister()
716: @*/
717: PetscErrorCode PetscLogEventSetCollective(PetscLogEvent event,PetscBool collective)
718: {
719:   PetscStageLog    stageLog;
720:   PetscEventRegLog eventRegLog;
721:   PetscErrorCode   ierr;

724:   PetscLogGetStageLog(&stageLog);
725:   PetscStageLogGetEventRegLog(stageLog,&eventRegLog);
726:   if (event < 0 || event > eventRegLog->numEvents) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid event id");
727:   eventRegLog->eventInfo[event].collective = collective;
728:   return(0);
729: }

731: /*@
732:   PetscLogEventIncludeClass - Activates event logging for a PETSc object class in every stage.

734:   Not Collective

736:   Input Parameter:
737: . classid - The object class, for example MAT_CLASSID, SNES_CLASSID, etc.

739:   Level: developer

741: .seealso: PetscLogEventActivateClass(),PetscLogEventDeactivateClass(),PetscLogEventActivate(),PetscLogEventDeactivate()
742: @*/
743: PetscErrorCode  PetscLogEventIncludeClass(PetscClassId classid)
744: {
745:   PetscStageLog  stageLog;
746:   int            stage;

750:   PetscLogGetStageLog(&stageLog);
751:   for (stage = 0; stage < stageLog->numStages; stage++) {
752:     PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid);
753:   }
754:   return(0);
755: }

757: /*@
758:   PetscLogEventExcludeClass - Deactivates event logging for a PETSc object class in every stage.

760:   Not Collective

762:   Input Parameter:
763: . classid - The object class, for example MAT_CLASSID, SNES_CLASSID, etc.

765:   Level: developer

767: .seealso: PetscLogEventDeactivateClass(),PetscLogEventActivateClass(),PetscLogEventDeactivate(),PetscLogEventActivate()
768: @*/
769: PetscErrorCode  PetscLogEventExcludeClass(PetscClassId classid)
770: {
771:   PetscStageLog  stageLog;
772:   int            stage;

776:   PetscLogGetStageLog(&stageLog);
777:   for (stage = 0; stage < stageLog->numStages; stage++) {
778:     PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid);
779:   }
780:   return(0);
781: }

783: /*@
784:   PetscLogEventActivate - Indicates that a particular event should be logged.

786:   Not Collective

788:   Input Parameter:
789: . event - The event id

791:   Usage:
792: .vb
793:       PetscLogEventDeactivate(VEC_SetValues);
794:         [code where you do not want to log VecSetValues()]
795:       PetscLogEventActivate(VEC_SetValues);
796:         [code where you do want to log VecSetValues()]
797: .ve

799:   Note:
800:   The event may be either a pre-defined PETSc event (found in include/petsclog.h)
801:   or an event number obtained with PetscLogEventRegister().

803:   Level: advanced

805: .seealso: PlogEventDeactivate(), PlogEventDeactivatePush(), PetscLogEventDeactivatePop()
806: @*/
807: PetscErrorCode  PetscLogEventActivate(PetscLogEvent event)
808: {
809:   PetscStageLog  stageLog;
810:   int            stage;

814:   PetscLogGetStageLog(&stageLog);
815:   PetscStageLogGetCurrent(stageLog, &stage);
816:   PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event);
817:   return(0);
818: }

820: /*@
821:   PetscLogEventDeactivate - Indicates that a particular event should not be logged.

823:   Not Collective

825:   Input Parameter:
826: . event - The event id

828:   Usage:
829: .vb
830:       PetscLogEventDeactivate(VEC_SetValues);
831:         [code where you do not want to log VecSetValues()]
832:       PetscLogEventActivate(VEC_SetValues);
833:         [code where you do want to log VecSetValues()]
834: .ve

836:   Note:
837:   The event may be either a pre-defined PETSc event (found in
838:   include/petsclog.h) or an event number obtained with PetscLogEventRegister()).

840:   Level: advanced

842: .seealso: PetscLogEventActivate(), PetscLogEventDeactivatePush(), PetscLogEventDeactivatePop()
843: @*/
844: PetscErrorCode  PetscLogEventDeactivate(PetscLogEvent event)
845: {
846:   PetscStageLog  stageLog;
847:   int            stage;

851:   PetscLogGetStageLog(&stageLog);
852:   PetscStageLogGetCurrent(stageLog, &stage);
853:   PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event);
854:   return(0);
855: }

857: /*@
858:   PetscLogEventDeactivatePush - Indicates that a particular event should not be logged.

860:   Not Collective

862:   Input Parameter:
863: . event - The event id

865:   Usage:
866: .vb
867:       PetscLogEventDeactivatePush(VEC_SetValues);
868:         [code where you do not want to log VecSetValues()]
869:       PetscLogEventDeactivatePop(VEC_SetValues);
870:         [code where you do want to log VecSetValues()]
871: .ve

873:   Note:
874:   The event may be either a pre-defined PETSc event (found in
875:   include/petsclog.h) or an event number obtained with PetscLogEventRegister()).

877:   Level: advanced

879: .seealso: PetscLogEventActivate(), PetscLogEventDeactivatePop()
880: @*/
881: PetscErrorCode  PetscLogEventDeactivatePush(PetscLogEvent event)
882: {
883:   PetscStageLog  stageLog;
884:   int            stage;

888:   PetscLogGetStageLog(&stageLog);
889:   PetscStageLogGetCurrent(stageLog, &stage);
890:   PetscEventPerfLogDeactivatePush(stageLog->stageInfo[stage].eventLog, event);
891:   return(0);
892: }

894: /*@
895:   PetscLogEventDeactivatePop - Indicates that a particular event shouldbe logged.

897:   Not Collective

899:   Input Parameter:
900: . event - The event id

902:   Usage:
903: .vb
904:       PetscLogEventDeactivatePush(VEC_SetValues);
905:         [code where you do not want to log VecSetValues()]
906:       PetscLogEventDeactivatePop(VEC_SetValues);
907:         [code where you do want to log VecSetValues()]
908: .ve

910:   Note:
911:   The event may be either a pre-defined PETSc event (found in
912:   include/petsclog.h) or an event number obtained with PetscLogEventRegister()).

914:   Level: advanced

916: .seealso: PetscLogEventActivate(), PetscLogEventDeactivatePush()
917: @*/
918: PetscErrorCode  PetscLogEventDeactivatePop(PetscLogEvent event)
919: {
920:   PetscStageLog  stageLog;
921:   int            stage;

925:   PetscLogGetStageLog(&stageLog);
926:   PetscStageLogGetCurrent(stageLog, &stage);
927:   PetscEventPerfLogDeactivatePop(stageLog->stageInfo[stage].eventLog, event);
928:   return(0);
929: }

931: /*@
932:   PetscLogEventSetActiveAll - Sets the event activity in every stage.

934:   Not Collective

936:   Input Parameters:
937: + event    - The event id
938: - isActive - The activity flag determining whether the event is logged

940:   Level: advanced

942: .seealso: PlogEventActivate(),PlogEventDeactivate()
943: @*/
944: PetscErrorCode  PetscLogEventSetActiveAll(PetscLogEvent event, PetscBool isActive)
945: {
946:   PetscStageLog  stageLog;
947:   int            stage;

951:   PetscLogGetStageLog(&stageLog);
952:   for (stage = 0; stage < stageLog->numStages; stage++) {
953:     if (isActive) {
954:       PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event);
955:     } else {
956:       PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event);
957:     }
958:   }
959:   return(0);
960: }

962: /*@
963:   PetscLogEventActivateClass - Activates event logging for a PETSc object class.

965:   Not Collective

967:   Input Parameter:
968: . classid - The event class, for example MAT_CLASSID, SNES_CLASSID, etc.

970:   Level: developer

972: .seealso: PetscLogEventDeactivateClass(),PetscLogEventActivate(),PetscLogEventDeactivate()
973: @*/
974: PetscErrorCode  PetscLogEventActivateClass(PetscClassId classid)
975: {
976:   PetscStageLog  stageLog;
977:   int            stage;

981:   PetscLogGetStageLog(&stageLog);
982:   PetscStageLogGetCurrent(stageLog, &stage);
983:   PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid);
984:   return(0);
985: }

987: /*@
988:   PetscLogEventDeactivateClass - Deactivates event logging for a PETSc object class.

990:   Not Collective

992:   Input Parameter:
993: . classid - The event class, for example MAT_CLASSID, SNES_CLASSID, etc.

995:   Level: developer

997: .seealso: PetscLogEventActivateClass(),PetscLogEventActivate(),PetscLogEventDeactivate()
998: @*/
999: PetscErrorCode  PetscLogEventDeactivateClass(PetscClassId classid)
1000: {
1001:   PetscStageLog  stageLog;
1002:   int            stage;

1006:   PetscLogGetStageLog(&stageLog);
1007:   PetscStageLogGetCurrent(stageLog, &stage);
1008:   PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid);
1009:   return(0);
1010: }

1012: /*MC
1013:    PetscLogEventSync - Synchronizes the beginning of a user event.

1015:    Synopsis:
1016: #include <petsclog.h>
1017:    PetscErrorCode PetscLogEventSync(int e,MPI_Comm comm)

1019:    Collective

1021:    Input Parameters:
1022: +  e - integer associated with the event obtained from PetscLogEventRegister()
1023: -  comm - an MPI communicator

1025:    Usage:
1026: .vb
1027:      PetscLogEvent USER_EVENT;
1028:      PetscLogEventRegister("User event",0,&USER_EVENT);
1029:      PetscLogEventSync(USER_EVENT,PETSC_COMM_WORLD);
1030:      PetscLogEventBegin(USER_EVENT,0,0,0,0);
1031:         [code segment to monitor]
1032:      PetscLogEventEnd(USER_EVENT,0,0,0,0);
1033: .ve

1035:    Notes:
1036:    This routine should be called only if there is not a
1037:    PetscObject available to pass to PetscLogEventBegin().

1039:    Level: developer

1041: .seealso: PetscLogEventRegister(), PetscLogEventBegin(), PetscLogEventEnd()

1043: M*/

1045: /*MC
1046:    PetscLogEventBegin - Logs the beginning of a user event.

1048:    Synopsis:
1049: #include <petsclog.h>
1050:    PetscErrorCode PetscLogEventBegin(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4)

1052:    Not Collective

1054:    Input Parameters:
1055: +  e - integer associated with the event obtained from PetscLogEventRegister()
1056: -  o1,o2,o3,o4 - objects associated with the event, or 0


1059:    Fortran Synopsis:
1060:    void PetscLogEventBegin(int e,PetscErrorCode ierr)

1062:    Usage:
1063: .vb
1064:      PetscLogEvent USER_EVENT;
1065:      PetscLogDouble user_event_flops;
1066:      PetscLogEventRegister("User event",0,&USER_EVENT);
1067:      PetscLogEventBegin(USER_EVENT,0,0,0,0);
1068:         [code segment to monitor]
1069:         PetscLogFlops(user_event_flops);
1070:      PetscLogEventEnd(USER_EVENT,0,0,0,0);
1071: .ve

1073:    Notes:
1074:    You need to register each integer event with the command
1075:    PetscLogEventRegister().

1077:    Level: intermediate

1079: .seealso: PetscLogEventRegister(), PetscLogEventEnd(), PetscLogFlops()

1081: M*/

1083: /*MC
1084:    PetscLogEventEnd - Log the end of a user event.

1086:    Synopsis:
1087: #include <petsclog.h>
1088:    PetscErrorCode PetscLogEventEnd(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4)

1090:    Not Collective

1092:    Input Parameters:
1093: +  e - integer associated with the event obtained with PetscLogEventRegister()
1094: -  o1,o2,o3,o4 - objects associated with the event, or 0


1097:    Fortran Synopsis:
1098:    void PetscLogEventEnd(int e,PetscErrorCode ierr)

1100:    Usage:
1101: .vb
1102:      PetscLogEvent USER_EVENT;
1103:      PetscLogDouble user_event_flops;
1104:      PetscLogEventRegister("User event",0,&USER_EVENT,);
1105:      PetscLogEventBegin(USER_EVENT,0,0,0,0);
1106:         [code segment to monitor]
1107:         PetscLogFlops(user_event_flops);
1108:      PetscLogEventEnd(USER_EVENT,0,0,0,0);
1109: .ve

1111:    Notes:
1112:    You should also register each additional integer event with the command
1113:    PetscLogEventRegister().

1115:    Level: intermediate

1117: .seealso: PetscLogEventRegister(), PetscLogEventBegin(), PetscLogFlops()

1119: M*/

1121: /*@C
1122:   PetscLogEventGetId - Returns the event id when given the event name.

1124:   Not Collective

1126:   Input Parameter:
1127: . name  - The event name

1129:   Output Parameter:
1130: . event - The event, or -1 if no event with that name exists

1132:   Level: intermediate

1134: .seealso: PetscLogEventBegin(), PetscLogEventEnd(), PetscLogStageGetId()
1135: @*/
1136: PetscErrorCode  PetscLogEventGetId(const char name[], PetscLogEvent *event)
1137: {
1138:   PetscStageLog  stageLog;

1142:   PetscLogGetStageLog(&stageLog);
1143:   PetscEventRegLogGetEvent(stageLog->eventLog, name, event);
1144:   return(0);
1145: }


1148: /*------------------------------------------------ Output Functions -------------------------------------------------*/
1149: /*@C
1150:   PetscLogDump - Dumps logs of objects to a file. This file is intended to
1151:   be read by bin/petscview. This program no longer exists.

1153:   Collective on PETSC_COMM_WORLD

1155:   Input Parameter:
1156: . name - an optional file name

1158:   Usage:
1159: .vb
1160:      PetscInitialize(...);
1161:      PetscLogDefaultBegin(); or PetscLogAllBegin();
1162:      ... code ...
1163:      PetscLogDump(filename);
1164:      PetscFinalize();
1165: .ve

1167:   Notes:
1168:   The default file name is
1169: $    Log.<rank>
1170:   where <rank> is the processor number. If no name is specified,
1171:   this file will be used.

1173:   Level: advanced

1175: .seealso: PetscLogDefaultBegin(), PetscLogAllBegin(), PetscLogView()
1176: @*/
1177: PetscErrorCode  PetscLogDump(const char sname[])
1178: {
1179:   PetscStageLog      stageLog;
1180:   PetscEventPerfInfo *eventInfo;
1181:   FILE               *fd;
1182:   char               file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN];
1183:   PetscLogDouble     flops, _TotalTime;
1184:   PetscMPIInt        rank;
1185:   int                action, object, curStage;
1186:   PetscLogEvent      event;
1187:   PetscErrorCode     ierr;

1190:   /* Calculate the total elapsed time */
1191:   PetscTime(&_TotalTime);
1192:   _TotalTime -= petsc_BaseTime;
1193:   /* Open log file */
1194:   MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
1195:   if (sname && sname[0]) sprintf(file, "%s.%d", sname, rank);
1196:   else sprintf(file, "Log.%d", rank);
1197:   PetscFixFilename(file, fname);
1198:   PetscFOpen(PETSC_COMM_WORLD, fname, "w", &fd);
1199:   if ((!rank) && (!fd)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname);
1200:   /* Output totals */
1201:   PetscFPrintf(PETSC_COMM_WORLD, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime);
1202:   PetscFPrintf(PETSC_COMM_WORLD, fd, "Clock Resolution %g\n", 0.0);
1203:   /* Output actions */
1204:   if (petsc_logActions) {
1205:     PetscFPrintf(PETSC_COMM_WORLD, fd, "Actions accomplished %d\n", petsc_numActions);
1206:     for (action = 0; action < petsc_numActions; action++) {
1207:       PetscFPrintf(PETSC_COMM_WORLD, fd, "%g %d %d %d %d %d %d %g %g %g\n",
1208:                           petsc_actions[action].time, petsc_actions[action].action, (int)petsc_actions[action].event, (int)petsc_actions[action].classid, petsc_actions[action].id1,
1209:                           petsc_actions[action].id2, petsc_actions[action].id3, petsc_actions[action].flops, petsc_actions[action].mem, petsc_actions[action].maxmem);
1210:     }
1211:   }
1212:   /* Output objects */
1213:   if (petsc_logObjects) {
1214:     PetscFPrintf(PETSC_COMM_WORLD, fd, "Objects created %d destroyed %d\n", petsc_numObjects, petsc_numObjectsDestroyed);
1215:     for (object = 0; object < petsc_numObjects; object++) {
1216:       PetscFPrintf(PETSC_COMM_WORLD, fd, "Parent ID: %d Memory: %d\n", petsc_objects[object].parent, (int) petsc_objects[object].mem);
1217:       if (!petsc_objects[object].name[0]) {
1218:         PetscFPrintf(PETSC_COMM_WORLD, fd,"No Name\n");
1219:       } else {
1220:         PetscFPrintf(PETSC_COMM_WORLD, fd, "Name: %s\n", petsc_objects[object].name);
1221:       }
1222:       if (petsc_objects[object].info[0] != 0) {
1223:         PetscFPrintf(PETSC_COMM_WORLD, fd, "No Info\n");
1224:       } else {
1225:         PetscFPrintf(PETSC_COMM_WORLD, fd, "Info: %s\n", petsc_objects[object].info);
1226:       }
1227:     }
1228:   }
1229:   /* Output events */
1230:   PetscFPrintf(PETSC_COMM_WORLD, fd, "Event log:\n");
1231:   PetscLogGetStageLog(&stageLog);
1232:   PetscIntStackTop(stageLog->stack, &curStage);
1233:   eventInfo = stageLog->stageInfo[curStage].eventLog->eventInfo;
1234:   for (event = 0; event < stageLog->stageInfo[curStage].eventLog->numEvents; event++) {
1235:     if (eventInfo[event].time != 0.0) flops = eventInfo[event].flops/eventInfo[event].time;
1236:     else flops = 0.0;
1237:     PetscFPrintf(PETSC_COMM_WORLD, fd, "%d %16d %16g %16g %16g\n", event, eventInfo[event].count,
1238:                         eventInfo[event].flops, eventInfo[event].time, flops);
1239:   }
1240:   PetscFClose(PETSC_COMM_WORLD, fd);
1241:   return(0);
1242: }

1244: /*
1245:   PetscLogView_Detailed - Each process prints the times for its own events

1247: */
1248: PetscErrorCode  PetscLogView_Detailed(PetscViewer viewer)
1249: {
1250:   PetscStageLog      stageLog;
1251:   PetscEventPerfInfo *eventInfo = NULL, *stageInfo = NULL;
1252:   PetscLogDouble     locTotalTime, numRed, maxMem;
1253:   int                numStages,numEvents,stage,event;
1254:   MPI_Comm           comm = PetscObjectComm((PetscObject) viewer);
1255:   PetscMPIInt        rank,size;
1256:   PetscErrorCode     ierr;

1259:   MPI_Comm_size(comm, &size);
1260:   MPI_Comm_rank(comm, &rank);
1261:   /* Must preserve reduction count before we go on */
1262:   numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;
1263:   /* Get the total elapsed time */
1264:   PetscTime(&locTotalTime);  locTotalTime -= petsc_BaseTime;
1265:   PetscViewerASCIIPrintf(viewer,"size = %d\n",size);
1266:   PetscViewerASCIIPrintf(viewer,"LocalTimes = {}\n");
1267:   PetscViewerASCIIPrintf(viewer,"LocalMessages = {}\n");
1268:   PetscViewerASCIIPrintf(viewer,"LocalMessageLens = {}\n");
1269:   PetscViewerASCIIPrintf(viewer,"LocalReductions = {}\n");
1270:   PetscViewerASCIIPrintf(viewer,"LocalFlop = {}\n");
1271:   PetscViewerASCIIPrintf(viewer,"LocalObjects = {}\n");
1272:   PetscViewerASCIIPrintf(viewer,"LocalMemory = {}\n");
1273:   PetscLogGetStageLog(&stageLog);
1274:   MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm);
1275:   PetscViewerASCIIPrintf(viewer,"Stages = {}\n");
1276:   for (stage=0; stage<numStages; stage++) {
1277:     PetscViewerASCIIPrintf(viewer,"Stages[\"%s\"] = {}\n",stageLog->stageInfo[stage].name);
1278:     PetscViewerASCIIPrintf(viewer,"Stages[\"%s\"][\"summary\"] = {}\n",stageLog->stageInfo[stage].name);
1279:     MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm);
1280:     for (event = 0; event < numEvents; event++) {
1281:       PetscViewerASCIIPrintf(viewer,"Stages[\"%s\"][\"%s\"] = {}\n",stageLog->stageInfo[stage].name,stageLog->eventLog->eventInfo[event].name);
1282:     }
1283:   }
1284:   PetscMallocGetMaximumUsage(&maxMem);
1285:   PetscViewerASCIIPushSynchronized(viewer);
1286:   PetscViewerASCIISynchronizedPrintf(viewer,"LocalTimes[%d] = %g\n",rank,locTotalTime);
1287:   PetscViewerASCIISynchronizedPrintf(viewer,"LocalMessages[%d] = %g\n",rank,(petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct));
1288:   PetscViewerASCIISynchronizedPrintf(viewer,"LocalMessageLens[%d] = %g\n",rank,(petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len));
1289:   PetscViewerASCIISynchronizedPrintf(viewer,"LocalReductions[%d] = %g\n",rank,numRed);
1290:   PetscViewerASCIISynchronizedPrintf(viewer,"LocalFlop[%d] = %g\n",rank,petsc_TotalFlops);
1291:   PetscViewerASCIISynchronizedPrintf(viewer,"LocalObjects[%d] = %d\n",rank,petsc_numObjects);
1292:   PetscViewerASCIISynchronizedPrintf(viewer,"LocalMemory[%d] = %g\n",rank,maxMem);
1293:   PetscViewerFlush(viewer);
1294:   for (stage=0; stage<numStages; stage++) {
1295:     stageInfo = &stageLog->stageInfo[stage].perfInfo;
1296:     PetscViewerASCIISynchronizedPrintf(viewer,"Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n",
1297:                                               stageLog->stageInfo[stage].name,rank,
1298:                                               stageInfo->time,stageInfo->numMessages,stageInfo->messageLength,stageInfo->numReductions,stageInfo->flops);
1299:     MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm);
1300:     for (event = 0; event < numEvents; event++) {
1301:       eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event];
1302:       PetscViewerASCIISynchronizedPrintf(viewer,"Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %D, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g",
1303:                                                 stageLog->stageInfo[stage].name,stageLog->eventLog->eventInfo[event].name,rank,
1304:                                                 eventInfo->count,eventInfo->time,eventInfo->syncTime,eventInfo->numMessages,eventInfo->messageLength,eventInfo->numReductions,eventInfo->flops);
1305:       if (eventInfo->dof[0] >= 0.) {
1306:         PetscInt d, e;

1308:         PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : [");
1309:         for (d = 0; d < 8; ++d) {
1310:           if (d > 0) {PetscViewerASCIISynchronizedPrintf(viewer, ", ");}
1311:           PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d]);
1312:         }
1313:         PetscViewerASCIISynchronizedPrintf(viewer, "]");
1314:         PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : [");
1315:         for (e = 0; e < 8; ++e) {
1316:           if (e > 0) {PetscViewerASCIISynchronizedPrintf(viewer, ", ");}
1317:           PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e]);
1318:         }
1319:         PetscViewerASCIISynchronizedPrintf(viewer, "]");
1320:       }
1321:       PetscViewerASCIISynchronizedPrintf(viewer,"}\n");
1322:     }
1323:   }
1324:   PetscViewerFlush(viewer);
1325:   PetscViewerASCIIPopSynchronized(viewer);
1326:   return(0);
1327: }

1329: /*
1330:   PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format
1331: */
1332: PetscErrorCode  PetscLogView_CSV(PetscViewer viewer)
1333: {
1334:   PetscStageLog      stageLog;
1335:   PetscEventPerfInfo *eventInfo = NULL;
1336:   PetscLogDouble     locTotalTime, maxMem;
1337:   int                numStages,numEvents,stage,event;
1338:   MPI_Comm           comm = PetscObjectComm((PetscObject) viewer);
1339:   PetscMPIInt        rank,size;
1340:   PetscErrorCode     ierr;

1343:   MPI_Comm_size(comm, &size);
1344:   MPI_Comm_rank(comm, &rank);
1345:   /* Must preserve reduction count before we go on */
1346:   /* Get the total elapsed time */
1347:   PetscTime(&locTotalTime);  locTotalTime -= petsc_BaseTime;
1348:   PetscLogGetStageLog(&stageLog);
1349:   MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm);
1350:   PetscMallocGetMaximumUsage(&maxMem);
1351:   PetscViewerASCIIPushSynchronized(viewer);
1352:   PetscViewerASCIIPrintf(viewer,"Stage Name,Event Name,Rank,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size);
1353:   PetscViewerFlush(viewer);
1354:   for (stage=0; stage<numStages; stage++) {
1355:     PetscEventPerfInfo *stageInfo = &stageLog->stageInfo[stage].perfInfo;

1357:     PetscViewerASCIISynchronizedPrintf(viewer,"%s,summary,%d,%g,%g,%g,%g,%g\n",
1358:                                               stageLog->stageInfo[stage].name,rank,stageInfo->time,stageInfo->numMessages,stageInfo->messageLength,stageInfo->numReductions,stageInfo->flops);
1359:     MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm);
1360:     for (event = 0; event < numEvents; event++) {
1361:       eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event];
1362:       PetscViewerASCIISynchronizedPrintf(viewer,"%s,%s,%d,%g,%g,%g,%g,%g",stageLog->stageInfo[stage].name,
1363:                                                 stageLog->eventLog->eventInfo[event].name,rank,eventInfo->time,eventInfo->numMessages,
1364:                                                 eventInfo->messageLength,eventInfo->numReductions,eventInfo->flops);
1365:       if (eventInfo->dof[0] >= 0.) {
1366:         PetscInt d, e;

1368:         for (d = 0; d < 8; ++d) {
1369:           PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d]);
1370:         }
1371:         for (e = 0; e < 8; ++e) {
1372:           PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e]);
1373:         }
1374:       }
1375:       PetscViewerASCIISynchronizedPrintf(viewer,"\n");
1376:     }
1377:   }
1378:   PetscViewerFlush(viewer);
1379:   PetscViewerASCIIPopSynchronized(viewer);
1380:   return(0);
1381: }

1383: static PetscErrorCode PetscLogViewWarnSync(MPI_Comm comm,FILE *fd)
1384: {
1387:   if (!PetscLogSyncOn) return(0);
1388:   PetscFPrintf(comm, fd, "\n\n");
1389:   PetscFPrintf(comm, fd, "      ##########################################################\n");
1390:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1391:   PetscFPrintf(comm, fd, "      #                       WARNING!!!                       #\n");
1392:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1393:   PetscFPrintf(comm, fd, "      #   This program was run with logging synchronization.   #\n");
1394:   PetscFPrintf(comm, fd, "      #   This option provides more meaningful imbalance       #\n");
1395:   PetscFPrintf(comm, fd, "      #   figures at the expense of slowing things down and    #\n");
1396:   PetscFPrintf(comm, fd, "      #   providing a distorted view of the overall runtime.   #\n");
1397:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1398:   PetscFPrintf(comm, fd, "      ##########################################################\n\n\n");
1399:   return(0);
1400: }

1402: static PetscErrorCode PetscLogViewWarnDebugging(MPI_Comm comm,FILE *fd)
1403: {

1407:   if (PetscDefined(USE_DEBUG)) {
1408:     PetscFPrintf(comm, fd, "\n\n");
1409:     PetscFPrintf(comm, fd, "      ##########################################################\n");
1410:     PetscFPrintf(comm, fd, "      #                                                        #\n");
1411:     PetscFPrintf(comm, fd, "      #                       WARNING!!!                       #\n");
1412:     PetscFPrintf(comm, fd, "      #                                                        #\n");
1413:     PetscFPrintf(comm, fd, "      #   This code was compiled with a debugging option.      #\n");
1414:     PetscFPrintf(comm, fd, "      #   To get timing results run ./configure                #\n");
1415:     PetscFPrintf(comm, fd, "      #   using --with-debugging=no, the performance will      #\n");
1416:     PetscFPrintf(comm, fd, "      #   be generally two or three times faster.              #\n");
1417:     PetscFPrintf(comm, fd, "      #                                                        #\n");
1418:     PetscFPrintf(comm, fd, "      ##########################################################\n\n\n");
1419:   }
1420:   return(0);
1421: }

1423: static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(MPI_Comm comm,FILE *fd)
1424: {
1425: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)

1429:   if (use_gpu_aware_mpi || !PetscCreatedGpuObjects) return(0);
1430:   PetscFPrintf(comm, fd, "\n\n");
1431:   PetscFPrintf(comm, fd, "      ##########################################################\n");
1432:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1433:   PetscFPrintf(comm, fd, "      #                       WARNING!!!                       #\n");
1434:   PetscFPrintf(comm, fd, "      #                                                        #\n");
1435:   PetscFPrintf(comm, fd, "      # This code was compiled with GPU support and you've     #\n");
1436:   PetscFPrintf(comm, fd, "      # created PETSc/GPU objects, but you intentionally used  #\n");
1437:   PetscFPrintf(comm, fd, "      # -use_gpu_aware_mpi 0, such that PETSc had to copy data #\n");
1438:   PetscFPrintf(comm, fd, "      # from GPU to CPU for communication. To get meaningfull  #\n");
1439:   PetscFPrintf(comm, fd, "      # timing results, please use GPU-aware MPI instead.      #\n");
1440:   PetscFPrintf(comm, fd, "      ##########################################################\n\n\n");
1441:   return(0);
1442: #else
1443:   return 0;
1444: #endif
1445: }

1447: #if defined(PETSC_HAVE_OPENMP)
1448: extern PetscInt PetscNumOMPThreads;
1449: #endif

1451: PetscErrorCode  PetscLogView_Default(PetscViewer viewer)
1452: {
1453:   FILE               *fd;
1454:   PetscLogDouble     zero       = 0.0;
1455:   PetscStageLog      stageLog;
1456:   PetscStageInfo     *stageInfo = NULL;
1457:   PetscEventPerfInfo *eventInfo = NULL;
1458:   PetscClassPerfInfo *classInfo;
1459:   char               arch[128],hostname[128],username[128],pname[PETSC_MAX_PATH_LEN],date[128];
1460:   const char         *name;
1461:   PetscLogDouble     locTotalTime, TotalTime, TotalFlops;
1462:   PetscLogDouble     numMessages, messageLength, avgMessLen, numReductions;
1463:   PetscLogDouble     stageTime, flops, flopr, mem, mess, messLen, red;
1464:   PetscLogDouble     fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed;
1465:   PetscLogDouble     fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed;
1466:   PetscLogDouble     min, max, tot, ratio, avg, x, y;
1467:   PetscLogDouble     minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax;
1468:   #if defined(PETSC_HAVE_DEVICE)
1469:   PetscLogDouble     cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops;
1470:   #endif
1471:   PetscMPIInt        minC, maxC;
1472:   PetscMPIInt        size, rank;
1473:   PetscBool          *localStageUsed,    *stageUsed;
1474:   PetscBool          *localStageVisible, *stageVisible;
1475:   int                numStages, localNumEvents, numEvents;
1476:   int                stage, oclass;
1477:   PetscLogEvent      event;
1478:   PetscErrorCode     ierr;
1479:   char               version[256];
1480:   MPI_Comm           comm;

1483:   PetscObjectGetComm((PetscObject)viewer,&comm);
1484:   PetscViewerASCIIGetPointer(viewer,&fd);
1485:   MPI_Comm_size(comm, &size);
1486:   MPI_Comm_rank(comm, &rank);
1487:   /* Get the total elapsed time */
1488:   PetscTime(&locTotalTime);  locTotalTime -= petsc_BaseTime;

1490:   PetscFPrintf(comm, fd, "************************************************************************************************************************\n");
1491:   PetscFPrintf(comm, fd, "***             WIDEN YOUR WINDOW TO 120 CHARACTERS.  Use 'enscript -r -fCourier9' to print this document            ***\n");
1492:   PetscFPrintf(comm, fd, "************************************************************************************************************************\n");
1493:   PetscFPrintf(comm, fd, "\n---------------------------------------------- PETSc Performance Summary: ----------------------------------------------\n\n");
1494:   PetscLogViewWarnSync(comm,fd);
1495:   PetscLogViewWarnDebugging(comm,fd);
1496:   PetscLogViewWarnNoGpuAwareMpi(comm,fd);
1497:   PetscGetArchType(arch,sizeof(arch));
1498:   PetscGetHostName(hostname,sizeof(hostname));
1499:   PetscGetUserName(username,sizeof(username));
1500:   PetscGetProgramName(pname,sizeof(pname));
1501:   PetscGetDate(date,sizeof(date));
1502:   PetscGetVersion(version,sizeof(version));
1503:   if (size == 1) {
1504:     PetscFPrintf(comm,fd,"%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date);
1505:   } else {
1506:     PetscFPrintf(comm,fd,"%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date);
1507:   }
1508: #if defined(PETSC_HAVE_OPENMP)
1509:   PetscFPrintf(comm,fd,"Using %D OpenMP threads\n", PetscNumOMPThreads);
1510: #endif
1511:   PetscFPrintf(comm, fd, "Using %s\n", version);

1513:   /* Must preserve reduction count before we go on */
1514:   red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;

1516:   /* Calculate summary information */
1517:   PetscFPrintf(comm, fd, "\n                         Max       Max/Min     Avg       Total\n");
1518:   /*   Time */
1519:   MPI_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1520:   MPI_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1521:   MPI_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1522:   avg  = tot/((PetscLogDouble) size);
1523:   if (min != 0.0) ratio = max/min; else ratio = 0.0;
1524:   PetscFPrintf(comm, fd, "Time (sec):           %5.3e   %7.3f   %5.3e\n", max, ratio, avg);
1525:   TotalTime = tot;
1526:   /*   Objects */
1527:   avg  = (PetscLogDouble) petsc_numObjects;
1528:   MPI_Allreduce(&avg,          &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1529:   MPI_Allreduce(&avg,          &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1530:   MPI_Allreduce(&avg,          &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1531:   avg  = tot/((PetscLogDouble) size);
1532:   if (min != 0.0) ratio = max/min; else ratio = 0.0;
1533:   PetscFPrintf(comm, fd, "Objects:              %5.3e   %7.3f   %5.3e\n", max, ratio, avg);
1534:   /*   Flops */
1535:   MPI_Allreduce(&petsc_TotalFlops,  &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1536:   MPI_Allreduce(&petsc_TotalFlops,  &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1537:   MPI_Allreduce(&petsc_TotalFlops,  &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1538:   avg  = tot/((PetscLogDouble) size);
1539:   if (min != 0.0) ratio = max/min; else ratio = 0.0;
1540:   PetscFPrintf(comm, fd, "Flop:                 %5.3e   %7.3f   %5.3e  %5.3e\n", max, ratio, avg, tot);
1541:   TotalFlops = tot;
1542:   /*   Flops/sec -- Must talk to Barry here */
1543:   if (locTotalTime != 0.0) flops = petsc_TotalFlops/locTotalTime; else flops = 0.0;
1544:   MPI_Allreduce(&flops,        &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1545:   MPI_Allreduce(&flops,        &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1546:   MPI_Allreduce(&flops,        &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1547:   avg  = tot/((PetscLogDouble) size);
1548:   if (min != 0.0) ratio = max/min; else ratio = 0.0;
1549:   PetscFPrintf(comm, fd, "Flop/sec:             %5.3e   %7.3f   %5.3e  %5.3e\n", max, ratio, avg, tot);
1550:   /*   Memory */
1551:   PetscMallocGetMaximumUsage(&mem);
1552:   if (mem > 0.0) {
1553:     MPI_Allreduce(&mem,          &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1554:     MPI_Allreduce(&mem,          &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1555:     MPI_Allreduce(&mem,          &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1556:     avg  = tot/((PetscLogDouble) size);
1557:     if (min != 0.0) ratio = max/min; else ratio = 0.0;
1558:     PetscFPrintf(comm, fd, "Memory:               %5.3e   %7.3f   %5.3e  %5.3e\n", max, ratio, avg, tot);
1559:   }
1560:   /*   Messages */
1561:   mess = 0.5*(petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct);
1562:   MPI_Allreduce(&mess,         &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1563:   MPI_Allreduce(&mess,         &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1564:   MPI_Allreduce(&mess,         &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1565:   avg  = tot/((PetscLogDouble) size);
1566:   if (min != 0.0) ratio = max/min; else ratio = 0.0;
1567:   PetscFPrintf(comm, fd, "MPI Messages:         %5.3e   %7.3f   %5.3e  %5.3e\n", max, ratio, avg, tot);
1568:   numMessages = tot;
1569:   /*   Message Lengths */
1570:   mess = 0.5*(petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len);
1571:   MPI_Allreduce(&mess,         &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1572:   MPI_Allreduce(&mess,         &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1573:   MPI_Allreduce(&mess,         &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1574:   if (numMessages != 0) avg = tot/numMessages; else avg = 0.0;
1575:   if (min != 0.0) ratio = max/min; else ratio = 0.0;
1576:   PetscFPrintf(comm, fd, "MPI Message Lengths:  %5.3e   %7.3f   %5.3e  %5.3e\n", max, ratio, avg, tot);
1577:   messageLength = tot;
1578:   /*   Reductions */
1579:   MPI_Allreduce(&red,          &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1580:   MPI_Allreduce(&red,          &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1581:   MPI_Allreduce(&red,          &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1582:   if (min != 0.0) ratio = max/min; else ratio = 0.0;
1583:   PetscFPrintf(comm, fd, "MPI Reductions:       %5.3e   %7.3f\n", max, ratio);
1584:   numReductions = red; /* wrong because uses count from process zero */
1585:   PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n");
1586:   PetscFPrintf(comm, fd, "                            e.g., VecAXPY() for real vectors of length N --> 2N flop\n");
1587:   PetscFPrintf(comm, fd, "                            and VecAXPY() for complex vectors of length N --> 8N flop\n");

1589:   /* Get total number of stages --
1590:        Currently, a single processor can register more stages than another, but stages must all be registered in order.
1591:        We can removed this requirement if necessary by having a global stage numbering and indirection on the stage ID.
1592:        This seems best accomplished by assoicating a communicator with each stage.
1593:   */
1594:   PetscLogGetStageLog(&stageLog);
1595:   MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm);
1596:   PetscMalloc1(numStages, &localStageUsed);
1597:   PetscMalloc1(numStages, &stageUsed);
1598:   PetscMalloc1(numStages, &localStageVisible);
1599:   PetscMalloc1(numStages, &stageVisible);
1600:   if (numStages > 0) {
1601:     stageInfo = stageLog->stageInfo;
1602:     for (stage = 0; stage < numStages; stage++) {
1603:       if (stage < stageLog->numStages) {
1604:         localStageUsed[stage]    = stageInfo[stage].used;
1605:         localStageVisible[stage] = stageInfo[stage].perfInfo.visible;
1606:       } else {
1607:         localStageUsed[stage]    = PETSC_FALSE;
1608:         localStageVisible[stage] = PETSC_TRUE;
1609:       }
1610:     }
1611:     MPI_Allreduce(localStageUsed,    stageUsed,    numStages, MPIU_BOOL, MPI_LOR,  comm);
1612:     MPI_Allreduce(localStageVisible, stageVisible, numStages, MPIU_BOOL, MPI_LAND, comm);
1613:     for (stage = 0; stage < numStages; stage++) {
1614:       if (stageUsed[stage]) {
1615:         PetscFPrintf(comm, fd, "\nSummary of Stages:   ----- Time ------  ----- Flop ------  --- Messages ---  -- Message Lengths --  -- Reductions --\n");
1616:         PetscFPrintf(comm, fd, "                        Avg     %%Total     Avg     %%Total    Count   %%Total     Avg         %%Total    Count   %%Total\n");
1617:         break;
1618:       }
1619:     }
1620:     for (stage = 0; stage < numStages; stage++) {
1621:       if (!stageUsed[stage]) continue;
1622:       /* CANNOT use MPI_Allreduce() since it might fail the line number check */
1623:       if (localStageUsed[stage]) {
1624:         MPI_Allreduce(&stageInfo[stage].perfInfo.time,          &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1625:         MPI_Allreduce(&stageInfo[stage].perfInfo.flops,         &flops,     1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1626:         MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages,   &mess,      1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1627:         MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen,   1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1628:         MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red,       1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1629:         name = stageInfo[stage].name;
1630:       } else {
1631:         MPI_Allreduce(&zero,                           &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1632:         MPI_Allreduce(&zero,                           &flops,     1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1633:         MPI_Allreduce(&zero,                           &mess,      1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1634:         MPI_Allreduce(&zero,                           &messLen,   1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1635:         MPI_Allreduce(&zero,                           &red,       1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1636:         name = "";
1637:       }
1638:       mess *= 0.5; messLen *= 0.5; red /= size;
1639:       if (TotalTime     != 0.0) fracTime       = stageTime/TotalTime;    else fracTime       = 0.0;
1640:       if (TotalFlops    != 0.0) fracFlops      = flops/TotalFlops;       else fracFlops      = 0.0;
1641:       /* Talk to Barry if (stageTime     != 0.0) flops          = (size*flops)/stageTime; else flops          = 0.0; */
1642:       if (numMessages   != 0.0) fracMessages   = mess/numMessages;       else fracMessages   = 0.0;
1643:       if (mess          != 0.0) avgMessLen     = messLen/mess;           else avgMessLen     = 0.0;
1644:       if (messageLength != 0.0) fracLength     = messLen/messageLength;  else fracLength     = 0.0;
1645:       if (numReductions != 0.0) fracReductions = red/numReductions;      else fracReductions = 0.0;
1646:       PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%%  %6.4e %5.1f%%  %5.3e %5.1f%%  %5.3e      %5.1f%%  %5.3e %5.1f%%\n",
1647:                           stage, name, stageTime/size, 100.0*fracTime, flops, 100.0*fracFlops,
1648:                           mess, 100.0*fracMessages, avgMessLen, 100.0*fracLength, red, 100.0*fracReductions);
1649:     }
1650:   }

1652:   PetscFPrintf(comm, fd,"\n------------------------------------------------------------------------------------------------------------------------\n");
1653:   PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n");
1654:   PetscFPrintf(comm, fd, "Phase summary info:\n");
1655:   PetscFPrintf(comm, fd, "   Count: number of times phase was executed\n");
1656:   PetscFPrintf(comm, fd, "   Time and Flop: Max - maximum over all processors\n");
1657:   PetscFPrintf(comm, fd, "                  Ratio - ratio of maximum to minimum over all processors\n");
1658:   PetscFPrintf(comm, fd, "   Mess: number of messages sent\n");
1659:   PetscFPrintf(comm, fd, "   AvgLen: average message length (bytes)\n");
1660:   PetscFPrintf(comm, fd, "   Reduct: number of global reductions\n");
1661:   PetscFPrintf(comm, fd, "   Global: entire computation\n");
1662:   PetscFPrintf(comm, fd, "   Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n");
1663:   PetscFPrintf(comm, fd, "      %%T - percent time in this phase         %%F - percent flop in this phase\n");
1664:   PetscFPrintf(comm, fd, "      %%M - percent messages in this phase     %%L - percent message lengths in this phase\n");
1665:   PetscFPrintf(comm, fd, "      %%R - percent reductions in this phase\n");
1666:   PetscFPrintf(comm, fd, "   Total Mflop/s: 10e-6 * (sum of flop over all processors)/(max time over all processors)\n");
1667:   if (PetscLogMemory) {
1668:     PetscFPrintf(comm, fd, "   Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event)\n");
1669:     PetscFPrintf(comm, fd, "   EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events)\n");
1670:     PetscFPrintf(comm, fd, "   MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event)\n");
1671:     PetscFPrintf(comm, fd, "   RMI Mbytes: Increase in resident memory (sum over all calls to event)\n");
1672:   }
1673:   #if defined(PETSC_HAVE_DEVICE)
1674:   PetscFPrintf(comm, fd, "   GPU Mflop/s: 10e-6 * (sum of flop on GPU over all processors)/(max GPU time over all processors)\n");
1675:   PetscFPrintf(comm, fd, "   CpuToGpu Count: total number of CPU to GPU copies per processor\n");
1676:   PetscFPrintf(comm, fd, "   CpuToGpu Size (Mbytes): 10e-6 * (total size of CPU to GPU copies per processor)\n");
1677:   PetscFPrintf(comm, fd, "   GpuToCpu Count: total number of GPU to CPU copies per processor\n");
1678:   PetscFPrintf(comm, fd, "   GpuToCpu Size (Mbytes): 10e-6 * (total size of GPU to CPU copies per processor)\n");
1679:   PetscFPrintf(comm, fd, "   GPU %%F: percent flops on GPU in this event\n");
1680:   #endif
1681:   PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------\n");

1683:   PetscLogViewWarnDebugging(comm,fd);

1685:   /* Report events */
1686:   PetscFPrintf(comm, fd,"Event                Count      Time (sec)     Flop                              --- Global ---  --- Stage ----  Total");
1687:   if (PetscLogMemory) {
1688:     PetscFPrintf(comm, fd,"  Malloc EMalloc MMalloc RMI");
1689:   }
1690:   #if defined(PETSC_HAVE_DEVICE)
1691:   PetscFPrintf(comm, fd,"   GPU    - CpuToGpu -   - GpuToCpu - GPU");
1692:   #endif
1693:   PetscFPrintf(comm, fd,"\n");
1694:   PetscFPrintf(comm, fd,"                   Max Ratio  Max     Ratio   Max  Ratio  Mess   AvgLen  Reduct  %%T %%F %%M %%L %%R  %%T %%F %%M %%L %%R Mflop/s");
1695:   if (PetscLogMemory) {
1696:     PetscFPrintf(comm, fd," Mbytes Mbytes Mbytes Mbytes");
1697:   }
1698:   #if defined(PETSC_HAVE_DEVICE)
1699:   PetscFPrintf(comm, fd," Mflop/s Count   Size   Count   Size  %%F");
1700:   #endif
1701:   PetscFPrintf(comm, fd,"\n");
1702:   PetscFPrintf(comm, fd,"------------------------------------------------------------------------------------------------------------------------");
1703:   if (PetscLogMemory) {
1704:     PetscFPrintf(comm, fd,"-----------------------------");
1705:   }
1706:   #if defined(PETSC_HAVE_DEVICE)
1707:   PetscFPrintf(comm, fd,"---------------------------------------");
1708:   #endif
1709:   PetscFPrintf(comm, fd,"\n");

1711:   /* Problem: The stage name will not show up unless the stage executed on proc 1 */
1712:   for (stage = 0; stage < numStages; stage++) {
1713:     if (!stageVisible[stage]) continue;
1714:     /* CANNOT use MPI_Allreduce() since it might fail the line number check */
1715:     if (localStageUsed[stage]) {
1716:       PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name);
1717:       MPI_Allreduce(&stageInfo[stage].perfInfo.time,          &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1718:       MPI_Allreduce(&stageInfo[stage].perfInfo.flops,         &flops,     1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1719:       MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages,   &mess,      1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1720:       MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen,   1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1721:       MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red,       1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1722:     } else {
1723:       PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage);
1724:       MPI_Allreduce(&zero,                           &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1725:       MPI_Allreduce(&zero,                           &flops,     1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1726:       MPI_Allreduce(&zero,                           &mess,      1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1727:       MPI_Allreduce(&zero,                           &messLen,   1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1728:       MPI_Allreduce(&zero,                           &red,       1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1729:     }
1730:     mess *= 0.5; messLen *= 0.5; red /= size;

1732:     /* Get total number of events in this stage --
1733:        Currently, a single processor can register more events than another, but events must all be registered in order,
1734:        just like stages. We can removed this requirement if necessary by having a global event numbering and indirection
1735:        on the event ID. This seems best accomplished by associating a communicator with each stage.

1737:        Problem: If the event did not happen on proc 1, its name will not be available.
1738:        Problem: Event visibility is not implemented
1739:     */
1740:     if (localStageUsed[stage]) {
1741:       eventInfo      = stageLog->stageInfo[stage].eventLog->eventInfo;
1742:       localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents;
1743:     } else localNumEvents = 0;
1744:     MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm);
1745:     for (event = 0; event < numEvents; event++) {
1746:       /* CANNOT use MPI_Allreduce() since it might fail the line number check */
1747:       if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) {
1748:         if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) flopr = eventInfo[event].flops; else flopr = 0.0;
1749:         MPI_Allreduce(&flopr,                          &minf,  1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1750:         MPI_Allreduce(&flopr,                          &maxf,  1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1751:         MPI_Allreduce(&eventInfo[event].flops,         &totf,  1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1752:         MPI_Allreduce(&eventInfo[event].time,          &mint,  1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1753:         MPI_Allreduce(&eventInfo[event].time,          &maxt,  1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1754:         MPI_Allreduce(&eventInfo[event].time,          &tott,  1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1755:         MPI_Allreduce(&eventInfo[event].numMessages,   &totm,  1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1756:         MPI_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1757:         MPI_Allreduce(&eventInfo[event].numReductions, &totr,  1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1758:         MPI_Allreduce(&eventInfo[event].count,         &minC,  1, MPI_INT,             MPI_MIN, comm);
1759:         MPI_Allreduce(&eventInfo[event].count,         &maxC,  1, MPI_INT,             MPI_MAX, comm);
1760:         if (PetscLogMemory) {
1761:           MPI_Allreduce(&eventInfo[event].memIncrease,    &mem,   1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1762:           MPI_Allreduce(&eventInfo[event].mallocSpace,    &mal,   1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1763:           MPI_Allreduce(&eventInfo[event].mallocIncrease, &malmax,1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1764:           MPI_Allreduce(&eventInfo[event].mallocIncreaseEvent, &emalmax,1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1765:         }
1766:         #if defined(PETSC_HAVE_DEVICE)
1767:         MPI_Allreduce(&eventInfo[event].CpuToGpuCount,    &cct,   1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1768:         MPI_Allreduce(&eventInfo[event].GpuToCpuCount,    &gct,   1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1769:         MPI_Allreduce(&eventInfo[event].CpuToGpuSize,     &csz,   1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1770:         MPI_Allreduce(&eventInfo[event].GpuToCpuSize,     &gsz,   1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1771:         MPI_Allreduce(&eventInfo[event].GpuFlops,         &gflops,1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1772:         MPI_Allreduce(&eventInfo[event].GpuTime,          &gmaxt ,1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1773:         #endif
1774:         name = stageLog->eventLog->eventInfo[event].name;
1775:       } else {
1776:         flopr = 0.0;
1777:         MPI_Allreduce(&flopr,                         &minf,  1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1778:         MPI_Allreduce(&flopr,                         &maxf,  1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1779:         MPI_Allreduce(&zero,                          &totf,  1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1780:         MPI_Allreduce(&zero,                          &mint,  1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1781:         MPI_Allreduce(&zero,                          &maxt,  1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1782:         MPI_Allreduce(&zero,                          &tott,  1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1783:         MPI_Allreduce(&zero,                          &totm,  1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1784:         MPI_Allreduce(&zero,                          &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1785:         MPI_Allreduce(&zero,                          &totr,  1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1786:         MPI_Allreduce(&ierr,                          &minC,  1, MPI_INT,             MPI_MIN, comm);
1787:         MPI_Allreduce(&ierr,                          &maxC,  1, MPI_INT,             MPI_MAX, comm);
1788:         if (PetscLogMemory) {
1789:           MPI_Allreduce(&zero,                        &mem,    1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1790:           MPI_Allreduce(&zero,                        &mal,    1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1791:           MPI_Allreduce(&zero,                        &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1792:           MPI_Allreduce(&zero,                        &emalmax,1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1793:         }
1794:         #if defined(PETSC_HAVE_DEVICE)
1795:         MPI_Allreduce(&zero,                          &cct,    1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1796:         MPI_Allreduce(&zero,                          &gct,    1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1797:         MPI_Allreduce(&zero,                          &csz,    1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1798:         MPI_Allreduce(&zero,                          &gsz,    1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1799:         MPI_Allreduce(&zero,                          &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1800:         MPI_Allreduce(&zero,                          &gmaxt , 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1801:         #endif
1802:         name  = "";
1803:       }
1804:       if (mint < 0.0) {
1805:         PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n",mint,name);
1806:         mint = 0;
1807:       }
1808:       if (minf < 0.0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Minimum flop %g over all processors for %s is negative! Not possible!",minf,name);
1809:       totm *= 0.5; totml *= 0.5; totr /= size;

1811:       if (maxC != 0) {
1812:         if (minC          != 0)   ratC             = ((PetscLogDouble)maxC)/minC;else ratC             = 0.0;
1813:         if (mint          != 0.0) ratt             = maxt/mint;                  else ratt             = 0.0;
1814:         if (minf          != 0.0) ratf             = maxf/minf;                  else ratf             = 0.0;
1815:         if (TotalTime     != 0.0) fracTime         = tott/TotalTime;             else fracTime         = 0.0;
1816:         if (TotalFlops    != 0.0) fracFlops        = totf/TotalFlops;            else fracFlops        = 0.0;
1817:         if (stageTime     != 0.0) fracStageTime    = tott/stageTime;             else fracStageTime    = 0.0;
1818:         if (flops         != 0.0) fracStageFlops   = totf/flops;                 else fracStageFlops   = 0.0;
1819:         if (numMessages   != 0.0) fracMess         = totm/numMessages;           else fracMess         = 0.0;
1820:         if (messageLength != 0.0) fracMessLen      = totml/messageLength;        else fracMessLen      = 0.0;
1821:         if (numReductions != 0.0) fracRed          = totr/numReductions;         else fracRed          = 0.0;
1822:         if (mess          != 0.0) fracStageMess    = totm/mess;                  else fracStageMess    = 0.0;
1823:         if (messLen       != 0.0) fracStageMessLen = totml/messLen;              else fracStageMessLen = 0.0;
1824:         if (red           != 0.0) fracStageRed     = totr/red;                   else fracStageRed     = 0.0;
1825:         if (totm          != 0.0) totml           /= totm;                       else totml            = 0.0;
1826:         if (maxt          != 0.0) flopr            = totf/maxt;                  else flopr            = 0.0;
1827:         if (fracStageTime > 1.00)  PetscFPrintf(comm, fd,"Warning -- total time of event greater than time of entire stage -- something is wrong with the timer\n");
1828:         PetscFPrintf(comm, fd,
1829:                             "%-16s %7d%4.1f %5.4e%4.1f %3.2e%4.1f %2.1e %2.1e %2.1e%3.0f%3.0f%3.0f%3.0f%3.0f %3.0f%3.0f%3.0f%3.0f%3.0f %5.0f",
1830:                             name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr,
1831:                             100.0*fracTime, 100.0*fracFlops, 100.0*fracMess, 100.0*fracMessLen, 100.0*fracRed,
1832:                             100.0*fracStageTime, 100.0*fracStageFlops, 100.0*fracStageMess, 100.0*fracStageMessLen, 100.0*fracStageRed,
1833:                             PetscAbs(flopr)/1.0e6);
1834:         if (PetscLogMemory) {
1835:           PetscFPrintf(comm, fd," %5.0f   %5.0f   %5.0f   %5.0f",mal/1.0e6,emalmax/1.0e6,malmax/1.0e6,mem/1.0e6);
1836:         }
1837:         #if defined(PETSC_HAVE_DEVICE)
1838:         if (totf  != 0.0) fracgflops = gflops/totf;  else fracgflops = 0.0;
1839:         if (gmaxt != 0.0) gflopr     = gflops/gmaxt; else gflopr     = 0.0;
1840:         PetscFPrintf(comm, fd,"   %5.0f   %4.0f %3.2e %4.0f %3.2e% 3.0f",PetscAbs(gflopr)/1.0e6,cct/size,csz/(1.0e6*size),gct/size,gsz/(1.0e6*size),100.0*fracgflops);
1841:         #endif
1842:         PetscFPrintf(comm, fd,"\n");
1843:       }
1844:     }
1845:   }

1847:   /* Memory usage and object creation */
1848:   PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------");
1849:   if (PetscLogMemory) {
1850:     PetscFPrintf(comm, fd, "-----------------------------");
1851:   }
1852:   #if defined(PETSC_HAVE_DEVICE)
1853:   PetscFPrintf(comm, fd, "---------------------------------------");
1854:   #endif
1855:   PetscFPrintf(comm, fd, "\n");
1856:   PetscFPrintf(comm, fd, "\n");
1857:   PetscFPrintf(comm, fd, "Memory usage is given in bytes:\n\n");

1859:   /* Right now, only stages on the first processor are reported here, meaning only objects associated with
1860:      the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then
1861:      stats for stages local to processor sets.
1862:   */
1863:   /* We should figure out the longest object name here (now 20 characters) */
1864:   PetscFPrintf(comm, fd, "Object Type          Creations   Destructions     Memory  Descendants' Mem.\n");
1865:   PetscFPrintf(comm, fd, "Reports information only for process 0.\n");
1866:   for (stage = 0; stage < numStages; stage++) {
1867:     if (localStageUsed[stage]) {
1868:       classInfo = stageLog->stageInfo[stage].classLog->classInfo;
1869:       PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name);
1870:       for (oclass = 0; oclass < stageLog->stageInfo[stage].classLog->numClasses; oclass++) {
1871:         if ((classInfo[oclass].creations > 0) || (classInfo[oclass].destructions > 0)) {
1872:           PetscFPrintf(comm, fd, "%20s %5d          %5d  %11.0f     %g\n", stageLog->classLog->classInfo[oclass].name,
1873:                               classInfo[oclass].creations, classInfo[oclass].destructions, classInfo[oclass].mem,
1874:                               classInfo[oclass].descMem);
1875:         }
1876:       }
1877:     } else {
1878:       if (!localStageVisible[stage]) continue;
1879:       PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage);
1880:     }
1881:   }

1883:   PetscFree(localStageUsed);
1884:   PetscFree(stageUsed);
1885:   PetscFree(localStageVisible);
1886:   PetscFree(stageVisible);

1888:   /* Information unrelated to this particular run */
1889:   PetscFPrintf(comm, fd, "========================================================================================================================\n");
1890:   PetscTime(&y);
1891:   PetscTime(&x);
1892:   PetscTime(&y); PetscTime(&y); PetscTime(&y); PetscTime(&y); PetscTime(&y);
1893:   PetscTime(&y); PetscTime(&y); PetscTime(&y); PetscTime(&y); PetscTime(&y);
1894:   PetscFPrintf(comm,fd,"Average time to get PetscTime(): %g\n", (y-x)/10.0);
1895:   /* MPI information */
1896:   if (size > 1) {
1897:     MPI_Status  status;
1898:     PetscMPIInt tag;
1899:     MPI_Comm    newcomm;

1901:     MPI_Barrier(comm);
1902:     PetscTime(&x);
1903:     MPI_Barrier(comm);
1904:     MPI_Barrier(comm);
1905:     MPI_Barrier(comm);
1906:     MPI_Barrier(comm);
1907:     MPI_Barrier(comm);
1908:     PetscTime(&y);
1909:     PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y-x)/5.0);
1910:     PetscCommDuplicate(comm,&newcomm, &tag);
1911:     MPI_Barrier(comm);
1912:     if (rank) {
1913:       MPI_Recv(NULL, 0, MPI_INT, rank-1,            tag, newcomm, &status);
1914:       MPI_Send(NULL, 0, MPI_INT, (rank+1)%size, tag, newcomm);
1915:     } else {
1916:       PetscTime(&x);
1917:       MPI_Send(NULL, 0, MPI_INT, 1,          tag, newcomm);
1918:       MPI_Recv(NULL, 0, MPI_INT, size-1, tag, newcomm, &status);
1919:       PetscTime(&y);
1920:       PetscFPrintf(comm,fd,"Average time for zero size MPI_Send(): %g\n", (y-x)/size);
1921:     }
1922:     PetscCommDestroy(&newcomm);
1923:   }
1924:   PetscOptionsView(NULL,viewer);

1926:   /* Machine and compile information */
1927: #if defined(PETSC_USE_FORTRAN_KERNELS)
1928:   PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n");
1929: #else
1930:   PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n");
1931: #endif
1932: #if defined(PETSC_USE_64BIT_INDICES)
1933:   PetscFPrintf(comm, fd, "Compiled with 64 bit PetscInt\n");
1934: #elif defined(PETSC_USE___FLOAT128)
1935:   PetscFPrintf(comm, fd, "Compiled with 32 bit PetscInt\n");
1936: #endif
1937: #if defined(PETSC_USE_REAL_SINGLE)
1938:   PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n");
1939: #elif defined(PETSC_USE___FLOAT128)
1940:   PetscFPrintf(comm, fd, "Compiled with 128 bit precision PetscScalar and PetscReal\n");
1941: #endif
1942: #if defined(PETSC_USE_REAL_MAT_SINGLE)
1943:   PetscFPrintf(comm, fd, "Compiled with single precision matrices\n");
1944: #else
1945:   PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n");
1946: #endif
1947:   PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n",
1948:                       (int) sizeof(short), (int) sizeof(int), (int) sizeof(long), (int) sizeof(void*),(int) sizeof(PetscScalar),(int) sizeof(PetscInt));

1950:   PetscFPrintf(comm, fd, "Configure options: %s",petscconfigureoptions);
1951:   PetscFPrintf(comm, fd, "%s", petscmachineinfo);
1952:   PetscFPrintf(comm, fd, "%s", petsccompilerinfo);
1953:   PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo);
1954:   PetscFPrintf(comm, fd, "%s", petsclinkerinfo);

1956:   /* Cleanup */
1957:   PetscFPrintf(comm, fd, "\n");
1958:   PetscLogViewWarnNoGpuAwareMpi(comm,fd);
1959:   PetscLogViewWarnDebugging(comm,fd);
1960:   return(0);
1961: }

1963: /*@C
1964:   PetscLogView - Prints a summary of the logging.

1966:   Collective over MPI_Comm

1968:   Input Parameter:
1969: .  viewer - an ASCII viewer

1971:   Options Database Keys:
1972: +  -log_view [:filename] - Prints summary of log information
1973: .  -log_view :filename.py:ascii_info_detail - Saves logging information from each process as a Python file
1974: .  -log_view :filename.xml:ascii_xml - Saves a summary of the logging information in a nested format (see below for how to view it)
1975: .  -log_view :filename.txt:ascii_flamegraph - Saves logging information in a format suitable for visualising as a Flame Graph (see below for how to view it)
1976: .  -log_all - Saves a file Log.rank for each MPI process with details of each step of the computation
1977: -  -log_trace [filename] - Displays a trace of what each process is doing

1979:   Notes:
1980:   It is possible to control the logging programatically but we recommend using the options database approach whenever possible
1981:   By default the summary is printed to stdout.

1983:   Before calling this routine you must have called either PetscLogDefaultBegin() or PetscLogNestedBegin()

1985:   If PETSc is configured with --with-logging=0 then this functionality is not available

1987:   To view the nested XML format filename.xml first copy  ${PETSC_DIR}/share/petsc/xml/performance_xml2html.xsl to the current
1988:   directory then open filename.xml with your browser. Specific notes for certain browsers
1989: $    Firefox and Internet explorer - simply open the file
1990: $    Google Chrome - you must start up Chrome with the option --allow-file-access-from-files
1991: $    Safari - see https://ccm.net/faq/36342-safari-how-to-enable-local-file-access
1992:   or one can use the package http://xmlsoft.org/XSLT/xsltproc2.html to translate the xml file to html and then open it with
1993:   your browser.
1994:   Alternatively, use the script ${PETSC_DIR}/lib/petsc/bin/petsc-performance-view to automatically open a new browser
1995:   window and render the XML log file contents.

1997:   The nested XML format was kindly donated by Koos Huijssen and Christiaan M. Klaij  MARITIME  RESEARCH  INSTITUTE  NETHERLANDS

1999:   The Flame Graph output can be visualised using either the original Flame Graph script (https://github.com/brendangregg/FlameGraph)
2000:   or using speedscope (https://www.speedscope.app).
2001:   Old XML profiles may be converted into this format using the script ${PETSC_DIR}/lib/petsc/bin/xml2flamegraph.py.

2003:   Level: beginner

2005: .seealso: PetscLogDefaultBegin(), PetscLogDump()
2006: @*/
2007: PetscErrorCode  PetscLogView(PetscViewer viewer)
2008: {
2009:   PetscErrorCode    ierr;
2010:   PetscBool         isascii;
2011:   PetscViewerFormat format;
2012:   int               stage, lastStage;
2013:   PetscStageLog     stageLog;

2016:   if (!PetscLogPLB) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Must use -log_view or PetscLogDefaultBegin() before calling this routine");
2017:   /* Pop off any stages the user forgot to remove */
2018:   lastStage = 0;
2019:   PetscLogGetStageLog(&stageLog);
2020:   PetscStageLogGetCurrent(stageLog, &stage);
2021:   while (stage >= 0) {
2022:     lastStage = stage;
2023:     PetscStageLogPop(stageLog);
2024:     PetscStageLogGetCurrent(stageLog, &stage);
2025:   }
2026:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
2027:   if (!isascii) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Currently can only view logging to ASCII");
2028:   PetscViewerGetFormat(viewer,&format);
2029:   if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) {
2030:     PetscLogView_Default(viewer);
2031:   } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2032:     PetscLogView_Detailed(viewer);
2033:   } else if (format == PETSC_VIEWER_ASCII_CSV) {
2034:     PetscLogView_CSV(viewer);
2035:   } else if (format == PETSC_VIEWER_ASCII_XML) {
2036:     PetscLogView_Nested(viewer);
2037:   } else if (format == PETSC_VIEWER_ASCII_FLAMEGRAPH) {
2038:     PetscLogView_Flamegraph(viewer);
2039:   }
2040:   PetscStageLogPush(stageLog, lastStage);
2041:   return(0);
2042: }

2044: /*@C
2045:   PetscLogViewFromOptions - Processes command line options to determine if/how a PetscLog is to be viewed.

2047:   Collective on PETSC_COMM_WORLD

2049:   Not normally called by user

2051:   Level: intermediate

2053: @*/
2054: PetscErrorCode PetscLogViewFromOptions(void)
2055: {
2056:   PetscErrorCode    ierr;
2057:   PetscViewer       viewer;
2058:   PetscBool         flg;
2059:   PetscViewerFormat format;

2062:   PetscOptionsGetViewer(PETSC_COMM_WORLD,NULL,NULL,"-log_view",&viewer,&format,&flg);
2063:   if (flg) {
2064:     PetscViewerPushFormat(viewer,format);
2065:     PetscLogView(viewer);
2066:     PetscViewerPopFormat(viewer);
2067:     PetscViewerDestroy(&viewer);
2068:   }
2069:   return(0);
2070: }



2074: /*----------------------------------------------- Counter Functions -------------------------------------------------*/
2075: /*@C
2076:    PetscGetFlops - Returns the number of flops used on this processor
2077:    since the program began.

2079:    Not Collective

2081:    Output Parameter:
2082:    flops - number of floating point operations

2084:    Notes:
2085:    A global counter logs all PETSc flop counts.  The user can use
2086:    PetscLogFlops() to increment this counter to include flops for the
2087:    application code.

2089:    Level: intermediate

2091: .seealso: PetscTime(), PetscLogFlops()
2092: @*/
2093: PetscErrorCode  PetscGetFlops(PetscLogDouble *flops)
2094: {
2096:   *flops = petsc_TotalFlops;
2097:   return(0);
2098: }

2100: PetscErrorCode  PetscLogObjectState(PetscObject obj, const char format[], ...)
2101: {
2103:   size_t         fullLength;
2104:   va_list        Argp;

2107:   if (!petsc_logObjects) return(0);
2108:   va_start(Argp, format);
2109:   PetscVSNPrintf(petsc_objects[obj->id].info, 64,format,&fullLength, Argp);
2110:   va_end(Argp);
2111:   return(0);
2112: }


2115: /*MC
2116:    PetscLogFlops - Adds floating point operations to the global counter.

2118:    Synopsis:
2119: #include <petsclog.h>
2120:    PetscErrorCode PetscLogFlops(PetscLogDouble f)

2122:    Not Collective

2124:    Input Parameter:
2125: .  f - flop counter


2128:    Usage:
2129: .vb
2130:      PetscLogEvent USER_EVENT;
2131:      PetscLogEventRegister("User event",0,&USER_EVENT);
2132:      PetscLogEventBegin(USER_EVENT,0,0,0,0);
2133:         [code segment to monitor]
2134:         PetscLogFlops(user_flops)
2135:      PetscLogEventEnd(USER_EVENT,0,0,0,0);
2136: .ve

2138:    Notes:
2139:    A global counter logs all PETSc flop counts.  The user can use
2140:    PetscLogFlops() to increment this counter to include flops for the
2141:    application code.

2143:    Level: intermediate

2145: .seealso: PetscLogEventRegister(), PetscLogEventBegin(), PetscLogEventEnd(), PetscGetFlops()

2147: M*/

2149: /*MC
2150:    PetscPreLoadBegin - Begin a segment of code that may be preloaded (run twice)
2151:     to get accurate timings

2153:    Synopsis:
2154: #include <petsclog.h>
2155:    void PetscPreLoadBegin(PetscBool  flag,char *name);

2157:    Not Collective

2159:    Input Parameter:
2160: +   flag - PETSC_TRUE to run twice, PETSC_FALSE to run once, may be overridden
2161:            with command line option -preload true or -preload false
2162: -   name - name of first stage (lines of code timed separately with -log_view) to
2163:            be preloaded

2165:    Usage:
2166: .vb
2167:      PetscPreLoadBegin(PETSC_TRUE,"first stage);
2168:        lines of code
2169:        PetscPreLoadStage("second stage");
2170:        lines of code
2171:      PetscPreLoadEnd();
2172: .ve

2174:    Notes:
2175:     Only works in C/C++, not Fortran

2177:      Flags available within the macro.
2178: +    PetscPreLoadingUsed - true if we are or have done preloading
2179: .    PetscPreLoadingOn - true if it is CURRENTLY doing preload
2180: .    PetscPreLoadIt - 0 for the first computation (with preloading turned off it is only 0) 1 for the second
2181: -    PetscPreLoadMax - number of times it will do the computation, only one when preloading is turned on
2182:      The first two variables are available throughout the program, the second two only between the PetscPreLoadBegin()
2183:      and PetscPreLoadEnd()

2185:    Level: intermediate

2187: .seealso: PetscLogEventRegister(), PetscLogEventBegin(), PetscLogEventEnd(), PetscPreLoadEnd(), PetscPreLoadStage()


2190: M*/

2192: /*MC
2193:    PetscPreLoadEnd - End a segment of code that may be preloaded (run twice)
2194:     to get accurate timings

2196:    Synopsis:
2197: #include <petsclog.h>
2198:    void PetscPreLoadEnd(void);

2200:    Not Collective

2202:    Usage:
2203: .vb
2204:      PetscPreLoadBegin(PETSC_TRUE,"first stage);
2205:        lines of code
2206:        PetscPreLoadStage("second stage");
2207:        lines of code
2208:      PetscPreLoadEnd();
2209: .ve

2211:    Notes:
2212:     only works in C/C++ not fortran

2214:    Level: intermediate

2216: .seealso: PetscLogEventRegister(), PetscLogEventBegin(), PetscLogEventEnd(), PetscPreLoadBegin(), PetscPreLoadStage()

2218: M*/

2220: /*MC
2221:    PetscPreLoadStage - Start a new segment of code to be timed separately.
2222:     to get accurate timings

2224:    Synopsis:
2225: #include <petsclog.h>
2226:    void PetscPreLoadStage(char *name);

2228:    Not Collective

2230:    Usage:
2231: .vb
2232:      PetscPreLoadBegin(PETSC_TRUE,"first stage);
2233:        lines of code
2234:        PetscPreLoadStage("second stage");
2235:        lines of code
2236:      PetscPreLoadEnd();
2237: .ve

2239:    Notes:
2240:     only works in C/C++ not fortran

2242:    Level: intermediate

2244: .seealso: PetscLogEventRegister(), PetscLogEventBegin(), PetscLogEventEnd(), PetscPreLoadBegin(), PetscPreLoadEnd()

2246: M*/


2249: #else /* end of -DPETSC_USE_LOG section */

2251: PetscErrorCode  PetscLogObjectState(PetscObject obj, const char format[], ...)
2252: {
2254:   return(0);
2255: }

2257: #endif /* PETSC_USE_LOG*/


2260: PetscClassId PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID;
2261: PetscClassId PETSC_OBJECT_CLASSID  = 0;

2263: /*@C
2264:   PetscClassIdRegister - Registers a new class name for objects and logging operations in an application code.

2266:   Not Collective

2268:   Input Parameter:
2269: . name   - The class name

2271:   Output Parameter:
2272: . oclass - The class id or classid

2274:   Level: developer

2276: @*/
2277: PetscErrorCode  PetscClassIdRegister(const char name[],PetscClassId *oclass)
2278: {
2279: #if defined(PETSC_USE_LOG)
2280:   PetscStageLog  stageLog;
2281:   PetscInt       stage;
2283: #endif

2286:   *oclass = ++PETSC_LARGEST_CLASSID;
2287: #if defined(PETSC_USE_LOG)
2288:   PetscLogGetStageLog(&stageLog);
2289:   PetscClassRegLogRegister(stageLog->classLog, name, *oclass);
2290:   for (stage = 0; stage < stageLog->numStages; stage++) {
2291:     PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses);
2292:   }
2293: #endif
2294:   return(0);
2295: }

2297: #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_MPE)
2298: #include <mpe.h>

2300: PetscBool PetscBeganMPE = PETSC_FALSE;

2302: PETSC_INTERN PetscErrorCode PetscLogEventBeginMPE(PetscLogEvent,int,PetscObject,PetscObject,PetscObject,PetscObject);
2303: PETSC_INTERN PetscErrorCode PetscLogEventEndMPE(PetscLogEvent,int,PetscObject,PetscObject,PetscObject,PetscObject);

2305: /*@C
2306:    PetscLogMPEBegin - Turns on MPE logging of events. This creates large log files
2307:    and slows the program down.

2309:    Collective over PETSC_COMM_WORLD

2311:    Options Database Keys:
2312: . -log_mpe - Prints extensive log information

2314:    Notes:
2315:    A related routine is PetscLogDefaultBegin() (with the options key -log_view), which is
2316:    intended for production runs since it logs only flop rates and object
2317:    creation (and should not significantly slow the programs).

2319:    Level: advanced


2322: .seealso: PetscLogDump(), PetscLogDefaultBegin(), PetscLogAllBegin(), PetscLogEventActivate(),
2323:           PetscLogEventDeactivate()
2324: @*/
2325: PetscErrorCode  PetscLogMPEBegin(void)
2326: {

2330:   /* Do MPE initialization */
2331:   if (!MPE_Initialized_logging()) { /* This function exists in mpich 1.1.2 and higher */
2332:     PetscInfo(0,"Initializing MPE.\n");
2333:     MPE_Init_log();

2335:     PetscBeganMPE = PETSC_TRUE;
2336:   } else {
2337:     PetscInfo(0,"MPE already initialized. Not attempting to reinitialize.\n");
2338:   }
2339:   PetscLogSet(PetscLogEventBeginMPE, PetscLogEventEndMPE);
2340:   return(0);
2341: }

2343: /*@C
2344:    PetscLogMPEDump - Dumps the MPE logging info to file for later use with Jumpshot.

2346:    Collective over PETSC_COMM_WORLD

2348:    Level: advanced

2350: .seealso: PetscLogDump(), PetscLogAllBegin(), PetscLogMPEBegin()
2351: @*/
2352: PetscErrorCode  PetscLogMPEDump(const char sname[])
2353: {
2354:   char           name[PETSC_MAX_PATH_LEN];

2358:   if (PetscBeganMPE) {
2359:     PetscInfo(0,"Finalizing MPE.\n");
2360:     if (sname) {
2361:       PetscStrcpy(name,sname);
2362:     } else {
2363:       PetscGetProgramName(name,sizeof(name));
2364:     }
2365:     MPE_Finish_log(name);
2366:   } else {
2367:     PetscInfo(0,"Not finalizing MPE (not started by PETSc).\n");
2368:   }
2369:   return(0);
2370: }

2372: #define PETSC_RGB_COLORS_MAX 39
2373: static const char *PetscLogMPERGBColors[PETSC_RGB_COLORS_MAX] = {
2374:   "OliveDrab:      ",
2375:   "BlueViolet:     ",
2376:   "CadetBlue:      ",
2377:   "CornflowerBlue: ",
2378:   "DarkGoldenrod:  ",
2379:   "DarkGreen:      ",
2380:   "DarkKhaki:      ",
2381:   "DarkOliveGreen: ",
2382:   "DarkOrange:     ",
2383:   "DarkOrchid:     ",
2384:   "DarkSeaGreen:   ",
2385:   "DarkSlateGray:  ",
2386:   "DarkTurquoise:  ",
2387:   "DeepPink:       ",
2388:   "DarkKhaki:      ",
2389:   "DimGray:        ",
2390:   "DodgerBlue:     ",
2391:   "GreenYellow:    ",
2392:   "HotPink:        ",
2393:   "IndianRed:      ",
2394:   "LavenderBlush:  ",
2395:   "LawnGreen:      ",
2396:   "LemonChiffon:   ",
2397:   "LightCoral:     ",
2398:   "LightCyan:      ",
2399:   "LightPink:      ",
2400:   "LightSalmon:    ",
2401:   "LightSlateGray: ",
2402:   "LightYellow:    ",
2403:   "LimeGreen:      ",
2404:   "MediumPurple:   ",
2405:   "MediumSeaGreen: ",
2406:   "MediumSlateBlue:",
2407:   "MidnightBlue:   ",
2408:   "MintCream:      ",
2409:   "MistyRose:      ",
2410:   "NavajoWhite:    ",
2411:   "NavyBlue:       ",
2412:   "OliveDrab:      "
2413: };

2415: /*@C
2416:   PetscLogMPEGetRGBColor - This routine returns a rgb color useable with PetscLogEventRegister()

2418:   Not collective. Maybe it should be?

2420:   Output Parameter:
2421: . str - character string representing the color

2423:   Level: developer

2425: .seealso: PetscLogEventRegister
2426: @*/
2427: PetscErrorCode  PetscLogMPEGetRGBColor(const char *str[])
2428: {
2429:   static int idx = 0;

2432:   *str = PetscLogMPERGBColors[idx];
2433:   idx  = (idx + 1)% PETSC_RGB_COLORS_MAX;
2434:   return(0);
2435: }

2437: #endif /* PETSC_USE_LOG && PETSC_HAVE_MPE */