Actual source code: plog.c
1: #define PETSC_DLL
2: /*
3: PETSc code to log object creation and destruction and PETSc events.
4: */
5: #include petscsys.h
6: #include petsctime.h
7: #if defined(PETSC_HAVE_MPE)
8: #include "mpe.h"
9: #endif
10: #include <stdarg.h>
11: #include <sys/types.h>
12: #if defined(PETSC_HAVE_STDLIB_H)
13: #include <stdlib.h>
14: #endif
15: #if defined(PETSC_HAVE_MALLOC_H)
16: #include <malloc.h>
17: #endif
18: #include ../src/sys/plog/plog.h
20: PetscLogEvent PETSC_LARGEST_EVENT = PETSC_EVENT;
23: std::map<std::string,PETSc::LogEvent> PETSc::Log::event_registry;
24: std::map<std::string,PETSc::LogStage> PETSc::Log::stage_registry;
25: #endif
27: #if defined(PETSC_USE_LOG)
28: #include "petscmachineinfo.h"
29: #include "petscconfiginfo.h"
31: /* used in the MPI_XXX() count macros in petsclog.h */
33: /* Action and object logging variables */
34: Action *actions = PETSC_NULL;
35: Object *objects = PETSC_NULL;
36: PetscTruth logActions = PETSC_FALSE;
37: PetscTruth logObjects = PETSC_FALSE;
38: int numActions = 0, maxActions = 100;
39: int numObjects = 0, maxObjects = 100;
40: int numObjectsDestroyed = 0;
42: /* Global counters */
43: PetscLogDouble BaseTime = 0.0;
44: PetscLogDouble _TotalFlops = 0.0; /* The number of flops */
45: PetscLogDouble petsc_tmp_flops = 0.0; /* The incremental number of flops */
46: PetscLogDouble send_ct = 0.0; /* The number of sends */
47: PetscLogDouble recv_ct = 0.0; /* The number of receives */
48: PetscLogDouble send_len = 0.0; /* The total length of all sent messages */
49: PetscLogDouble recv_len = 0.0; /* The total length of all received messages */
50: PetscLogDouble isend_ct = 0.0; /* The number of immediate sends */
51: PetscLogDouble irecv_ct = 0.0; /* The number of immediate receives */
52: PetscLogDouble isend_len = 0.0; /* The total length of all immediate send messages */
53: PetscLogDouble irecv_len = 0.0; /* The total length of all immediate receive messages */
54: PetscLogDouble wait_ct = 0.0; /* The number of waits */
55: PetscLogDouble wait_any_ct = 0.0; /* The number of anywaits */
56: PetscLogDouble wait_all_ct = 0.0; /* The number of waitalls */
57: PetscLogDouble sum_of_waits_ct = 0.0; /* The total number of waits */
58: PetscLogDouble allreduce_ct = 0.0; /* The number of reductions */
59: PetscLogDouble gather_ct = 0.0; /* The number of gathers and gathervs */
60: PetscLogDouble scatter_ct = 0.0; /* The number of scatters and scattervs */
62: /* Logging functions */
63: PetscErrorCode (*_PetscLogPHC)(PetscObject) = PETSC_NULL;
64: PetscErrorCode (*_PetscLogPHD)(PetscObject) = PETSC_NULL;
65: PetscErrorCode (*_PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = PETSC_NULL;
66: PetscErrorCode (*_PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = PETSC_NULL;
68: /* Tracing event logging variables */
69: FILE *tracefile = PETSC_NULL;
70: int tracelevel = 0;
71: const char *traceblanks = " ";
72: char tracespace[128] = " ";
73: PetscLogDouble tracetime = 0.0;
74: PetscTruth PetscLogBegin_PrivateCalled = PETSC_FALSE;
76: /*---------------------------------------------- General Functions --------------------------------------------------*/
79: /*@C
80: PetscLogDestroy - Destroys the object and event logging data and resets the global counters.
82: Not Collective
84: Notes:
85: This routine should not usually be used by programmers. Instead employ
86: PetscLogStagePush() and PetscLogStagePop().
88: Level: developer
90: .keywords: log, destroy
91: .seealso: PetscLogDump(), PetscLogAllBegin(), PetscLogPrintSummary(), PetscLogStagePush(), PetscLogStagePop()
92: @*/
93: PetscErrorCode PetscLogDestroy(void)
94: {
95: StageLog stageLog;
99: PetscFree(actions);
100: actions = PETSC_NULL;
101: PetscFree(objects);
102: objects = PETSC_NULL;
103: PetscLogSet(PETSC_NULL, PETSC_NULL);
105: /* Resetting phase */
106: PetscLogGetStageLog(&stageLog);
107: StageLogDestroy(stageLog);
108: _TotalFlops = 0.0;
109: numActions = 0;
110: numObjects = 0;
111: numObjectsDestroyed = 0;
112: maxActions = 100;
113: maxObjects = 100;
114: actions = PETSC_NULL;
115: objects = PETSC_NULL;
116: logActions = PETSC_FALSE;
117: logObjects = PETSC_FALSE;
118: BaseTime = 0.0;
119: _TotalFlops = 0.0;
120: petsc_tmp_flops = 0.0;
121: send_ct = 0.0;
122: recv_ct = 0.0;
123: send_len = 0.0;
124: recv_len = 0.0;
125: isend_ct = 0.0;
126: irecv_ct = 0.0;
127: isend_len = 0.0;
128: irecv_len = 0.0;
129: wait_ct = 0.0;
130: wait_any_ct = 0.0;
131: wait_all_ct = 0.0;
132: sum_of_waits_ct = 0.0;
133: allreduce_ct = 0.0;
134: gather_ct = 0.0;
135: scatter_ct = 0.0;
136: PETSC_LARGEST_EVENT = PETSC_EVENT;
137: _PetscLogPHC = PETSC_NULL;
138: _PetscLogPHD = PETSC_NULL;
139: tracefile = PETSC_NULL;
140: tracelevel = 0;
141: traceblanks = " ";
142: tracespace[0] = ' '; tracespace[1] = 0;
143: tracetime = 0.0;
144: PETSC_LARGEST_COOKIE = PETSC_SMALLEST_COOKIE;
145: PETSC_OBJECT_COOKIE = 0;
146: _stageLog = 0;
147: PetscLogBegin_PrivateCalled = PETSC_FALSE;
148: return(0);
149: }
153: /*@C
154: PetscLogSet - Sets the logging functions called at the beginning and ending of every event.
156: Not Collective
158: Input Parameters:
159: + b - The function called at beginning of event
160: - e - The function called at end of event
162: Level: developer
164: .seealso: PetscLogDump(), PetscLogBegin(), PetscLogAllBegin(), PetscLogTraceBegin()
165: @*/
166: PetscErrorCode PetscLogSet(PetscErrorCode (*b)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject),
167: PetscErrorCode (*e)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject))
168: {
170: _PetscLogPLB = b;
171: _PetscLogPLE = e;
172: return(0);
173: }
175: #if defined(PETSC_HAVE_CHUD)
176: #include <CHUD/CHUD.h>
177: #endif
178: #if defined(PETSC_HAVE_PAPI)
179: #include "papi.h"
180: int PAPIEventSet = PAPI_NULL;
181: #endif
183: /*------------------------------------------- Initialization Functions ----------------------------------------------*/
186: PetscErrorCode PetscLogBegin_Private(void)
187: {
188: int stage;
189: PetscTruth opt;
190: PetscErrorCode ierr;
193: if (PetscLogBegin_PrivateCalled) return(0);
194: PetscLogBegin_PrivateCalled = PETSC_TRUE;
196: PetscOptionsHasName(PETSC_NULL, "-log_exclude_actions", &opt);
197: if (opt) {
198: logActions = PETSC_FALSE;
199: }
200: PetscOptionsHasName(PETSC_NULL, "-log_exclude_objects", &opt);
201: if (opt) {
202: logObjects = PETSC_FALSE;
203: }
204: if (logActions) {
205: PetscMalloc(maxActions * sizeof(Action), &actions);
206: }
207: if (logObjects) {
208: PetscMalloc(maxObjects * sizeof(Object), &objects);
209: }
210: _PetscLogPHC = PetscLogObjCreateDefault;
211: _PetscLogPHD = PetscLogObjDestroyDefault;
212: /* Setup default logging structures */
213: StageLogCreate(&_stageLog);
214: StageLogRegister(_stageLog, "Main Stage", &stage);
215: #if defined(PETSC_HAVE_CHUD)
216: chudInitialize();
217: chudAcquireSamplingFacility(CHUD_BLOCKING);
218: chudSetSamplingDevice(chudCPU1Dev);
219: chudSetStartDelay(0,chudNanoSeconds);
220: chudClearPMCMode(chudCPU1Dev,chudUnused);
221: chudClearPMCs();
222: /* chudSetPMCMuxPosition(chudCPU1Dev,0,0); */
223: printf("%s\n",chudGetEventName(chudCPU1Dev,PMC_1,193));
224: printf("%s\n",chudGetEventDescription(chudCPU1Dev,PMC_1,193));
225: printf("%s\n",chudGetEventNotes(chudCPU1Dev,PMC_1,193));
226: chudSetPMCEvent(chudCPU1Dev,PMC_1,193);
227: chudSetPMCMode(chudCPU1Dev,PMC_1,chudCounter);
228: chudSetPrivilegeFilter(chudCPU1Dev,PMC_1,chudCountUserEvents);
229: chudSetPMCEventMask(chudCPU1Dev,PMC_1,0xFE);
230: if (!chudIsEventValid(chudCPU1Dev,PMC_1,193)) SETERRQ1(PETSC_ERR_SUP,"Event is not valid %d",193);
231: chudStartPMCs();
232: #endif
233: #if defined(PETSC_HAVE_PAPI)
234: PAPI_library_init(PAPI_VER_CURRENT);
235: if (ierr != PAPI_VER_CURRENT) SETERRQ(PETSC_ERR_LIB,"Cannot initialize PAPI");
236: PAPI_query_event(PAPI_FP_INS);
237: PAPI_create_eventset(&PAPIEventSet);
238: PAPI_add_event(PAPIEventSet,PAPI_FP_INS);
239: PAPI_start(PAPIEventSet);
240: #endif
242: /* All processors sync here for more consistent logging */
243: MPI_Barrier(PETSC_COMM_WORLD);
244: PetscTime(BaseTime);
245: PetscLogStagePush(stage);
246: return(0);
247: }
251: /*@C
252: PetscLogBegin - Turns on logging of objects and events. This logs flop
253: rates and object creation and should not slow programs down too much.
254: This routine may be called more than once.
256: Collective over PETSC_COMM_WORLD
258: Options Database Keys:
259: + -log_summary - Prints summary of flop and timing information to the
260: screen (for code compiled with PETSC_USE_LOG)
261: - -log - Prints detailed log information (for code compiled with PETSC_USE_LOG)
263: Usage:
264: .vb
265: PetscInitialize(...);
266: PetscLogBegin();
267: ... code ...
268: PetscLogPrintSummary(MPI_Comm,filename); or PetscLogDump();
269: PetscFinalize();
270: .ve
272: Notes:
273: PetscLogPrintSummary(MPI_Comm,filename) or PetscLogDump() actually cause the printing of
274: the logging information.
276: Level: advanced
278: .keywords: log, begin
279: .seealso: PetscLogDump(), PetscLogAllBegin(), PetscLogPrintSummary(), PetscLogTraceBegin()
280: @*/
281: PetscErrorCode PetscLogBegin(void)
282: {
286: PetscLogSet(PetscLogEventBeginDefault, PetscLogEventEndDefault);
287: PetscLogBegin_Private();
288: return(0);
289: }
293: /*@C
294: PetscLogAllBegin - Turns on extensive logging of objects and events. Logs
295: all events. This creates large log files and slows the program down.
297: Collective on PETSC_COMM_WORLD
299: Options Database Keys:
300: . -log_all - Prints extensive log information (for code compiled with PETSC_USE_LOG)
302: Usage:
303: .vb
304: PetscInitialize(...);
305: PetscLogAllBegin();
306: ... code ...
307: PetscLogDump(filename);
308: PetscFinalize();
309: .ve
311: Notes:
312: A related routine is PetscLogBegin (with the options key -log), which is
313: intended for production runs since it logs only flop rates and object
314: creation (and shouldn't significantly slow the programs).
316: Level: advanced
318: .keywords: log, all, begin
319: .seealso: PetscLogDump(), PetscLogBegin(), PetscLogTraceBegin()
320: @*/
321: PetscErrorCode PetscLogAllBegin(void)
322: {
326: PetscLogSet(PetscLogEventBeginComplete, PetscLogEventEndComplete);
327: PetscLogBegin_Private();
328: return(0);
329: }
333: /*@
334: PetscLogTraceBegin - Activates trace logging. Every time a PETSc event
335: begins or ends, the event name is printed.
337: Collective on PETSC_COMM_WORLD
339: Input Parameter:
340: . file - The file to print trace in (e.g. stdout)
342: Options Database Key:
343: . -log_trace [filename] - Activates PetscLogTraceBegin()
345: Notes:
346: PetscLogTraceBegin() prints the processor number, the execution time (sec),
347: then "Event begin:" or "Event end:" followed by the event name.
349: PetscLogTraceBegin() allows tracing of all PETSc calls, which is useful
350: to determine where a program is hanging without running in the
351: debugger. Can be used in conjunction with the -info option.
353: Level: intermediate
355: .seealso: PetscLogDump(), PetscLogAllBegin(), PetscLogPrintSummary(), PetscLogBegin()
356: @*/
357: PetscErrorCode PetscLogTraceBegin(FILE *file)
358: {
362: tracefile = file;
363: PetscLogSet(PetscLogEventBeginTrace, PetscLogEventEndTrace);
364: PetscLogBegin_Private();
365: return(0);
366: }
370: /*@
371: PetscLogActions - Determines whether actions are logged for the graphical viewer.
373: Not Collective
375: Input Parameter:
376: . flag - PETSC_TRUE if actions are to be logged
378: Level: intermediate
380: Note: Logging of actions continues to consume more memory as the program
381: runs. Long running programs should consider turning this feature off.
383: Options Database Keys:
384: . -log_exclude_actions - Turns off actions logging
386: .keywords: log, stage, register
387: .seealso: PetscLogStagePush(), PetscLogStagePop()
388: @*/
389: PetscErrorCode PetscLogActions(PetscTruth flag)
390: {
392: logActions = flag;
393: return(0);
394: }
398: /*@
399: PetscLogObjects - Determines whether objects are logged for the graphical viewer.
401: Not Collective
403: Input Parameter:
404: . flag - PETSC_TRUE if objects are to be logged
406: Level: intermediate
408: Note: Logging of objects continues to consume more memory as the program
409: runs. Long running programs should consider turning this feature off.
411: Options Database Keys:
412: . -log_exclude_objects - Turns off objects logging
414: .keywords: log, stage, register
415: .seealso: PetscLogStagePush(), PetscLogStagePop()
416: @*/
417: PetscErrorCode PetscLogObjects(PetscTruth flag)
418: {
420: logObjects = flag;
421: return(0);
422: }
424: /*------------------------------------------------ Stage Functions --------------------------------------------------*/
427: /*@C
428: PetscLogStageRegister - Attaches a character string name to a logging stage.
430: Not Collective
432: Input Parameter:
433: . sname - The name to associate with that stage
435: Output Parameter:
436: . stage - The stage number
438: Level: intermediate
440: .keywords: log, stage, register
441: .seealso: PetscLogStagePush(), PetscLogStagePop()
442: @*/
443: PetscErrorCode PetscLogStageRegister(const char sname[],PetscLogStage *stage)
444: {
445: StageLog stageLog;
446: PetscLogEvent event;
450: PetscLogGetStageLog(&stageLog);
451: StageLogRegister(stageLog, sname, stage);
452: /* Copy events already changed in the main stage, this sucks */
453: EventPerfLogEnsureSize(stageLog->stageInfo[*stage].eventLog, stageLog->eventLog->numEvents);
454: for(event = 0; event < stageLog->eventLog->numEvents; event++) {
455: EventPerfInfoCopy(&stageLog->stageInfo[0].eventLog->eventInfo[event],
456: &stageLog->stageInfo[*stage].eventLog->eventInfo[event]);
457: }
458: ClassPerfLogEnsureSize(stageLog->stageInfo[*stage].classLog, stageLog->classLog->numClasses);
459: return(0);
460: }
464: /*@C
465: PetscLogStagePush - This function pushes a stage on the stack.
467: Not Collective
469: Input Parameter:
470: . stage - The stage on which to log
472: Usage:
473: If the option -log_summary is used to run the program containing the
474: following code, then 2 sets of summary data will be printed during
475: PetscFinalize().
476: .vb
477: PetscInitialize(int *argc,char ***args,0,0);
478: [stage 0 of code]
479: PetscLogStagePush(1);
480: [stage 1 of code]
481: PetscLogStagePop();
482: PetscBarrier(...);
483: [more stage 0 of code]
484: PetscFinalize();
485: .ve
486:
487: Notes:
488: Use PetscLogStageRegister() to register a stage.
490: Level: intermediate
492: .keywords: log, push, stage
493: .seealso: PetscLogStagePop(), PetscLogStageRegister(), PetscBarrier()
494: @*/
495: PetscErrorCode PetscLogStagePush(PetscLogStage stage)
496: {
497: StageLog stageLog;
501: PetscLogGetStageLog(&stageLog);
502: StageLogPush(stageLog, stage);
503: return(0);
504: }
508: /*@C
509: PetscLogStagePop - This function pops a stage from the stack.
511: Not Collective
513: Usage:
514: If the option -log_summary is used to run the program containing the
515: following code, then 2 sets of summary data will be printed during
516: PetscFinalize().
517: .vb
518: PetscInitialize(int *argc,char ***args,0,0);
519: [stage 0 of code]
520: PetscLogStagePush(1);
521: [stage 1 of code]
522: PetscLogStagePop();
523: PetscBarrier(...);
524: [more stage 0 of code]
525: PetscFinalize();
526: .ve
528: Notes:
529: Use PetscLogStageRegister() to register a stage.
531: Level: intermediate
533: .keywords: log, pop, stage
534: .seealso: PetscLogStagePush(), PetscLogStageRegister(), PetscBarrier()
535: @*/
536: PetscErrorCode PetscLogStagePop(void)
537: {
538: StageLog stageLog;
542: PetscLogGetStageLog(&stageLog);
543: StageLogPop(stageLog);
544: return(0);
545: }
549: /*@
550: PetscLogStageSetActive - Determines stage activity for PetscLogEventBegin() and PetscLogEventEnd().
552: Not Collective
554: Input Parameters:
555: + stage - The stage
556: - isActive - The activity flag, PETSC_TRUE for logging, else PETSC_FALSE (defaults to PETSC_TRUE)
558: Level: intermediate
560: .seealso: PetscLogStagePush(), PetscLogStagePop(), PetscLogEventBegin(), PetscLogEventEnd(), PreLoadBegin(), PreLoadEnd(), PreLoadStage()
561: @*/
562: PetscErrorCode PetscLogStageSetActive(PetscLogStage stage, PetscTruth isActive)
563: {
564: StageLog stageLog;
568: PetscLogGetStageLog(&stageLog);
569: StageLogSetActive(stageLog, stage, isActive);
570: return(0);
571: }
575: /*@
576: PetscLogStageGetActive - Returns stage activity for PetscLogEventBegin() and PetscLogEventEnd().
578: Not Collective
580: Input Parameter:
581: . stage - The stage
583: Output Parameter:
584: . isActive - The activity flag, PETSC_TRUE for logging, else PETSC_FALSE (defaults to PETSC_TRUE)
586: Level: intermediate
588: .seealso: PetscLogStagePush(), PetscLogStagePop(), PetscLogEventBegin(), PetscLogEventEnd(), PreLoadBegin(), PreLoadEnd(), PreLoadStage()
589: @*/
590: PetscErrorCode PetscLogStageGetActive(PetscLogStage stage, PetscTruth *isActive)
591: {
592: StageLog stageLog;
596: PetscLogGetStageLog(&stageLog);
597: StageLogGetActive(stageLog, stage, isActive);
598: return(0);
599: }
603: /*@
604: PetscLogStageSetVisible - Determines stage visibility in PetscLogPrintSummary()
606: Not Collective
608: Input Parameters:
609: + stage - The stage
610: - isVisible - The visibility flag, PETSC_TRUE to print, else PETSC_FALSE (defaults to PETSC_TRUE)
612: Level: intermediate
614: .seealso: PetscLogStagePush(), PetscLogStagePop(), PetscLogPrintSummary()
615: @*/
616: PetscErrorCode PetscLogStageSetVisible(PetscLogStage stage, PetscTruth isVisible)
617: {
618: StageLog stageLog;
622: PetscLogGetStageLog(&stageLog);
623: StageLogSetVisible(stageLog, stage, isVisible);
624: return(0);
625: }
629: /*@
630: PetscLogStageGetVisible - Returns stage visibility in PetscLogPrintSummary()
632: Not Collective
634: Input Parameter:
635: . stage - The stage
637: Output Parameter:
638: . isVisible - The visibility flag, PETSC_TRUE to print, else PETSC_FALSE (defaults to PETSC_TRUE)
640: Level: intermediate
642: .seealso: PetscLogStagePush(), PetscLogStagePop(), PetscLogPrintSummary()
643: @*/
644: PetscErrorCode PetscLogStageGetVisible(PetscLogStage stage, PetscTruth *isVisible)
645: {
646: StageLog stageLog;
650: PetscLogGetStageLog(&stageLog);
651: StageLogGetVisible(stageLog, stage, isVisible);
652: return(0);
653: }
657: /*@C
658: PetscLogStageGetId - Returns the stage id when given the stage name.
660: Not Collective
662: Input Parameter:
663: . name - The stage name
665: Output Parameter:
666: . stage - The stage
668: Level: intermediate
670: .seealso: PetscLogStagePush(), PetscLogStagePop(), PreLoadBegin(), PreLoadEnd(), PreLoadStage()
671: @*/
672: PetscErrorCode PetscLogStageGetId(const char name[], PetscLogStage *stage)
673: {
674: StageLog stageLog;
678: PetscLogGetStageLog(&stageLog);
679: StageLogGetStage(stageLog, name, stage);
680: return(0);
681: }
683: /*------------------------------------------------ Event Functions --------------------------------------------------*/
686: /*@C
687: PetscLogEventRegister - Registers an event name for logging operations in an application code.
689: Not Collective
691: Input Parameters:
692: + name - The name associated with the event
693: - cookie - The cookie associated to the class for this event, obtain either with
694: PetscCookieRegister() or use a predefined one such as KSP_COOKIE, SNES_COOKIE
695:
696: Output Parameter:
697: . event - The event id for use with PetscLogEventBegin() and PetscLogEventEnd().
699: Example of Usage:
700: .vb
701: PetscLogEvent USER_EVENT;
702: PetscCookie cookie;
703: PetscLogDouble user_event_flops;
704: PetscCookieRegister("class name",&cookie);
705: PetscLogEventRegister("User event name",cookie,&USER_EVENT);
706: PetscLogEventBegin(USER_EVENT,0,0,0,0);
707: [code segment to monitor]
708: PetscLogFlops(user_event_flops);
709: PetscLogEventEnd(USER_EVENT,0,0,0,0);
710: .ve
712: Notes:
713: PETSc automatically logs library events if the code has been
714: compiled with -DPETSC_USE_LOG (which is the default) and -log,
715: -log_summary, or -log_all are specified. PetscLogEventRegister() is
716: intended for logging user events to supplement this PETSc
717: information.
719: PETSc can gather data for use with the utilities Upshot/Nupshot
720: (part of the MPICH distribution). If PETSc has been compiled
721: with flag -DPETSC_HAVE_MPE (MPE is an additional utility within
722: MPICH), the user can employ another command line option, -log_mpe,
723: to create a logfile, "mpe.log", which can be visualized
724: with Upshot/Nupshot.
726: The cookie is associated with each event so that classes of events
727: can be disabled simultaneously, such as all matrix events. The user
728: can either use an existing cookie, such as MAT_COOKIE, or create
729: their own as shown in the example.
731: Level: intermediate
733: .keywords: log, event, register
734: .seealso: PetscLogEventBegin(), PetscLogEventEnd(), PetscLogFlops(),
735: PetscLogEventMPEActivate(), PetscLogEventMPEDeactivate(),
736: PetscLogEventActivate(), PetscLogEventDeactivate(), PetscCookieRegister()
737: @*/
738: PetscErrorCode PetscLogEventRegister(const char name[],PetscCookie cookie,PetscLogEvent *event)
739: {
740: StageLog stageLog;
741: int stage;
745: *event = PETSC_DECIDE;
746: PetscLogGetStageLog(&stageLog);
747: EventRegLogRegister(stageLog->eventLog, name, cookie, event);
748: for(stage = 0; stage < stageLog->numStages; stage++) {
749: EventPerfLogEnsureSize(stageLog->stageInfo[stage].eventLog, stageLog->eventLog->numEvents);
750: ClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses);
751: }
752: return(0);
753: }
757: /*@
758: PetscLogEventActivate - Indicates that a particular event should be logged.
760: Not Collective
762: Input Parameter:
763: . event - The event id
765: Usage:
766: .vb
767: PetscLogEventDeactivate(VEC_SetValues);
768: [code where you do not want to log VecSetValues()]
769: PetscLogEventActivate(VEC_SetValues);
770: [code where you do want to log VecSetValues()]
771: .ve
773: Note:
774: The event may be either a pre-defined PETSc event (found in include/petsclog.h)
775: or an event number obtained with PetscLogEventRegister().
777: Level: advanced
779: .keywords: log, event, activate
780: .seealso: PetscLogEventMPEDeactivate(),PetscLogEventMPEActivate(),PetscLogEventDeactivate()
781: @*/
782: PetscErrorCode PetscLogEventActivate(PetscLogEvent event)
783: {
784: StageLog stageLog;
785: int stage;
789: PetscLogGetStageLog(&stageLog);
790: StageLogGetCurrent(stageLog, &stage);
791: EventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event);
792: return(0);
793: }
797: /*@
798: PetscLogEventDeactivate - Indicates that a particular event should not be logged.
800: Not Collective
802: Input Parameter:
803: . event - The event id
805: Usage:
806: .vb
807: PetscLogEventDeactivate(VEC_SetValues);
808: [code where you do not want to log VecSetValues()]
809: PetscLogEventActivate(VEC_SetValues);
810: [code where you do want to log VecSetValues()]
811: .ve
813: Note:
814: The event may be either a pre-defined PETSc event (found in
815: include/petsclog.h) or an event number obtained with PetscLogEventRegister().
817: Level: advanced
819: .keywords: log, event, deactivate
820: .seealso: PetscLogEventMPEDeactivate(),PetscLogEventMPEActivate(),PetscLogEventActivate()
821: @*/
822: PetscErrorCode PetscLogEventDeactivate(PetscLogEvent event)
823: {
824: StageLog stageLog;
825: int stage;
829: PetscLogGetStageLog(&stageLog);
830: StageLogGetCurrent(stageLog, &stage);
831: EventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event);
832: return(0);
833: }
837: /*@
838: PetscLogEventSetActiveAll - Sets the event activity in every stage.
840: Not Collective
842: Input Parameters:
843: + event - The event id
844: - isActive - The activity flag determining whether the event is logged
846: Level: advanced
848: .keywords: log, event, activate
849: .seealso: PetscLogEventMPEDeactivate(),PetscLogEventMPEActivate(),PetscLogEventActivate(),PetscLogEventDeactivate()
850: @*/
851: PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent event, PetscTruth isActive)
852: {
853: StageLog stageLog;
854: int stage;
858: PetscLogGetStageLog(&stageLog);
859: for(stage = 0; stage < stageLog->numStages; stage++) {
860: if (isActive) {
861: EventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event);
862: } else {
863: EventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event);
864: }
865: }
866: return(0);
867: }
871: /*@
872: PetscLogEventActivateClass - Activates event logging for a PETSc object class.
874: Not Collective
876: Input Parameter:
877: . cookie - The event class, for example MAT_COOKIE, SNES_COOKIE, etc.
879: Level: developer
881: .keywords: log, event, activate, class
882: .seealso: PetscInfoActivate(),PetscInfo(),PetscInfoAllow(),PetscLogEventDeactivateClass(), PetscLogEventActivate(),PetscLogEventDeactivate()
883: @*/
884: PetscErrorCode PetscLogEventActivateClass(PetscCookie cookie)
885: {
886: StageLog stageLog;
887: int stage;
891: PetscLogGetStageLog(&stageLog);
892: StageLogGetCurrent(stageLog, &stage);
893: EventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, cookie);
894: return(0);
895: }
899: /*@
900: PetscLogEventDeactivateClass - Deactivates event logging for a PETSc object class.
902: Not Collective
904: Input Parameter:
905: . cookie - The event class, for example MAT_COOKIE, SNES_COOKIE, etc.
907: Level: developer
909: .keywords: log, event, deactivate, class
910: .seealso: PetscInfoActivate(),PetscInfo(),PetscInfoAllow(),PetscLogEventActivateClass(), PetscLogEventActivate(),PetscLogEventDeactivate()
911: @*/
912: PetscErrorCode PetscLogEventDeactivateClass(PetscCookie cookie)
913: {
914: StageLog stageLog;
915: int stage;
919: PetscLogGetStageLog(&stageLog);
920: StageLogGetCurrent(stageLog, &stage);
921: EventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, cookie);
922: return(0);
923: }
925: /*MC
926: PetscLogEventBegin - Logs the beginning of a user event.
928: Synopsis:
929: void PetscLogEventBegin(int e,PetscObject o1,PetscObject o2,PetscObject o3,
930: PetscObject o4)
932: Not Collective
934: Input Parameters:
935: + e - integer associated with the event obtained from PetscLogEventRegister()
936: - o1,o2,o3,o4 - objects associated with the event, or 0
939: Fortran Synopsis:
940: void PetscLogEventBegin(int e,PetscErrorCode ierr)
942: Usage:
943: .vb
944: int USER_EVENT;
945: PetscLogDouble user_event_flops;
946: PetscLogEventRegister("User event",0,&USER_EVENT);
947: PetscLogEventBegin(USER_EVENT,0,0,0,0);
948: [code segment to monitor]
949: PetscLogFlops(user_event_flops);
950: PetscLogEventEnd(USER_EVENT,0,0,0,0);
951: .ve
953: Notes:
954: You need to register each integer event with the command
955: PetscLogEventRegister(). The source code must be compiled with
956: -DPETSC_USE_LOG, which is the default.
958: PETSc automatically logs library events if the code has been
959: compiled with -DPETSC_USE_LOG, and -log, -log_summary, or -log_all are
960: specified. PetscLogEventBegin() is intended for logging user events
961: to supplement this PETSc information.
963: Level: intermediate
965: .seealso: PetscLogEventRegister(), PetscLogEventEnd(), PetscLogFlops()
967: .keywords: log, event, begin
968: M*/
970: /*MC
971: PetscLogEventEnd - Log the end of a user event.
973: Synopsis:
974: void PetscLogEventEnd(int e,PetscObject o1,PetscObject o2,PetscObject o3,
975: PetscObject o4)
977: Not Collective
979: Input Parameters:
980: + e - integer associated with the event obtained with PetscLogEventRegister()
981: - o1,o2,o3,o4 - objects associated with the event, or 0
984: Fortran Synopsis:
985: void PetscLogEventEnd(int e,PetscErrorCode ierr)
987: Usage:
988: .vb
989: int USER_EVENT;
990: PetscLogDouble user_event_flops;
991: PetscLogEventRegister("User event",0,&USER_EVENT);
992: PetscLogEventBegin(USER_EVENT,0,0,0,0);
993: [code segment to monitor]
994: PetscLogFlops(user_event_flops);
995: PetscLogEventEnd(USER_EVENT,0,0,0,0);
996: .ve
998: Notes:
999: You should also register each additional integer event with the command
1000: PetscLogEventRegister(). Source code must be compiled with
1001: -DPETSC_USE_LOG, which is the default.
1003: PETSc automatically logs library events if the code has been
1004: compiled with -DPETSC_USE_LOG, and -log, -log_summary, or -log_all are
1005: specified. PetscLogEventEnd() is intended for logging user events
1006: to supplement this PETSc information.
1008: Level: intermediate
1010: .seealso: PetscLogEventRegister(), PetscLogEventBegin(), PetscLogFlops()
1012: .keywords: log, event, end
1013: M*/
1015: /*MC
1016: PetscLogEventBarrierBegin - Logs the time in a barrier before an event.
1018: Synopsis:
1019: void PetscLogEventBarrierBegin(int e,PetscObject o1,PetscObject o2,PetscObject o3,
1020: PetscObject o4,MPI_Comm comm)
1022: Not Collective
1024: Input Parameters:
1025: . e - integer associated with the event obtained from PetscLogEventRegister()
1026: . o1,o2,o3,o4 - objects associated with the event, or 0
1027: . comm - communicator the barrier takes place over
1030: Usage:
1031: .vb
1032: PetscLogEventBarrierBegin(VEC_NormBarrier,0,0,0,0,comm);
1033: MPI_Allreduce()
1034: PetscLogEventBarrierEnd(VEC_NormBarrier,0,0,0,0,comm);
1035: .ve
1037: Notes:
1038: This is for logging the amount of time spent in a barrier for an event
1039: that requires synchronization.
1041: Additional Notes:
1042: Synchronization events always come in pairs; for example, VEC_NormBarrier and
1043: VEC_NormComm = VEC_NormBarrier + 1
1045: Level: advanced
1047: .seealso: PetscLogEventRegister(), PetscLogEventEnd(), PetscLogFlops(), PetscLogEventBegin(),
1048: PetscLogEventBarrierEnd()
1050: .keywords: log, event, begin, barrier
1051: M*/
1053: /*MC
1054: PetscLogEventBarrierEnd - Logs the time in a barrier before an event.
1056: Synopsis:
1057: void PetscLogEventBarrierEnd(int e,PetscObject o1,PetscObject o2,PetscObject o3,
1058: PetscObject o4,MPI_Comm comm)
1060: Collective on MPI_Comm
1062: Input Parameters:
1063: . e - integer associated with the event obtained from PetscLogEventRegister()
1064: . o1,o2,o3,o4 - objects associated with the event, or 0
1065: . comm - communicator the barrier takes place over
1068: Usage:
1069: .vb
1070: PetscLogEventBarrierBegin(VEC_NormBarrier,0,0,0,0,comm);
1071: MPI_Allreduce()
1072: PetscLogEventBarrierEnd(VEC_NormBarrier,0,0,0,0,comm);
1073: .ve
1075: Notes:
1076: This is for logging the amount of time spent in a barrier for an event
1077: that requires synchronization.
1079: Additional Notes:
1080: Synchronization events always come in pairs; for example, VEC_NormBarrier and
1081: VEC_NormComm = VEC_NormBarrier + 1
1083: Level: advanced
1085: .seealso: PetscLogEventRegister(), PetscLogEventEnd(), PetscLogFlops(), PetscLogEventBegin(),
1086: PetscLogEventBarrierBegin()
1088: .keywords: log, event, end, barrier
1089: M*/
1093: /*@C
1094: PetscLogEventGetId - Returns the event id when given the event name.
1096: Not Collective
1098: Input Parameter:
1099: . name - The event name
1101: Output Parameter:
1102: . event - The event
1104: Level: intermediate
1106: .seealso: PetscLogEventBegin(), PetscLogEventEnd(), PetscLogStageGetId()
1107: @*/
1108: PetscErrorCode PetscLogEventGetId(const char name[], PetscLogEvent *event)
1109: {
1110: StageLog stageLog;
1114: PetscLogGetStageLog(&stageLog);
1115: EventRegLogGetEvent(stageLog->eventLog, name, event);
1116: return(0);
1117: }
1120: /*------------------------------------------------ Output Functions -------------------------------------------------*/
1123: /*@C
1124: PetscLogDump - Dumps logs of objects to a file. This file is intended to
1125: be read by bin/petscview. This program no longer exists.
1127: Collective on PETSC_COMM_WORLD
1129: Input Parameter:
1130: . name - an optional file name
1132: Options Database Keys:
1133: + -log - Prints basic log information (for code compiled with PETSC_USE_LOG)
1134: - -log_all - Prints extensive log information (for code compiled with PETSC_USE_LOG)
1135:
1136: Usage:
1137: .vb
1138: PetscInitialize(...);
1139: PetscLogBegin(); or PetscLogAllBegin();
1140: ... code ...
1141: PetscLogDump(filename);
1142: PetscFinalize();
1143: .ve
1145: Notes:
1146: The default file name is
1147: $ Log.<rank>
1148: where <rank> is the processor number. If no name is specified,
1149: this file will be used.
1151: Level: advanced
1153: .keywords: log, dump
1154: .seealso: PetscLogBegin(), PetscLogAllBegin(), PetscLogPrintSummary()
1155: @*/
1156: PetscErrorCode PetscLogDump(const char sname[])
1157: {
1158: StageLog stageLog;
1159: EventPerfInfo *eventInfo;
1160: FILE *fd;
1161: char file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN];
1162: PetscLogDouble flops, _TotalTime;
1163: PetscMPIInt rank;
1164: int action, object, curStage;
1165: PetscLogEvent event;
1167:
1169: /* Calculate the total elapsed time */
1170: PetscTime(_TotalTime);
1171: _TotalTime -= BaseTime;
1172: /* Open log file */
1173: MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
1174: if (sname) {
1175: sprintf(file, "%s.%d", sname, rank);
1176: } else {
1177: sprintf(file, "Log.%d", rank);
1178: }
1179: PetscFixFilename(file, fname);
1180: PetscFOpen(PETSC_COMM_WORLD, fname, "w", &fd);
1181: if ((!rank) && (!fd)) SETERRQ1(PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname);
1182: /* Output totals */
1183: PetscFPrintf(PETSC_COMM_WORLD, fd, "Total Flops %14e %16.8e\n", _TotalFlops, _TotalTime);
1184: PetscFPrintf(PETSC_COMM_WORLD, fd, "Clock Resolution %g\n", 0.0);
1185: /* Output actions */
1186: if (logActions) {
1187: PetscFPrintf(PETSC_COMM_WORLD, fd, "Actions accomplished %d\n", numActions);
1188: for(action = 0; action < numActions; action++) {
1189: PetscFPrintf(PETSC_COMM_WORLD, fd, "%g %d %d %d %d %d %d %g %g %g\n",
1190: actions[action].time, actions[action].action, (int)actions[action].event, (int)actions[action].cookie, actions[action].id1,
1191: actions[action].id2, actions[action].id3, actions[action].flops, actions[action].mem, actions[action].maxmem);
1192: }
1193: }
1194: /* Output objects */
1195: if (logObjects) {
1196: PetscFPrintf(PETSC_COMM_WORLD, fd, "Objects created %d destroyed %d\n", numObjects, numObjectsDestroyed);
1197: for(object = 0; object < numObjects; object++) {
1198: PetscFPrintf(PETSC_COMM_WORLD, fd, "Parent ID: %d Memory: %d\n", objects[object].parent, (int) objects[object].mem);
1199: if (!objects[object].name[0]) {
1200: PetscFPrintf(PETSC_COMM_WORLD, fd,"No Name\n");
1201: } else {
1202: PetscFPrintf(PETSC_COMM_WORLD, fd, "Name: %s\n", objects[object].name);
1203: }
1204: if (objects[object].info[0] != 0) {
1205: PetscFPrintf(PETSC_COMM_WORLD, fd, "No Info\n");
1206: } else {
1207: PetscFPrintf(PETSC_COMM_WORLD, fd, "Info: %s\n", objects[object].info);
1208: }
1209: }
1210: }
1211: /* Output events */
1212: PetscFPrintf(PETSC_COMM_WORLD, fd, "Event log:\n");
1213: PetscLogGetStageLog(&stageLog);
1214: StackTop(stageLog->stack, &curStage);
1215: eventInfo = stageLog->stageInfo[curStage].eventLog->eventInfo;
1216: for(event = 0; event < stageLog->stageInfo[curStage].eventLog->numEvents; event++) {
1217: if (eventInfo[event].time != 0.0) {
1218: flops = eventInfo[event].flops/eventInfo[event].time;
1219: } else {
1220: flops = 0.0;
1221: }
1222: PetscFPrintf(PETSC_COMM_WORLD, fd, "%d %16d %16g %16g %16g\n", event, eventInfo[event].count,
1223: eventInfo[event].flops, eventInfo[event].time, flops);
1224: }
1225: PetscFClose(PETSC_COMM_WORLD, fd);
1226: return(0);
1227: }
1231: /*@C
1232: PetscLogPrintSummary - Prints a summary of the logging.
1234: Collective over MPI_Comm
1236: Input Parameter:
1237: + comm - The MPI communicator (only one processor prints output)
1238: - file - [Optional] The output file name
1240: Options Database Keys:
1241: . -log_summary - Prints summary of log information (for code compiled with PETSC_USE_LOG)
1243: Usage:
1244: .vb
1245: PetscInitialize(...);
1246: PetscLogBegin();
1247: ... code ...
1248: PetscLogPrintSummary(MPI_Comm,filename);
1249: PetscFinalize(...);
1250: .ve
1252: Notes:
1253: By default the summary is printed to stdout.
1255: Level: beginner
1256:
1257: .keywords: log, dump, print
1258: .seealso: PetscLogBegin(), PetscLogDump()
1259: @*/
1260: PetscErrorCode PetscLogPrintSummary(MPI_Comm comm, const char filename[])
1261: {
1262: FILE *fd = PETSC_STDOUT;
1263: PetscLogDouble zero = 0.0;
1264: StageLog stageLog;
1265: StageInfo *stageInfo = PETSC_NULL;
1266: EventPerfInfo *eventInfo = PETSC_NULL;
1267: ClassPerfInfo *classInfo;
1268: char arch[10], hostname[64], username[16], pname[PETSC_MAX_PATH_LEN], date[64];
1269: const char *name;
1270: PetscLogDouble locTotalTime, TotalTime, TotalFlops;
1271: PetscLogDouble numMessages, messageLength, avgMessLen, numReductions;
1272: PetscLogDouble stageTime, flops, flopr, mem, mess, messLen, red;
1273: PetscLogDouble fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed;
1274: PetscLogDouble fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed;
1275: PetscLogDouble min, max, tot, ratio, avg, x, y;
1276: PetscLogDouble minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratCt, totm, totml, totr;
1277: PetscMPIInt minCt, maxCt;
1278: PetscMPIInt size, rank;
1279: PetscTruth *localStageUsed, *stageUsed;
1280: PetscTruth *localStageVisible, *stageVisible;
1281: int numStages, localNumEvents, numEvents;
1282: int stage, lastStage, oclass;
1283: PetscLogEvent event;
1285: char version[256];
1288: MPI_Comm_size(comm, &size);
1289: MPI_Comm_rank(comm, &rank);
1290: /* Pop off any stages the user forgot to remove */
1291: lastStage = 0;
1292: PetscLogGetStageLog(&stageLog);
1293: StageLogGetCurrent(stageLog, &stage);
1294: while (stage >= 0) {
1295: lastStage = stage;
1296: StageLogPop(stageLog);
1297: StageLogGetCurrent(stageLog, &stage);
1298: }
1299: /* Get the total elapsed time */
1300: PetscTime(locTotalTime); locTotalTime -= BaseTime;
1301: /* Open the summary file */
1302: if (filename) {
1303: PetscFOpen(comm, filename, "w", &fd);
1304: }
1306: PetscFPrintf(comm, fd, "************************************************************************************************************************\n");
1307: PetscFPrintf(comm, fd, "*** WIDEN YOUR WINDOW TO 120 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n");
1308: PetscFPrintf(comm, fd, "************************************************************************************************************************\n");
1309: PetscFPrintf(comm, fd, "\n---------------------------------------------- PETSc Performance Summary: ----------------------------------------------\n\n");
1310: PetscGetArchType(arch, 10);
1311: PetscGetHostName(hostname, 64);
1312: PetscGetUserName(username, 16);
1313: PetscGetProgramName(pname, PETSC_MAX_PATH_LEN);
1314: PetscGetDate(date, 64);
1315: PetscGetVersion(version,256);
1316: if (size == 1) {
1317: PetscFPrintf(comm,fd,"%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date);
1318: } else {
1319: PetscFPrintf(comm,fd,"%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date);
1320: }
1321: PetscFPrintf(comm, fd, "Using %s\n", version);
1323: /* Must preserve reduction count before we go on */
1324: red = allreduce_ct + gather_ct + scatter_ct;
1326: /* Calculate summary information */
1327: PetscFPrintf(comm, fd, "\n Max Max/Min Avg Total \n");
1328: /* Time */
1329: MPI_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1330: MPI_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1331: MPI_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1332: avg = (tot)/((PetscLogDouble) size);
1333: if (min != 0.0) ratio = max/min; else ratio = 0.0;
1334: PetscFPrintf(comm, fd, "Time (sec): %5.3e %10.5f %5.3e\n", max, ratio, avg);
1335: TotalTime = tot;
1336: /* Objects */
1337: avg = (PetscLogDouble) numObjects;
1338: MPI_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1339: MPI_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1340: MPI_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1341: avg = (tot)/((PetscLogDouble) size);
1342: if (min != 0.0) ratio = max/min; else ratio = 0.0;
1343: PetscFPrintf(comm, fd, "Objects: %5.3e %10.5f %5.3e\n", max, ratio, avg);
1344: /* Flops */
1345: MPI_Allreduce(&_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1346: MPI_Allreduce(&_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1347: MPI_Allreduce(&_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1348: avg = (tot)/((PetscLogDouble) size);
1349: if (min != 0.0) ratio = max/min; else ratio = 0.0;
1350: PetscFPrintf(comm, fd, "Flops: %5.3e %10.5f %5.3e %5.3e\n", max, ratio, avg, tot);
1351: TotalFlops = tot;
1352: /* Flops/sec -- Must talk to Barry here */
1353: if (locTotalTime != 0.0) flops = _TotalFlops/locTotalTime; else flops = 0.0;
1354: MPI_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1355: MPI_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1356: MPI_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1357: avg = (tot)/((PetscLogDouble) size);
1358: if (min != 0.0) ratio = max/min; else ratio = 0.0;
1359: PetscFPrintf(comm, fd, "Flops/sec: %5.3e %10.5f %5.3e %5.3e\n", max, ratio, avg, tot);
1360: /* Memory */
1361: PetscMallocGetMaximumUsage(&mem);
1362: if (mem > 0.0) {
1363: MPI_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1364: MPI_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1365: MPI_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1366: avg = (tot)/((PetscLogDouble) size);
1367: if (min != 0.0) ratio = max/min; else ratio = 0.0;
1368: PetscFPrintf(comm, fd, "Memory: %5.3e %10.5f %5.3e\n", max, ratio, tot);
1369: }
1370: /* Messages */
1371: mess = 0.5*(irecv_ct + isend_ct + recv_ct + send_ct);
1372: MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1373: MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1374: MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1375: avg = (tot)/((PetscLogDouble) size);
1376: if (min != 0.0) ratio = max/min; else ratio = 0.0;
1377: PetscFPrintf(comm, fd, "MPI Messages: %5.3e %10.5f %5.3e %5.3e\n", max, ratio, avg, tot);
1378: numMessages = tot;
1379: /* Message Lengths */
1380: mess = 0.5*(irecv_len + isend_len + recv_len + send_len);
1381: MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1382: MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1383: MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1384: if (numMessages != 0) avg = (tot)/(numMessages); else avg = 0.0;
1385: if (min != 0.0) ratio = max/min; else ratio = 0.0;
1386: PetscFPrintf(comm, fd, "MPI Message Lengths: %5.3e %10.5f %5.3e %5.3e\n", max, ratio, avg, tot);
1387: messageLength = tot;
1388: /* Reductions */
1389: MPI_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1390: MPI_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1391: MPI_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1392: if (min != 0.0) ratio = max/min; else ratio = 0.0;
1393: PetscFPrintf(comm, fd, "MPI Reductions: %5.3e %10.5f\n", max, ratio);
1394: numReductions = red; /* wrong because uses count from process zero */
1395: PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n");
1396: PetscFPrintf(comm, fd, " e.g., VecAXPY() for real vectors of length N --> 2N flops\n");
1397: PetscFPrintf(comm, fd, " and VecAXPY() for complex vectors of length N --> 8N flops\n");
1399: /* Get total number of stages --
1400: Currently, a single processor can register more stages than another, but stages must all be registered in order.
1401: We can removed this requirement if necessary by having a global stage numbering and indirection on the stage ID.
1402: This seems best accomplished by assoicating a communicator with each stage.
1403: */
1404: MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm);
1405: PetscMalloc(numStages * sizeof(PetscTruth), &localStageUsed);
1406: PetscMalloc(numStages * sizeof(PetscTruth), &stageUsed);
1407: PetscMalloc(numStages * sizeof(PetscTruth), &localStageVisible);
1408: PetscMalloc(numStages * sizeof(PetscTruth), &stageVisible);
1409: if (numStages > 0) {
1410: stageInfo = stageLog->stageInfo;
1411: for(stage = 0; stage < numStages; stage++) {
1412: if (stage < stageLog->numStages) {
1413: localStageUsed[stage] = stageInfo[stage].used;
1414: localStageVisible[stage] = stageInfo[stage].perfInfo.visible;
1415: } else {
1416: localStageUsed[stage] = PETSC_FALSE;
1417: localStageVisible[stage] = PETSC_TRUE;
1418: }
1419: }
1420: MPI_Allreduce(localStageUsed, stageUsed, numStages, MPI_INT, MPI_LOR, comm);
1421: MPI_Allreduce(localStageVisible, stageVisible, numStages, MPI_INT, MPI_LAND, comm);
1422: for(stage = 0; stage < numStages; stage++) {
1423: if (stageUsed[stage]) {
1424: PetscFPrintf(comm, fd, "\nSummary of Stages: ----- Time ------ ----- Flops ----- --- Messages --- -- Message Lengths -- -- Reductions --\n");
1425: PetscFPrintf(comm, fd, " Avg %%Total Avg %%Total counts %%Total Avg %%Total counts %%Total \n");
1426: break;
1427: }
1428: }
1429: for(stage = 0; stage < numStages; stage++) {
1430: if (!stageUsed[stage]) continue;
1431: if (localStageUsed[stage]) {
1432: MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1433: MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1434: MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1435: MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1436: MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1437: name = stageInfo[stage].name;
1438: } else {
1439: MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1440: MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1441: MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1442: MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1443: MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1444: name = "";
1445: }
1446: mess *= 0.5; messLen *= 0.5; red /= size;
1447: if (TotalTime != 0.0) fracTime = stageTime/TotalTime; else fracTime = 0.0;
1448: if (TotalFlops != 0.0) fracFlops = flops/TotalFlops; else fracFlops = 0.0;
1449: /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */
1450: if (numMessages != 0.0) fracMessages = mess/numMessages; else fracMessages = 0.0;
1451: if (numMessages != 0.0) avgMessLen = messLen/numMessages; else avgMessLen = 0.0;
1452: if (messageLength != 0.0) fracLength = messLen/messageLength; else fracLength = 0.0;
1453: if (numReductions != 0.0) fracReductions = red/numReductions; else fracReductions = 0.0;
1454: PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% \n",
1455: stage, name, stageTime/size, 100.0*fracTime, flops, 100.0*fracFlops,
1456: mess, 100.0*fracMessages, avgMessLen, 100.0*fracLength, red, 100.0*fracReductions);
1457: }
1458: }
1460: PetscFPrintf(comm, fd,
1461: "\n------------------------------------------------------------------------------------------------------------------------\n");
1462:
1463: PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n");
1464: PetscFPrintf(comm, fd, "Phase summary info:\n");
1465: PetscFPrintf(comm, fd, " Count: number of times phase was executed\n");
1466: PetscFPrintf(comm, fd, " Time and Flops: Max - maximum over all processors\n");
1467: PetscFPrintf(comm, fd, " Ratio - ratio of maximum to minimum over all processors\n");
1468: PetscFPrintf(comm, fd, " Mess: number of messages sent\n");
1469: PetscFPrintf(comm, fd, " Avg. len: average message length\n");
1470: PetscFPrintf(comm, fd, " Reduct: number of global reductions\n");
1471: PetscFPrintf(comm, fd, " Global: entire computation\n");
1472: PetscFPrintf(comm, fd, " Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n");
1473: PetscFPrintf(comm, fd, " %%T - percent time in this phase %%F - percent flops in this phase\n");
1474: PetscFPrintf(comm, fd, " %%M - percent messages in this phase %%L - percent message lengths in this phase\n");
1475: PetscFPrintf(comm, fd, " %%R - percent reductions in this phase\n");
1476: PetscFPrintf(comm, fd, " Total Mflop/s: 10e-6 * (sum of flops over all processors)/(max time over all processors)\n");
1477: PetscFPrintf(comm, fd,
1478: "------------------------------------------------------------------------------------------------------------------------\n");
1479:
1481: #if defined(PETSC_USE_DEBUG)
1482: PetscFPrintf(comm, fd, "\n\n");
1483: PetscFPrintf(comm, fd, " ##########################################################\n");
1484: PetscFPrintf(comm, fd, " # #\n");
1485: PetscFPrintf(comm, fd, " # WARNING!!! #\n");
1486: PetscFPrintf(comm, fd, " # #\n");
1487: PetscFPrintf(comm, fd, " # This code was compiled with a debugging option, #\n");
1488: PetscFPrintf(comm, fd, " # To get timing results run config/configure.py #\n");
1489: PetscFPrintf(comm, fd, " # using --with-debugging=no, the performance will #\n");
1490: PetscFPrintf(comm, fd, " # be generally two or three times faster. #\n");
1491: PetscFPrintf(comm, fd, " # #\n");
1492: PetscFPrintf(comm, fd, " ##########################################################\n\n\n");
1493: #endif
1494: #if defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_FORTRAN_KERNELS)
1495: PetscFPrintf(comm, fd, "\n\n");
1496: PetscFPrintf(comm, fd, " ##########################################################\n");
1497: PetscFPrintf(comm, fd, " # #\n");
1498: PetscFPrintf(comm, fd, " # WARNING!!! #\n");
1499: PetscFPrintf(comm, fd, " # #\n");
1500: PetscFPrintf(comm, fd, " # The code for various complex numbers numerical #\n");
1501: PetscFPrintf(comm, fd, " # kernels uses C++, which generally is not well #\n");
1502: PetscFPrintf(comm, fd, " # optimized. For performance that is about 4-5 times #\n");
1503: PetscFPrintf(comm, fd, " # faster, specify --with-fortran-kernels=1 #\n");
1504: PetscFPrintf(comm, fd, " # when running config/configure.py. #\n");
1505: PetscFPrintf(comm, fd, " # #\n");
1506: PetscFPrintf(comm, fd, " ##########################################################\n\n\n");
1507: #endif
1509: /* Report events */
1510: PetscFPrintf(comm, fd,
1511: "Event Count Time (sec) Flops --- Global --- --- Stage --- Total\n");
1512:
1513: PetscFPrintf(comm, fd,
1514: " Max Ratio Max Ratio Max Ratio Mess Avg len Reduct %%T %%F %%M %%L %%R %%T %%F %%M %%L %%R Mflop/s\n");
1515:
1516: PetscFPrintf(comm,fd,
1517: "------------------------------------------------------------------------------------------------------------------------\n");
1519:
1520: /* Problem: The stage name will not show up unless the stage executed on proc 1 */
1521: for(stage = 0; stage < numStages; stage++) {
1522: if (!stageVisible[stage]) continue;
1523: if (localStageUsed[stage]) {
1524: PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name);
1525: MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1526: MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1527: MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1528: MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1529: MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1530: } else {
1531: PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage);
1532: MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1533: MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1534: MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1535: MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1536: MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1537: }
1538: mess *= 0.5; messLen *= 0.5; red /= size;
1540: /* Get total number of events in this stage --
1541: Currently, a single processor can register more events than another, but events must all be registered in order,
1542: just like stages. We can removed this requirement if necessary by having a global event numbering and indirection
1543: on the event ID. This seems best accomplished by assoicating a communicator with each stage.
1545: Problem: If the event did not happen on proc 1, its name will not be available.
1546: Problem: Event visibility is not implemented
1547: */
1548: if (localStageUsed[stage]) {
1549: eventInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
1550: localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents;
1551: } else {
1552: localNumEvents = 0;
1553: }
1554: MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm);
1555: for(event = 0; event < numEvents; event++) {
1556: if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) {
1557: if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) {
1558: flopr = eventInfo[event].flops;
1559: } else {
1560: flopr = 0.0;
1561: }
1562: MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1563: MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1564: MPI_Allreduce(&eventInfo[event].flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1565: MPI_Allreduce(&eventInfo[event].time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1566: MPI_Allreduce(&eventInfo[event].time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1567: MPI_Allreduce(&eventInfo[event].time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1568: MPI_Allreduce(&eventInfo[event].numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1569: MPI_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1570: MPI_Allreduce(&eventInfo[event].numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1571: MPI_Allreduce(&eventInfo[event].count, &minCt, 1, MPI_INT, MPI_MIN, comm);
1572: MPI_Allreduce(&eventInfo[event].count, &maxCt, 1, MPI_INT, MPI_MAX, comm);
1573: name = stageLog->eventLog->eventInfo[event].name;
1574: } else {
1575: flopr = 0.0;
1576: MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1577: MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1578: MPI_Allreduce(&zero, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1579: MPI_Allreduce(&zero, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1580: MPI_Allreduce(&zero, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1581: MPI_Allreduce(&zero, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1582: MPI_Allreduce(&zero, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1583: MPI_Allreduce(&zero, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1584: MPI_Allreduce(&zero, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1585: MPI_Allreduce(&ierr, &minCt, 1, MPI_INT, MPI_MIN, comm);
1586: MPI_Allreduce(&ierr, &maxCt, 1, MPI_INT, MPI_MAX, comm);
1587: name = "";
1588: }
1589: if (mint < 0.0) {
1590: PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n",mint,name);
1591: mint = 0;
1592: }
1593: if (minf < 0.0) SETERRQ2(PETSC_ERR_PLIB,"Minimum flops %g over all processors for %s is negative! Not possible!",minf,name);
1594: totm *= 0.5; totml *= 0.5; totr /= size;
1595:
1596: if (maxCt != 0) {
1597: if (minCt != 0) ratCt = ((PetscLogDouble) maxCt)/minCt; else ratCt = 0.0;
1598: if (mint != 0.0) ratt = maxt/mint; else ratt = 0.0;
1599: if (minf != 0.0) ratf = maxf/minf; else ratf = 0.0;
1600: if (TotalTime != 0.0) fracTime = tott/TotalTime; else fracTime = 0.0;
1601: if (TotalFlops != 0.0) fracFlops = totf/TotalFlops; else fracFlops = 0.0;
1602: if (stageTime != 0.0) fracStageTime = tott/stageTime; else fracStageTime = 0.0;
1603: if (flops != 0.0) fracStageFlops = totf/flops; else fracStageFlops = 0.0;
1604: if (numMessages != 0.0) fracMess = totm/numMessages; else fracMess = 0.0;
1605: if (messageLength != 0.0) fracMessLen = totml/messageLength; else fracMessLen = 0.0;
1606: if (numReductions != 0.0) fracRed = totr/numReductions; else fracRed = 0.0;
1607: if (mess != 0.0) fracStageMess = totm/mess; else fracStageMess = 0.0;
1608: if (messLen != 0.0) fracStageMessLen = totml/messLen; else fracStageMessLen = 0.0;
1609: if (red != 0.0) fracStageRed = totr/red; else fracStageRed = 0.0;
1610: if (totm != 0.0) totml /= totm; else totml = 0.0;
1611: if (maxt != 0.0) flopr = totf/maxt; else flopr = 0.0;
1612: PetscFPrintf(comm, fd,
1613: "%-16s %7d%4.1f %5.4e%4.1f %3.2e%4.1f %2.1e %2.1e %2.1e%3.0f%3.0f%3.0f%3.0f%3.0f %3.0f%3.0f%3.0f%3.0f%3.0f %5.0f\n",
1614: name, maxCt, ratCt, maxt, ratt, maxf, ratf, totm, totml, totr,
1615: 100.0*fracTime, 100.0*fracFlops, 100.0*fracMess, 100.0*fracMessLen, 100.0*fracRed,
1616: 100.0*fracStageTime, 100.0*fracStageFlops, 100.0*fracStageMess, 100.0*fracStageMessLen, 100.0*fracStageRed,
1617: flopr/1.0e6);
1618: }
1619: }
1620: }
1622: /* Memory usage and object creation */
1623: PetscFPrintf(comm, fd,
1624: "------------------------------------------------------------------------------------------------------------------------\n");
1625: PetscFPrintf(comm, fd, "\n");
1626: PetscFPrintf(comm, fd, "Memory usage is given in bytes:\n\n");
1628: /* Right now, only stages on the first processor are reported here, meaning only objects associated with
1629: the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then
1630: stats for stages local to processor sets.
1631: */
1632: /* We should figure out the longest object name here (now 20 characters) */
1633: PetscFPrintf(comm, fd, "Object Type Creations Destructions Memory Descendants' Mem.\n");
1634: PetscFPrintf(comm, fd, "Reports information only for process 0.\n");
1635: for(stage = 0; stage < numStages; stage++) {
1636: if (localStageUsed[stage]) {
1637: classInfo = stageLog->stageInfo[stage].classLog->classInfo;
1638: PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name);
1639: for(oclass = 0; oclass < stageLog->stageInfo[stage].classLog->numClasses; oclass++) {
1640: if ((classInfo[oclass].creations > 0) || (classInfo[oclass].destructions > 0)) {
1641: PetscFPrintf(comm, fd, "%20s %5d %5d %11.0f %g\n", stageLog->classLog->classInfo[oclass].name,
1642: classInfo[oclass].creations, classInfo[oclass].destructions, classInfo[oclass].mem,
1643: classInfo[oclass].descMem);
1644: }
1645: }
1646: } else {
1647: PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage);
1648: }
1649: }
1651: PetscFree(localStageUsed);
1652: PetscFree(stageUsed);
1653: PetscFree(localStageVisible);
1654: PetscFree(stageVisible);
1656: /* Information unrelated to this particular run */
1657: PetscFPrintf(comm, fd,
1658: "========================================================================================================================\n");
1659: PetscTime(y);
1660: PetscTime(x);
1661: PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y);
1662: PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y);
1663: PetscFPrintf(comm,fd,"Average time to get PetscTime(): %g\n", (y-x)/10.0);
1664: /* MPI information */
1665: if (size > 1) {
1666: MPI_Status status;
1667: PetscMPIInt tag;
1668: MPI_Comm newcomm;
1670: MPI_Barrier(comm);
1671: PetscTime(x);
1672: MPI_Barrier(comm);
1673: MPI_Barrier(comm);
1674: MPI_Barrier(comm);
1675: MPI_Barrier(comm);
1676: MPI_Barrier(comm);
1677: PetscTime(y);
1678: PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y-x)/5.0);
1679: PetscCommDuplicate(comm,&newcomm, &tag);
1680: MPI_Barrier(comm);
1681: if (rank) {
1682: MPI_Recv(0, 0, MPI_INT, rank-1, tag, newcomm, &status);
1683: MPI_Send(0, 0, MPI_INT, (rank+1)%size, tag, newcomm);
1684: } else {
1685: PetscTime(x);
1686: MPI_Send(0, 0, MPI_INT, 1, tag, newcomm);
1687: MPI_Recv(0, 0, MPI_INT, size-1, tag, newcomm, &status);
1688: PetscTime(y);
1689: PetscFPrintf(comm,fd,"Average time for zero size MPI_Send(): %g\n", (y-x)/size);
1690: }
1691: PetscCommDestroy(&newcomm);
1692: }
1693: if (!rank) {
1694: PetscOptionsPrint(fd);
1695: }
1696: /* Machine and compile information */
1697: #if defined(PETSC_USE_FORTRAN_KERNELS)
1698: PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n");
1699: #else
1700: PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n");
1701: #endif
1702: #if defined(PETSC_USE_SCALAR_SINGLE)
1703: PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n");
1704: #elif defined(PETSC_USE_LONGDOUBLE)
1705: PetscFPrintf(comm, fd, "Compiled with long double precision PetscScalar and PetscReal\n");
1706: #elif defined(PETSC_USE_SCALAR_INT)
1707: PetscFPrintf(comm, fd, "Compiled with int PetscScalar and PetscReal\n");
1708: #endif
1710: #if defined(PETSC_USE_SCALAR_MAT_SINGLE)
1711: PetscFPrintf(comm, fd, "Compiled with single precision matrices\n");
1712: #else
1713: PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n");
1714: #endif
1715: PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d\n",
1716: (int) sizeof(short), (int) sizeof(int), (int) sizeof(long), (int) sizeof(void*),(int) sizeof(PetscScalar));
1718: PetscFPrintf(comm, fd, "Configure run at: %s\n",petscconfigureruntime);
1719: PetscFPrintf(comm, fd, "Configure options: %s",petscconfigureoptions);
1720: PetscFPrintf(comm, fd, "%s", petscmachineinfo);
1721: PetscFPrintf(comm, fd, "%s", petsccompilerinfo);
1722: PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo);
1723: PetscFPrintf(comm, fd, "%s", petsclinkerinfo);
1725: /* Cleanup */
1726: PetscFPrintf(comm, fd, "\n");
1727: PetscFClose(comm, fd);
1728: StageLogPush(stageLog, lastStage);
1729: return(0);
1730: }
1734: /*@C
1735: PetscLogPrintDetailed - Each process prints the times for its own events
1737: Collective over MPI_Comm
1739: Input Parameter:
1740: + comm - The MPI communicator (only one processor prints output)
1741: - file - [Optional] The output file name
1743: Options Database Keys:
1744: . -log_summary_detailed - Prints summary of log information (for code compiled with PETSC_USE_LOG)
1746: Usage:
1747: .vb
1748: PetscInitialize(...);
1749: PetscLogBegin();
1750: ... code ...
1751: PetscLogPrintDetailed(MPI_Comm,filename);
1752: PetscFinalize(...);
1753: .ve
1755: Notes:
1756: By default the summary is printed to stdout.
1758: Level: beginner
1759:
1760: .keywords: log, dump, print
1761: .seealso: PetscLogBegin(), PetscLogDump(), PetscLogPrintSummary()
1762: @*/
1763: PetscErrorCode PetscLogPrintDetailed(MPI_Comm comm, const char filename[])
1764: {
1765: FILE *fd = PETSC_STDOUT;
1766: StageLog stageLog;
1767: StageInfo *stageInfo = PETSC_NULL;
1768: EventPerfInfo *eventInfo = PETSC_NULL;
1769: const char *name = PETSC_NULL;
1770: PetscLogDouble TotalTime;
1771: PetscLogDouble stageTime, flops, flopr, mess, messLen, red;
1772: PetscLogDouble maxf, totf, maxt, tott, totm, totml, totr = 0.0;
1773: PetscMPIInt maxCt;
1774: PetscMPIInt size, rank;
1775: PetscTruth *stageUsed;
1776: PetscTruth *stageVisible;
1777: int numStages, numEvents;
1778: int stage;
1779: PetscLogEvent event;
1783: MPI_Comm_size(comm, &size);
1784: MPI_Comm_rank(comm, &rank);
1785: /* Pop off any stages the user forgot to remove */
1786: PetscLogGetStageLog(&stageLog);
1787: StageLogGetCurrent(stageLog, &stage);
1788: while (stage >= 0) {
1789: StageLogPop(stageLog);
1790: StageLogGetCurrent(stageLog, &stage);
1791: }
1792: /* Get the total elapsed time */
1793: PetscTime(TotalTime); TotalTime -= BaseTime;
1794: /* Open the summary file */
1795: if (filename) {
1796: PetscFOpen(comm, filename, "w", &fd);
1797: }
1799: PetscFPrintf(comm, fd, "************************************************************************************************************************\n");
1800: PetscFPrintf(comm, fd, "*** WIDEN YOUR WINDOW TO 120 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n");
1801: PetscFPrintf(comm, fd, "************************************************************************************************************************\n");
1804: numStages = stageLog->numStages;
1805: PetscMalloc(numStages * sizeof(PetscTruth), &stageUsed);
1806: PetscMalloc(numStages * sizeof(PetscTruth), &stageVisible);
1807: if (numStages > 0) {
1808: stageInfo = stageLog->stageInfo;
1809: for(stage = 0; stage < numStages; stage++) {
1810: if (stage < stageLog->numStages) {
1811: stageUsed[stage] = stageInfo[stage].used;
1812: stageVisible[stage] = stageInfo[stage].perfInfo.visible;
1813: } else {
1814: stageUsed[stage] = PETSC_FALSE;
1815: stageVisible[stage] = PETSC_TRUE;
1816: }
1817: }
1818: }
1820: /* Report events */
1821: PetscFPrintf(comm, fd,"Event Count Time (sec) Flops/sec \n");
1822: PetscFPrintf(comm, fd," Mess Avg len Reduct \n");
1823: PetscFPrintf(comm,fd,"-----------------------------------------------------------------------------------\n");
1824: /* Problem: The stage name will not show up unless the stage executed on proc 1 */
1825: for(stage = 0; stage < numStages; stage++) {
1826: if (!stageVisible[stage]) continue;
1827: if (stageUsed[stage]) {
1828: PetscSynchronizedFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name);
1829: MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1830: MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1831: MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1832: MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1833: MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1834: }
1835: mess *= 0.5; messLen *= 0.5;
1837: /* Get total number of events in this stage --
1838: */
1839: if (stageUsed[stage]) {
1840: eventInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
1841: numEvents = stageLog->stageInfo[stage].eventLog->numEvents;
1842: } else {
1843: numEvents = 0;
1844: }
1845: for(event = 0; event < numEvents; event++) {
1846: if (stageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents)) {
1847: if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) {
1848: flopr = eventInfo[event].flops/eventInfo[event].time;
1849: } else {
1850: flopr = 0.0;
1851: }
1852: MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, PETSC_COMM_SELF);
1853: MPI_Allreduce(&eventInfo[event].flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1854: MPI_Allreduce(&eventInfo[event].time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, PETSC_COMM_SELF);
1855: MPI_Allreduce(&eventInfo[event].time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1856: MPI_Allreduce(&eventInfo[event].numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1857: MPI_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1858: totr = eventInfo[event].numReductions;
1859: MPI_Allreduce(&eventInfo[event].count, &maxCt, 1, MPI_INT, MPI_MAX, PETSC_COMM_SELF);
1860: name = stageLog->eventLog->eventInfo[event].name;
1861: totm *= 0.5; totml *= 0.5;
1862: }
1863:
1864: if (maxCt != 0) {
1865: if (totm != 0.0) totml /= totm; else totml = 0.0;
1866: PetscSynchronizedFPrintf(comm, fd,"%-16s %7d %5.4e %3.2e %2.1e %2.1e %2.1e\n",name, maxCt, maxt, maxf, totm, totml, totr);
1867: }
1868: }
1869: }
1870: PetscSynchronizedFlush(comm);
1872: PetscFree(stageUsed);
1873: PetscFree(stageVisible);
1875: PetscFClose(comm, fd);
1876: return(0);
1877: }
1879: /*----------------------------------------------- Counter Functions -------------------------------------------------*/
1882: /*@C
1883: PetscGetFlops - Returns the number of flops used on this processor
1884: since the program began.
1886: Not Collective
1888: Output Parameter:
1889: flops - number of floating point operations
1891: Notes:
1892: A global counter logs all PETSc flop counts. The user can use
1893: PetscLogFlops() to increment this counter to include flops for the
1894: application code.
1896: PETSc automatically logs library events if the code has been
1897: compiled with -DPETSC_USE_LOG (which is the default), and -log,
1898: -log_summary, or -log_all are specified. PetscLogFlops() is
1899: intended for logging user flops to supplement this PETSc
1900: information.
1902: Level: intermediate
1904: .keywords: log, flops, floating point operations
1906: .seealso: PetscGetTime(), PetscLogFlops()
1907: @*/
1908: PetscErrorCode PetscGetFlops(PetscLogDouble *flops)
1909: {
1911: *flops = _TotalFlops;
1912: return(0);
1913: }
1917: PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...)
1918: {
1920: int fullLength;
1921: va_list Argp;
1924: if (!logObjects) return(0);
1925: va_start(Argp, format);
1926: PetscVSNPrintf(objects[obj->id].info, 64,format,&fullLength, Argp);
1927: va_end(Argp);
1928: return(0);
1929: }
1933: /*@
1934: PetscLogGetStageLog - This function returns the default stage logging object.
1936: Not collective
1938: Output Parameter:
1939: . stageLog - The default StageLog
1941: Level: beginner
1943: .keywords: log, stage
1944: .seealso: StageLogCreate()
1945: @*/
1946: PetscErrorCode PetscLogGetStageLog(StageLog *stageLog)
1947: {
1950: if (_stageLog == PETSC_NULL) {
1951: fprintf(stderr, "Logging has not been enabled.\nYou might have forgotten to call PetscInitialize().\n");
1952: MPI_Abort(MPI_COMM_WORLD, PETSC_ERR_SUP);
1953: }
1954: *stageLog = _stageLog;
1955: return(0);
1956: }
1958: /*MC
1959: PetscLogFlops - Adds floating point operations to the global counter.
1961: Synopsis:
1962: void PetscLogFlops(PetscLogDouble f)
1964: Not Collective
1966: Input Parameter:
1967: . f - flop counter
1970: Usage:
1971: .vb
1972: int USER_EVENT;
1973: PetscLogEventRegister("User event",0,&USER_EVENT);
1974: PetscLogEventBegin(USER_EVENT,0,0,0,0);
1975: [code segment to monitor]
1976: PetscLogFlops(user_flops)
1977: PetscLogEventEnd(USER_EVENT,0,0,0,0);
1978: .ve
1980: Notes:
1981: A global counter logs all PETSc flop counts. The user can use
1982: PetscLogFlops() to increment this counter to include flops for the
1983: application code.
1985: PETSc automatically logs library events if the code has been
1986: compiled with -DPETSC_USE_LOG (which is the default), and -log,
1987: -log_summary, or -log_all are specified. PetscLogFlops() is
1988: intended for logging user flops to supplement this PETSc
1989: information.
1991: Level: intermediate
1993: .seealso: PetscLogEventRegister(), PetscLogEventBegin(), PetscLogEventEnd(), PetscGetFlops()
1995: .keywords: log, flops, floating point operations
1996: M*/
1998: /*MC
1999: PreLoadBegin - Begin a segment of code that may be preloaded (run twice)
2000: to get accurate timings
2002: Synopsis:
2003: void PreLoadBegin(PetscTruth flag,char *name);
2005: Not Collective
2007: Input Parameter:
2008: + flag - PETSC_TRUE to run twice, PETSC_FALSE to run once, may be overridden
2009: with command line option -preload true or -preload false
2010: - name - name of first stage (lines of code timed separately with -log_summary) to
2011: be preloaded
2013: Usage:
2014: .vb
PreLoadBegin(PETSC_TRUE,"first stage");
2016: lines of code
2017: PreLoadStage("second stage");
2018: lines of code
2019: PreLoadEnd();
2020: .ve
2022: Notes: Only works in C/C++, not Fortran
2024: Flags available within the macro.
2025: + PetscPreLoadingUsed - true if we are or have done preloading
2026: . PetscPreLoadingOn - true if it is CURRENTLY doing preload
2027: . PreLoadIt - 0 for the first computation (with preloading turned off it is only 0) 1 for the second
2028: - PreLoadMax - number of times it will do the computation, only one when preloading is turned on
2029: The first two variables are available throughout the program, the second two only between the PreLoadBegin()
2030: and PreLoadEnd()
2032: Level: intermediate
2034: .seealso: PetscLogEventRegister(), PetscLogEventBegin(), PetscLogEventEnd(), PreLoadEnd(), PreLoadStage()
2036: Concepts: preloading
2037: Concepts: timing^accurate
2038: Concepts: paging^eliminating effects of
2041: M*/
2043: /*MC
2044: PreLoadEnd - End a segment of code that may be preloaded (run twice)
2045: to get accurate timings
2047: Synopsis:
2048: void PreLoadEnd(void);
2050: Not Collective
2052: Usage:
2053: .vb
PreLoadBegin(PETSC_TRUE,"first stage");
2055: lines of code
2056: PreLoadStage("second stage");
2057: lines of code
2058: PreLoadEnd();
2059: .ve
2061: Notes: only works in C/C++ not fortran
2063: Level: intermediate
2065: .seealso: PetscLogEventRegister(), PetscLogEventBegin(), PetscLogEventEnd(), PreLoadBegin(), PreLoadStage()
2067: M*/
2069: /*MC
PreLoadStage - Start a new segment of code to be timed separately
to get accurate timings
2073: Synopsis:
2074: void PreLoadStage(char *name);
2076: Not Collective
2078: Usage:
2079: .vb
PreLoadBegin(PETSC_TRUE,"first stage");
2081: lines of code
2082: PreLoadStage("second stage");
2083: lines of code
2084: PreLoadEnd();
2085: .ve
2087: Notes: only works in C/C++ not fortran
2089: Level: intermediate
2091: .seealso: PetscLogEventRegister(), PetscLogEventBegin(), PetscLogEventEnd(), PreLoadBegin(), PreLoadEnd()
2093: M*/
2095: /*----------------------------------------------- Stack Functions ---------------------------------------------------*/
2098: /*@C
2099: StackDestroy - This function destroys a stack.
2101: Not Collective
2103: Input Parameter:
2104: . stack - The stack
2106: Level: beginner
2108: .keywords: log, stack, destroy
2109: .seealso: StackCreate(), StackEmpty(), StackPush(), StackPop(), StackTop()
2110: @*/
2111: PetscErrorCode StackDestroy(IntStack stack)
2112: {
2116: PetscFree(stack->stack);
2117: PetscFree(stack);
2118: return(0);
2119: }
2123: /*@C
2124: StackEmpty - This function determines whether any items have been pushed.
2126: Not Collective
2128: Input Parameter:
2129: . stack - The stack
2131: Output Parameter:
2132: . empty - PETSC_TRUE if the stack is empty
2134: Level: intermediate
2136: .keywords: log, stack, empty
2137: .seealso: StackCreate(), StackDestroy(), StackPush(), StackPop(), StackTop()
2138: @*/
2139: PetscErrorCode StackEmpty(IntStack stack, PetscTruth *empty)
2140: {
2143: if (stack->top == -1) {
2144: *empty = PETSC_TRUE;
2145: } else {
2146: *empty = PETSC_FALSE;
2147: }
2148: return(0);
2149: }
2153: /*@C
2154: StackTop - This function returns the top of the stack.
2156: Not Collective
2158: Input Parameter:
2159: . stack - The stack
2161: Output Parameter:
2162: . top - The integer on top of the stack
2164: Level: intermediate
2166: .keywords: log, stack, top
2167: .seealso: StackCreate(), StackDestroy(), StackEmpty(), StackPush(), StackPop()
2168: @*/
2169: PetscErrorCode StackTop(IntStack stack, int *top)
2170: {
2173: *top = stack->stack[stack->top];
2174: return(0);
2175: }
2179: /*@C
2180: StackPush - This function pushes an integer on the stack.
2182: Not Collective
2184: Input Parameters:
2185: + stack - The stack
2186: - item - The integer to push
2188: Level: intermediate
2190: .keywords: log, stack, push
2191: .seealso: StackCreate(), StackDestroy(), StackEmpty(), StackPop(), StackTop()
2192: @*/
2193: PetscErrorCode StackPush(IntStack stack, int item)
2194: {
2195: int *array;
2199: stack->top++;
2200: if (stack->top >= stack->max) {
2201: PetscMalloc(stack->max*2 * sizeof(int), &array);
2202: PetscMemcpy(array, stack->stack, stack->max * sizeof(int));
2203: PetscFree(stack->stack);
2204: stack->stack = array;
2205: stack->max *= 2;
2206: }
2207: stack->stack[stack->top] = item;
2208: return(0);
2209: }
2213: /*@C
2214: StackPop - This function pops an integer from the stack.
2216: Not Collective
2218: Input Parameter:
2219: . stack - The stack
2221: Output Parameter:
2222: . item - The integer popped
2224: Level: intermediate
2226: .keywords: log, stack, pop
2227: .seealso: StackCreate(), StackDestroy(), StackEmpty(), StackPush(), StackTop()
2228: @*/
2229: PetscErrorCode StackPop(IntStack stack, int *item)
2230: {
2233: if (stack->top == -1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Stack is empty");
2234: *item = stack->stack[stack->top--];
2235: return(0);
2236: }
2240: /*@C
2241: StackCreate - This function creates a stack.
2243: Not Collective
2245: Output Parameter:
2246: . stack - The stack
2248: Level: beginner
2250: .keywords: log, stack, pop
2251: .seealso: StackDestroy(), StackEmpty(), StackPush(), StackPop(), StackTop()
2252: @*/
2253: PetscErrorCode StackCreate(IntStack *stack)
2254: {
2255: IntStack s;
2260: PetscNew(struct _n_IntStack, &s);
2261: s->top = -1;
2262: s->max = 128;
2263: PetscMalloc(s->max * sizeof(int), &s->stack);
2264: PetscMemzero(s->stack, s->max * sizeof(int));
2265: *stack = s;
2266: return(0);
2267: }
2269: #else /* end of -DPETSC_USE_LOG section */
2273: PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...)
2274: {
2276: return(0);
2277: }
2279: #endif /* PETSC_USE_LOG*/
/* Highest cookie handed out so far; PetscCookieRegister() pre-increments it to produce each new class id */
PetscCookie PETSC_LARGEST_COOKIE = PETSC_SMALLEST_COOKIE;
/* Starts at 0; NOTE(review): presumably assigned when the base object class is registered elsewhere -- confirm */
PetscCookie PETSC_OBJECT_COOKIE = 0;
2287: /*@C
2288: PetscCookieRegister - Registers a new class name for objects and logging operations in an application code.
2290: Not Collective
2292: Input Parameter:
2293: . name - The class name
2294:
2295: Output Parameter:
2296: . oclass - The class id or cookie
2298: Level: developer
2300: .keywords: log, class, register
2302: @*/
2303: PetscErrorCode PetscCookieRegister(const char name[],PetscCookie *oclass )
2304: {
2305: #if defined(PETSC_USE_LOG)
2306: StageLog stageLog;
2307: PetscInt stage;
2309: #endif
2312: *oclass = ++PETSC_LARGEST_COOKIE;
2313: #if defined(PETSC_USE_LOG)
2314: PetscLogGetStageLog(&stageLog);
2315: ClassRegLogRegister(stageLog->classLog, name, *oclass);
2316: for(stage = 0; stage < stageLog->numStages; stage++) {
2317: ClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses);
2318: }
2319: #endif
2320: return(0);
2321: }