diff --git a/Doc/library/profiling.sampling.rst b/Doc/library/profiling.sampling.rst index d2b7d9669ab07e..f0d00621e5f8e0 100644 --- a/Doc/library/profiling.sampling.rst +++ b/Doc/library/profiling.sampling.rst @@ -1003,6 +1003,47 @@ at the top indicate functions that consume significant time either directly or through their callees. +Differential flame graphs +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Differential flame graphs compare two profiling runs to highlight where +performance changed. This helps identify regressions introduced by code changes +and validate that optimizations achieved their intended effect:: + + # Capture baseline profile + python -m profiling.sampling run --binary -o baseline.bin script.py + + # After modifying code, generate differential flamegraph + python -m profiling.sampling run --diff-flamegraph baseline.bin -o diff.html script.py + +The visualization draws the current profile with frame widths showing current +time consumption, then applies color to indicate how each function changed +relative to the baseline. + +**Color coding**: + +- **Red**: Functions consuming more time (regressions). Lighter shades indicate + modest increases, while darker shades show severe regressions. + +- **Blue**: Functions consuming less time (improvements). Lighter shades for + modest reductions, darker shades for significant speedups. + +- **Gray**: Minimal or no change. + +- **Purple**: New functions not present in the baseline. + +Frame colors indicate changes in **direct time** (time when the function was at +the top of the stack, actively executing), not cumulative time including callees. +Hovering over a frame shows comparison details including baseline time, current +time, and the percentage change. + +Some call paths may disappear entirely between profiles. These are called +**elided stacks** and occur when optimizations eliminate code paths or certain +branches stop executing. If elided stacks are present, an elided toggle appears +allowing you to switch between the main differential view and an elided-only +view that shows just the removed paths (colored deep red). + + Gecko format ------------ @@ -1488,6 +1529,10 @@ Output options Generate self-contained HTML flame graph. +.. option:: --diff-flamegraph + + Generate differential flamegraph comparing to a baseline binary profile. + .. option:: --gecko Generate Gecko JSON format for Firefox Profiler. diff --git a/Lib/profiling/sampling/_flamegraph_assets/flamegraph.css b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.css index 24e67bedee5242..0f0cbfd49167d6 100644 --- a/Lib/profiling/sampling/_flamegraph_assets/flamegraph.css +++ b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.css @@ -5,6 +5,29 @@ This file extends the shared foundation with flamegraph-specific styles. ========================================================================== */ +/* -------------------------------------------------------------------------- + Differential Flamegraph + -------------------------------------------------------------------------- */ + +:root { + /* Regression colors */ + --diff-regression-deep: #d32f2f; + --diff-regression-medium: #e57373; + --diff-regression-light: #ef9a9a; + --diff-regression-verylight: #ffcdd2; + + /* Improvement colors */ + --diff-improvement-deep: #1976d2; + --diff-improvement-medium: #42a5f5; + --diff-improvement-light: #64b5f6; + --diff-improvement-verylight: #90caf9; + + /* Other differential colors */ + --diff-neutral: #bdbdbd; + --diff-new: #9575cd; + --diff-elided: #d32f2f; +} + /* -------------------------------------------------------------------------- Layout Overrides (Flamegraph-specific) -------------------------------------------------------------------------- */ @@ -277,7 +300,9 @@ body.resizing-sidebar { /* View Mode Section */ .view-mode-section .section-content { display: flex; - justify-content: center; + flex-direction: column; + gap: 10px; + align-items: center; } /* Collapsible sections */ @@ -815,6 +840,41 @@ body.resizing-sidebar { color: var(--accent); } +.tooltip-diff { + margin-top: 12px; + padding-top: 12px; + border-top: 1px solid var(--border); +} + +.tooltip-diff-title { + font-size: 11px; + font-weight: 600; + color: var(--accent); + margin-bottom: 8px; +} + +.tooltip-diff-row { + display: grid; + grid-template-columns: auto 1fr; + gap: 4px 14px; + font-size: 12px; + margin-bottom: 4px; +} + +.tooltip-diff-row.regression .tooltip-stat-value { + color: rgb(220, 60, 60); + font-weight: 700; +} + +.tooltip-diff-row.improvement .tooltip-stat-value { + color: rgb(60, 120, 220); + font-weight: 700; +} + +.tooltip-diff-row.neutral .tooltip-stat-value { + color: var(--text-secondary); +} + .tooltip-source { margin-top: 10px; padding-top: 10px; @@ -989,7 +1049,8 @@ body.resizing-sidebar { Flamegraph-Specific Toggle Override -------------------------------------------------------------------------- */ -#toggle-invert .toggle-track.on { +#toggle-invert .toggle-track.on, +#toggle-elided .toggle-track.on { background: #8e44ad; border-color: #8e44ad; box-shadow: 0 0 8px rgba(142, 68, 173, 0.3); diff --git a/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js index a2b21da2970064..f55955d61c66de 100644 --- a/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js +++ b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js @@ -40,39 +40,39 @@ function getOpcodeInfo(opcode) { // String Resolution // ============================================================================ -function resolveString(index) { +function resolveString(index, table = stringTable) { if (index === null || index === undefined) { return null; } - if (typeof index === 'number' && index >= 0 && index < stringTable.length) { - return stringTable[index]; + if (typeof index === 'number' && index >= 0 && index < table.length) { + return table[index]; } return String(index); } -function resolveStringIndices(node) { +function resolveStringIndices(node, table) { if (!node) return node; const resolved = { ...node }; if (typeof resolved.name === 'number') { - resolved.name = resolveString(resolved.name); + resolved.name = resolveString(resolved.name, table); } if (typeof resolved.filename === 'number') { - resolved.filename = resolveString(resolved.filename); + resolved.filename = resolveString(resolved.filename, table); } if (typeof resolved.funcname === 'number') { - resolved.funcname = resolveString(resolved.funcname); + resolved.funcname = resolveString(resolved.funcname, table); } if (Array.isArray(resolved.source)) { resolved.source = resolved.source.map(index => - typeof index === 'number' ? resolveString(index) : index + typeof index === 'number' ? resolveString(index, table) : index ); } if (Array.isArray(resolved.children)) { - resolved.children = resolved.children.map(child => resolveStringIndices(child)); + resolved.children = resolved.children.map(child => resolveStringIndices(child, table)); } return resolved; @@ -89,7 +89,7 @@ function toggleTheme() { if (window.flamegraphData && normalData) { const currentData = isInverted ? invertedData : normalData; const tooltip = createPythonTooltip(currentData); - const chart = createFlamegraph(tooltip, currentData.value); + const chart = createFlamegraph(tooltip, currentData.value, currentData); renderFlamegraph(chart, window.flamegraphData); } } @@ -342,6 +342,34 @@ function createPythonTooltip(data) { const fileLocationHTML = isSpecialFrame ? "" : `
${filename}${d.data.lineno ? ":" + d.data.lineno : ""}
`; + // Differential stats section + let diffSection = ""; + if (d.data.diff !== undefined && d.data.baseline !== undefined) { + const baselineSelf = (d.data.baseline / 1000).toFixed(2); + const currentSelf = ((d.data.self_time || 0) / 1000).toFixed(2); + const diffSamples = d.data.diff; + const diffPct = d.data.diff_pct; + const sign = diffSamples >= 0 ? "+" : ""; + const diffClass = diffSamples > 0 ? "regression" : (diffSamples < 0 ? "improvement" : "neutral"); + + diffSection = ` +
+
Self-Time Comparison:
+
+ Baseline Self: + ${baselineSelf} ms +
+
+ Current Self: + ${currentSelf} ms +
+
+ Difference: + ${sign}${diffSamples.toFixed(1)} samples (${sign}${diffPct.toFixed(1)}%) +
+
`; + } + const tooltipHTML = `
${funcname}
@@ -364,6 +392,7 @@ function createPythonTooltip(data) { ${childCount} ` : ''}
+ ${diffSection} ${sourceSection} ${opcodeSection}
@@ -458,11 +487,47 @@ function getHeatColors() { return colors; } -function createFlamegraph(tooltip, rootValue) { +function getDiffColor(node) { + const style = getComputedStyle(document.documentElement); + + if (isShowingElided) { + return style.getPropertyValue('--diff-elided').trim(); + } + + const diff_pct = node.data.diff_pct || 0; + const diff_samples = node.data.diff || 0; + const self_time = node.data.self_time || 0; + + if (diff_pct === 100 && self_time > 0 && Math.abs(diff_samples - self_time) < 0.1) { + return style.getPropertyValue('--diff-new').trim(); + } + + // Neutral zone: small percentage change + if (Math.abs(diff_pct) < 15) { + return style.getPropertyValue('--diff-neutral').trim(); + } + + // Regression (red scale) + if (diff_pct > 0) { + if (diff_pct >= 100) return style.getPropertyValue('--diff-regression-deep').trim(); + if (diff_pct > 50) return style.getPropertyValue('--diff-regression-medium').trim(); + if (diff_pct > 30) return style.getPropertyValue('--diff-regression-light').trim(); + return style.getPropertyValue('--diff-regression-verylight').trim(); + } + + // Improvement (blue scale) + if (diff_pct < -50) return style.getPropertyValue('--diff-improvement-medium').trim(); + if (diff_pct < -30) return style.getPropertyValue('--diff-improvement-light').trim(); + return style.getPropertyValue('--diff-improvement-verylight').trim(); +} + +function createFlamegraph(tooltip, rootValue, data) { const chartArea = document.querySelector('.chart-area'); const width = chartArea ? chartArea.clientWidth - 32 : window.innerWidth - 320; const heatColors = getHeatColors(); + const isDifferential = data && data.stats && data.stats.is_differential; + let chart = flamegraph() .width(width) .cellHeight(20) @@ -471,9 +536,12 @@ function createFlamegraph(tooltip, rootValue) { .tooltip(tooltip) .inverted(true) .setColorMapper(function (d) { - // Root node should be transparent if (d.depth === 0) return 'transparent'; + if (isDifferential) { + return getDiffColor(d); + } + const percentage = d.data.value / rootValue; const level = getHeatLevel(percentage); return heatColors[level]; @@ -857,6 +925,54 @@ function populateProfileSummary(data) { } } +// ============================================================================ +// Elided Stacks (Differential) +// ============================================================================ + +let elidedFlamegraphData = null; +let invertedElidedData = null; +let isShowingElided = false; + +function setupElidedToggle(data) { + const stats = data.stats || {}; + const elidedCount = stats.elided_count || 0; + const elidedFlamegraph = stats.elided_flamegraph; + + if (!elidedCount || !elidedFlamegraph) { + return; + } + + elidedFlamegraphData = resolveStringIndices(elidedFlamegraph, elidedFlamegraph.baseline_strings); + + const toggleElided = document.getElementById('toggle-elided'); + if (toggleElided) { + toggleElided.style.display = 'flex'; + + toggleElided.onclick = function() { + isShowingElided = !isShowingElided; + updateToggleUI('toggle-elided', isShowingElided); + + let dataToRender; + if (isShowingElided) { + if (isInverted) { + if (!invertedElidedData) { + invertedElidedData = generateInvertedFlamegraph(elidedFlamegraphData); + } + dataToRender = invertedElidedData; + } else { + dataToRender = elidedFlamegraphData; + } + } else { + dataToRender = isInverted ? invertedData : normalData; + } + + const tooltip = createPythonTooltip(dataToRender); + const chart = createFlamegraph(tooltip, dataToRender.value, dataToRender); + renderFlamegraph(chart, dataToRender); + }; + } +} + // ============================================================================ // Hotspot Stats // ============================================================================ @@ -868,6 +984,9 @@ function populateStats(data) { // Populate thread statistics if available populateThreadStats(data); + // Setup elided stacks toggle if this is a differential flamegraph + setupElidedToggle(data); + // For hotspots: use normal (non-inverted) tree structure, but respect thread filtering. // In inverted view, the tree structure changes but the hottest functions remain the same. // However, if a thread filter is active, we need to show that thread's hotspots. @@ -1053,12 +1172,12 @@ function filterByThread() { if (filteredData.strings) { stringTable = filteredData.strings; - filteredData = resolveStringIndices(filteredData); + filteredData = resolveStringIndices(filteredData, filteredData.strings); } } const tooltip = createPythonTooltip(filteredData); - const chart = createFlamegraph(tooltip, filteredData.value); + const chart = createFlamegraph(tooltip, filteredData.value, filteredData); renderFlamegraph(chart, filteredData); populateThreadStats(baseData, selectedThreadId); @@ -1138,11 +1257,11 @@ function getInvertNodeKey(node) { return `${node.filename || '~'}|${node.lineno || 0}|${node.funcname || node.name}`; } -function accumulateInvertedNode(parent, stackFrame, leaf) { +function accumulateInvertedNode(parent, stackFrame, leaf, isDifferential) { const key = getInvertNodeKey(stackFrame); if (!parent.children[key]) { - parent.children[key] = { + const newNode = { name: stackFrame.name, value: 0, children: {}, @@ -1150,8 +1269,19 @@ function accumulateInvertedNode(parent, stackFrame, leaf) { lineno: stackFrame.lineno, funcname: stackFrame.funcname, source: stackFrame.source, + opcodes: stackFrame.opcodes, threads: new Set() }; + + if (isDifferential) { + newNode.baseline = 0; + newNode.baseline_total = 0; + newNode.self_time = 0; + newNode.diff = 0; + newNode.diff_pct = 0; + } + + parent.children[key] = newNode; } const node = parent.children[key]; @@ -1160,32 +1290,45 @@ function accumulateInvertedNode(parent, stackFrame, leaf) { leaf.threads.forEach(t => node.threads.add(t)); } + if (isDifferential) { + node.baseline += stackFrame.baseline || 0; + node.baseline_total += stackFrame.baseline_total || 0; + node.self_time += stackFrame.self_time || 0; + node.diff += stackFrame.diff || 0; + + if (node.baseline > 0) { + node.diff_pct = (node.diff / node.baseline) * 100.0; + } else if (node.self_time > 0) { + node.diff_pct = 100.0; + } + } + return node; } -function processLeaf(invertedRoot, path, leafNode) { +function processLeaf(invertedRoot, path, leafNode, isDifferential) { if (!path || path.length === 0) { return; } - let invertedParent = accumulateInvertedNode(invertedRoot, leafNode, leafNode); + let invertedParent = accumulateInvertedNode(invertedRoot, leafNode, leafNode, isDifferential); // Walk backwards through the call stack for (let i = path.length - 2; i >= 0; i--) { - invertedParent = accumulateInvertedNode(invertedParent, path[i], leafNode); + invertedParent = accumulateInvertedNode(invertedParent, path[i], leafNode, isDifferential); } } -function traverseInvert(path, currentNode, invertedRoot) { +function traverseInvert(path, currentNode, invertedRoot, isDifferential) { const children = currentNode.children || []; const childThreads = new Set(children.flatMap(c => c.threads || [])); const selfThreads = (currentNode.threads || []).filter(t => !childThreads.has(t)); if (selfThreads.length > 0) { - processLeaf(invertedRoot, path, { ...currentNode, threads: selfThreads }); + processLeaf(invertedRoot, path, { ...currentNode, threads: selfThreads }, isDifferential); } - children.forEach(child => traverseInvert(path.concat([child]), child, invertedRoot)); + children.forEach(child => traverseInvert(path.concat([child]), child, invertedRoot, isDifferential)); } function convertInvertDictToArray(node) { @@ -1203,6 +1346,8 @@ function convertInvertDictToArray(node) { } function generateInvertedFlamegraph(data) { + const isDifferential = data && data.stats && data.stats.is_differential; + const invertedRoot = { name: data.name, value: data.value, @@ -1214,9 +1359,9 @@ function generateInvertedFlamegraph(data) { const children = data.children || []; if (children.length === 0) { // Single-frame tree: the root is its own leaf - processLeaf(invertedRoot, [data], data); + processLeaf(invertedRoot, [data], data, isDifferential); } else { - children.forEach(child => traverseInvert([child], child, invertedRoot)); + children.forEach(child => traverseInvert([child], child, invertedRoot, isDifferential)); } convertInvertDictToArray(invertedRoot); @@ -1227,19 +1372,30 @@ function toggleInvert() { isInverted = !isInverted; updateToggleUI('toggle-invert', isInverted); - // Build inverted data on first use - if (isInverted && !invertedData) { - invertedData = generateInvertedFlamegraph(normalData); - } + let dataToRender; - let dataToRender = isInverted ? invertedData : normalData; + if (isShowingElided) { + if (isInverted) { + if (!invertedElidedData) { + invertedElidedData = generateInvertedFlamegraph(elidedFlamegraphData); + } + dataToRender = invertedElidedData; + } else { + dataToRender = elidedFlamegraphData; + } + } else { + if (isInverted && !invertedData) { + invertedData = generateInvertedFlamegraph(normalData); + } + dataToRender = isInverted ? invertedData : normalData; - if (currentThreadFilter !== 'all') { - dataToRender = filterDataByThread(dataToRender, parseInt(currentThreadFilter)); + if (currentThreadFilter !== 'all') { + dataToRender = filterDataByThread(dataToRender, parseInt(currentThreadFilter)); + } } const tooltip = createPythonTooltip(dataToRender); - const chart = createFlamegraph(tooltip, dataToRender.value); + const chart = createFlamegraph(tooltip, dataToRender.value, dataToRender); renderFlamegraph(chart, dataToRender); } @@ -1254,7 +1410,7 @@ function initFlamegraph() { if (EMBEDDED_DATA.strings) { stringTable = EMBEDDED_DATA.strings; - normalData = resolveStringIndices(EMBEDDED_DATA); + normalData = resolveStringIndices(EMBEDDED_DATA, EMBEDDED_DATA.strings); } else { normalData = EMBEDDED_DATA; } @@ -1267,8 +1423,20 @@ function initFlamegraph() { initThreadFilter(normalData); + // Toggle legend based on differential mode + const isDifferential = normalData && normalData.stats && normalData.stats.is_differential; + const heatmapLegend = document.getElementById('heatmap-legend-section'); + const diffLegend = document.getElementById('diff-legend-section'); + if (isDifferential) { + if (heatmapLegend) heatmapLegend.style.display = 'none'; + if (diffLegend) diffLegend.style.display = 'block'; + } else { + if (heatmapLegend) heatmapLegend.style.display = 'block'; + if (diffLegend) diffLegend.style.display = 'none'; + } + const tooltip = createPythonTooltip(normalData); - const chart = createFlamegraph(tooltip, normalData.value); + const chart = createFlamegraph(tooltip, normalData.value, normalData); renderFlamegraph(chart, normalData); initSearchHandlers(); initSidebarResize(); diff --git a/Lib/profiling/sampling/_flamegraph_assets/flamegraph_template.html b/Lib/profiling/sampling/_flamegraph_assets/flamegraph_template.html index 07b15a5a2b48c7..66df0a759b3ba4 100644 --- a/Lib/profiling/sampling/_flamegraph_assets/flamegraph_template.html +++ b/Lib/profiling/sampling/_flamegraph_assets/flamegraph_template.html @@ -3,7 +3,7 @@ - Tachyon Profiler - Flamegraph Report + {{TITLE}} @@ -18,7 +18,7 @@ Tachyon - Flamegraph Report + {{SUBTITLE}}
View Mode
+ + +
Flamegraph
@@ -294,9 +301,9 @@

Hotspots

- -
+ + +
diff --git a/Lib/profiling/sampling/cli.py b/Lib/profiling/sampling/cli.py index f4b31aad45b922..2597c378e5db46 100644 --- a/Lib/profiling/sampling/cli.py +++ b/Lib/profiling/sampling/cli.py @@ -16,7 +16,7 @@ from .errors import SamplingUnknownProcessError, SamplingModuleNotFoundError, SamplingScriptNotFoundError from .sample import sample, sample_live, _is_process_running from .pstats_collector import PstatsCollector -from .stack_collector import CollapsedStackCollector, FlamegraphCollector +from .stack_collector import CollapsedStackCollector, FlamegraphCollector, DiffFlamegraphCollector from .heatmap_collector import HeatmapCollector from .gecko_collector import GeckoCollector from .binary_collector import BinaryCollector @@ -56,6 +56,13 @@ class CustomFormatter( pass +class DiffFlamegraphAction(argparse.Action): + """Custom action for --diff-flamegraph that sets both format and baseline path.""" + def __call__(self, parser, namespace, values, option_string=None): + setattr(namespace, 'format', 'diff_flamegraph') + setattr(namespace, 'diff_baseline', values) + + _HELP_DESCRIPTION = """Sample a process's stack frames and generate profiling data. Examples: @@ -85,6 +92,7 @@ class CustomFormatter( "pstats": "pstats", "collapsed": "txt", "flamegraph": "html", + "diff_flamegraph": "html", "gecko": "json", "heatmap": "html", "binary": "bin", @@ -94,6 +102,7 @@ class CustomFormatter( "pstats": PstatsCollector, "collapsed": CollapsedStackCollector, "flamegraph": FlamegraphCollector, + "diff_flamegraph": DiffFlamegraphCollector, "gecko": GeckoCollector, "heatmap": HeatmapCollector, "binary": BinaryCollector, @@ -467,6 +476,12 @@ def _add_format_options(parser, include_compression=True, include_binary=True): dest="format", help="Generate interactive HTML heatmap visualization with line-level sample counts", ) + format_group.add_argument( + "--diff-flamegraph", + metavar="BASELINE.bin", + action=DiffFlamegraphAction, + help="Generate differential flamegraph comparing current profile to BASELINE.bin binary file", + ) if include_binary: format_group.add_argument( "--binary", @@ -545,7 +560,7 @@ def _sort_to_mode(sort_choice): return sort_map.get(sort_choice, SORT_MODE_NSAMPLES) def _create_collector(format_type, sample_interval_usec, skip_idle, opcodes=False, - output_file=None, compression='auto'): + output_file=None, compression='auto', diff_baseline=None): """Create the appropriate collector based on format type. Args: @@ -556,6 +571,7 @@ def _create_collector(format_type, sample_interval_usec, skip_idle, opcodes=Fals for creating interval markers in Firefox Profiler) output_file: Output file path (required for binary format) compression: Compression type for binary format ('auto', 'zstd', 'none') + diff_baseline: Path to baseline binary file for differential flamegraph Returns: A collector instance of the appropriate type @@ -564,6 +580,17 @@ def _create_collector(format_type, sample_interval_usec, skip_idle, opcodes=Fals if collector_class is None: raise ValueError(f"Unknown format: {format_type}") + if format_type == "diff_flamegraph": + if diff_baseline is None: + raise ValueError("Differential flamegraph requires a baseline file") + if not os.path.exists(diff_baseline): + raise ValueError(f"Baseline file not found: {diff_baseline}") + return collector_class( + sample_interval_usec, + baseline_binary_path=diff_baseline, + skip_idle=skip_idle + ) + # Binary format requires output file and compression if format_type == "binary": if output_file is None: @@ -663,7 +690,7 @@ def _handle_output(collector, args, pid, mode): collector.export(filename) # Auto-open browser for HTML output if --browser flag is set - if args.format in ('flamegraph', 'heatmap') and getattr(args, 'browser', False): + if args.format in ('flamegraph', 'diff_flamegraph', 'heatmap') and getattr(args, 'browser', False): _open_in_browser(filename) @@ -756,7 +783,7 @@ def _validate_args(args, parser): ) # Validate --opcodes is only used with compatible formats - opcodes_compatible_formats = ("live", "gecko", "flamegraph", "heatmap", "binary") + opcodes_compatible_formats = ("live", "gecko", "flamegraph", "diff_flamegraph", "heatmap", "binary") if getattr(args, 'opcodes', False) and args.format not in opcodes_compatible_formats: parser.error( f"--opcodes is only compatible with {', '.join('--' + f for f in opcodes_compatible_formats)}." @@ -953,7 +980,8 @@ def _handle_attach(args): collector = _create_collector( args.format, args.sample_interval_usec, skip_idle, args.opcodes, output_file=output_file, - compression=getattr(args, 'compression', 'auto') + compression=getattr(args, 'compression', 'auto'), + diff_baseline=getattr(args, 'diff_baseline', None) ) with _get_child_monitor_context(args, args.pid): @@ -1031,7 +1059,8 @@ def _handle_run(args): collector = _create_collector( args.format, args.sample_interval_usec, skip_idle, args.opcodes, output_file=output_file, - compression=getattr(args, 'compression', 'auto') + compression=getattr(args, 'compression', 'auto'), + diff_baseline=getattr(args, 'diff_baseline', None) ) with _get_child_monitor_context(args, process.pid): @@ -1180,7 +1209,10 @@ def _handle_replay(args): print(f" Sample interval: {interval} us") print(f" Compression: {'zstd' if info.get('compression_type', 0) == 1 else 'none'}") - collector = _create_collector(args.format, interval, skip_idle=False) + collector = _create_collector( + args.format, interval, skip_idle=False, + diff_baseline=getattr(args, 'diff_baseline', None) + ) def progress_callback(current, total): if total > 0: @@ -1206,7 +1238,7 @@ def progress_callback(current, total): collector.export(filename) # Auto-open browser for HTML output if --browser flag is set - if args.format in ('flamegraph', 'heatmap') and getattr(args, 'browser', False): + if args.format in ('flamegraph', 'diff_flamegraph', 'heatmap') and getattr(args, 'browser', False): _open_in_browser(filename) print(f"Replayed {count} samples") diff --git a/Lib/profiling/sampling/stack_collector.py b/Lib/profiling/sampling/stack_collector.py index 931bc2c487b55b..cc1bee89683ff0 100644 --- a/Lib/profiling/sampling/stack_collector.py +++ b/Lib/profiling/sampling/stack_collector.py @@ -333,7 +333,7 @@ def process_frames(self, frames, thread_id, weight=1): node = current["children"].get(func) if node is None: - node = {"samples": 0, "children": {}, "threads": set(), "opcodes": collections.Counter()} + node = {"samples": 0, "children": {}, "threads": set(), "opcodes": collections.Counter(), "self": 0} current["children"][func] = node node["samples"] += weight node["threads"].add(thread_id) @@ -343,6 +343,9 @@ def process_frames(self, frames, thread_id, weight=1): current = node + if current is not self._root: + current["self"] += weight + def _get_source_lines(self, func): filename, lineno, _ = func @@ -381,6 +384,18 @@ def _create_flamegraph_html(self, data): component_js = (template_dir / "_flamegraph_assets" / "flamegraph.js").read_text(encoding="utf-8") js_content = f"{base_js}\n{component_js}" + # Set title and subtitle based on whether this is a differential flamegraph + is_differential = data.get("stats", {}).get("is_differential", False) + if is_differential: + title = "Tachyon Profiler - Differential Flamegraph Report" + subtitle = "Differential Flamegraph Report" + else: + title = "Tachyon Profiler - Flamegraph Report" + subtitle = "Flamegraph Report" + + html_template = html_template.replace("{{TITLE}}", title) + html_template = html_template.replace("{{SUBTITLE}}", subtitle) + # Inline first-party CSS/JS html_template = html_template.replace( "", f"" @@ -427,3 +442,233 @@ def _create_flamegraph_html(self, data): ) return html_content + + +class DiffFlamegraphCollector(FlamegraphCollector): + """Differential flamegraph collector that compares against a baseline binary profile.""" + + def __init__(self, sample_interval_usec, *, baseline_binary_path, skip_idle=False): + super().__init__(sample_interval_usec, skip_idle=skip_idle) + self.baseline_binary_path = baseline_binary_path + self._baseline_collector = None + self._elided_paths = set() + + + def _load_baseline(self): + """Load baseline profile from binary file.""" + from .binary_reader import BinaryReader + + with BinaryReader(self.baseline_binary_path) as reader: + info = reader.get_info() + + baseline_collector = FlamegraphCollector( + sample_interval_usec=info['sample_interval_us'], + skip_idle=self.skip_idle + ) + + reader.replay_samples(baseline_collector) + + self._baseline_collector = baseline_collector + + def _aggregate_path_samples(self, root_node, path=None): + """Aggregate samples by stack path, excluding line numbers for cross-profile matching.""" + if path is None: + path = () + + stats = {} + + for func, node in root_node["children"].items(): + func_key = (func[0], func[2]) + path_key = path + (func_key,) + + total_samples = node.get("samples", 0) + self_samples = node.get("self", 0) + + if path_key in stats: + stats[path_key]["total"] += total_samples + stats[path_key]["self"] += self_samples + else: + stats[path_key] = { + "total": total_samples, + "self": self_samples + } + + child_stats = self._aggregate_path_samples(node, path_key) + for key, data in child_stats.items(): + if key in stats: + stats[key]["total"] += data["total"] + stats[key]["self"] += data["self"] + else: + stats[key] = data + + return stats + + def _convert_to_flamegraph_format(self): + """Convert to flamegraph format with differential annotations.""" + if self._baseline_collector is None: + self._load_baseline() + + current_flamegraph = super()._convert_to_flamegraph_format() + if self._total_samples == 0: + return current_flamegraph + + current_stats = self._aggregate_path_samples(self._root) + baseline_stats = self._aggregate_path_samples(self._baseline_collector._root) + + # Scale baseline values to make them comparable when sample counts differ + scale = (self._total_samples / self._baseline_collector._total_samples + if self._baseline_collector._total_samples > 0 else 1.0) + + self._annotate_nodes_with_diff(current_flamegraph, current_stats, baseline_stats, scale) + self._add_elided_flamegraph(current_flamegraph, current_stats, baseline_stats, scale) + + return current_flamegraph + + def _annotate_nodes_with_diff(self, current_flamegraph, current_stats, baseline_stats, scale): + """Annotate each node in the tree with diff metadata.""" + if "stats" not in current_flamegraph: + current_flamegraph["stats"] = {} + + current_flamegraph["stats"]["baseline_samples"] = self._baseline_collector._total_samples + current_flamegraph["stats"]["current_samples"] = self._total_samples + current_flamegraph["stats"]["baseline_scale"] = scale + current_flamegraph["stats"]["is_differential"] = True + + if self._is_promoted_root(current_flamegraph): + self._add_diff_data_to_node(current_flamegraph, (), current_stats, baseline_stats, scale) + else: + for child in current_flamegraph["children"]: + self._add_diff_data_to_node(child, (), current_stats, baseline_stats, scale) + + def _add_diff_data_to_node(self, node, path, current_stats, baseline_stats, scale): + """Recursively add diff metadata to nodes.""" + func_key = self._extract_func_key(node, self._string_table) + path_key = path + (func_key,) if func_key else path + + current_data = current_stats.get(path_key, {"total": 0, "self": 0}) + baseline_data = baseline_stats.get(path_key, {"total": 0, "self": 0}) + + current_self = current_data["self"] + baseline_self = baseline_data["self"] * scale + baseline_total = baseline_data["total"] * scale + + diff = current_self - baseline_self + if baseline_self > 0: + diff_pct = (diff / baseline_self) * 100.0 + elif current_self > 0: + diff_pct = 100.0 + else: + diff_pct = 0.0 + + node["baseline"] = baseline_self + node["baseline_total"] = baseline_total + node["self_time"] = current_self + node["diff"] = diff + node["diff_pct"] = diff_pct + + if "children" in node and node["children"]: + for child in node["children"]: + self._add_diff_data_to_node(child, path_key, current_stats, baseline_stats, scale) + + def _is_promoted_root(self, data): + """Check if the data represents a promoted root node.""" + return "filename" in data and "funcname" in data + + def _add_elided_flamegraph(self, current_flamegraph, current_stats, baseline_stats, scale): + """Calculate elided paths and add elided flamegraph to stats.""" + self._elided_paths = set(baseline_stats.keys()) - set(current_stats.keys()) + + current_flamegraph["stats"]["elided_count"] = len(self._elided_paths) + + if self._elided_paths: + elided_flamegraph = self._build_elided_flamegraph(baseline_stats, scale) + if elided_flamegraph: + current_flamegraph["stats"]["elided_flamegraph"] = elided_flamegraph + + def _build_elided_flamegraph(self, baseline_stats, scale): + """Build flamegraph containing only elided paths from baseline.""" + if not self._baseline_collector or not self._elided_paths: + return None + + baseline_data = self._baseline_collector._convert_to_flamegraph_format() + + # Remove non-elided nodes and recalculate values + if not self._extract_elided_nodes(baseline_data, path=()): + return None + + self._add_elided_metadata(baseline_data, baseline_stats, scale, path=()) + + baseline_data["stats"].update(self.stats) + baseline_data["stats"]["is_differential"] = True + baseline_data["stats"]["baseline_samples"] = self._baseline_collector._total_samples + baseline_data["stats"]["current_samples"] = self._total_samples + baseline_data["baseline_strings"] = self._baseline_collector._string_table.get_strings() + + return baseline_data + + def _extract_elided_nodes(self, node, path): + """Remove non-elided nodes and recalculate values bottom-up.""" + if not node: + return False + + func_key = self._extract_func_key(node, self._baseline_collector._string_table) + current_path = path + (func_key,) if func_key else path + + is_elided = current_path in self._elided_paths if func_key else False + + if "children" in node: + # Filter children, keeping only those with elided descendants + elided_children = [] + total_value = 0 + for child in node["children"]: + if self._extract_elided_nodes(child, current_path): + elided_children.append(child) + total_value += child.get("value", 0) + node["children"] = elided_children + + # Recalculate value based on remaining children + if elided_children: + node["value"] = total_value + + # Keep this node if it's elided or has elided descendants + return is_elided or bool(node.get("children")) + + def _add_elided_metadata(self, node, baseline_stats, scale, path): + """Add differential metadata showing this path disappeared.""" + if not node: + return + + func_key = self._extract_func_key(node, self._baseline_collector._string_table) + current_path = path + (func_key,) if func_key else path + + if func_key and current_path in baseline_stats: + baseline_data = baseline_stats[current_path] + baseline_self = baseline_data["self"] * scale + baseline_total = baseline_data["total"] * scale + + node["baseline"] = baseline_self + node["baseline_total"] = baseline_total + node["diff"] = -baseline_self + else: + node["baseline"] = 0 + node["baseline_total"] = 0 + node["diff"] = 0 + + node["self_time"] = 0 + node["diff_pct"] = -100.0 + + if "children" in node and node["children"]: + for child in node["children"]: + self._add_elided_metadata(child, baseline_stats, scale, current_path) + + def _extract_func_key(self, node, string_table): + """Extract (filename, funcname) key from node, excluding line numbers. + + Line numbers are excluded to match functions even if they moved. + Returns None for root nodes that don't have function information. + """ + if "filename" not in node or "funcname" not in node: + return None + filename = string_table.get_string(node["filename"]) + funcname = string_table.get_string(node["funcname"]) + return (filename, funcname) diff --git a/Lib/test/test_profiling/test_sampling_profiler/mocks.py b/Lib/test/test_profiling/test_sampling_profiler/mocks.py index 4e0f7a87c6da54..921959d04e252e 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/mocks.py +++ b/Lib/test/test_profiling/test_sampling_profiler/mocks.py @@ -91,3 +91,20 @@ def __init__(self, thread_id, awaited_by): def __repr__(self): return f"MockAwaitedInfo(thread_id={self.thread_id}, awaited_by={len(self.awaited_by)} tasks)" + + +def make_diff_collector_with_mock_baseline(baseline_samples): + """Create a DiffFlamegraphCollector with baseline injected directly, + skipping the binary round-trip that _load_baseline normally does.""" + from profiling.sampling.stack_collector import ( + DiffFlamegraphCollector, + FlamegraphCollector, + ) + + baseline = FlamegraphCollector(1000) + for sample in baseline_samples: + baseline.collect(sample) + + diff = DiffFlamegraphCollector(1000, baseline_binary_path="baseline.bin") + diff._baseline_collector = baseline + return diff diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py index 8e6afa91e89daf..c766d0bcba0238 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py @@ -36,7 +36,7 @@ from test.support import captured_stdout, captured_stderr -from .mocks import MockFrameInfo, MockThreadInfo, MockInterpreterInfo, LocationInfo +from .mocks import MockFrameInfo, MockThreadInfo, MockInterpreterInfo, LocationInfo, make_diff_collector_with_mock_baseline from .helpers import close_and_unlink @@ -1208,6 +1208,330 @@ def test_flamegraph_collector_per_thread_gc_percentage(self): self.assertEqual(collector.per_thread_stats[2]["total"], 6) self.assertAlmostEqual(per_thread_stats[2]["gc_pct"], 10.0, places=1) + def test_diff_flamegraph_identical_profiles(self): + """When baseline and current are identical, diff should be ~0.""" + test_frames = [ + MockInterpreterInfo(0, [ + MockThreadInfo(1, [ + MockFrameInfo("file.py", 10, "func1"), + MockFrameInfo("file.py", 20, "func2"), + ]) + ]) + ] + + diff = make_diff_collector_with_mock_baseline([test_frames] * 3) + for _ in range(3): + diff.collect(test_frames) + + data = diff._convert_to_flamegraph_format() + strings = data.get("strings", []) + + self.assertTrue(data["stats"]["is_differential"]) + self.assertEqual(data["stats"]["baseline_samples"], 3) + self.assertEqual(data["stats"]["current_samples"], 3) + self.assertAlmostEqual(data["stats"]["baseline_scale"], 1.0) + + children = data.get("children", []) + self.assertEqual(len(children), 1) + child = children[0] + child_name_index = child.get("name", 0) + child_name = ( + strings[child_name_index] + if isinstance(child_name_index, int) + and 0 <= child_name_index < len(strings) + else str(child_name_index) + ) + self.assertIn("func1", child_name) + self.assertAlmostEqual(child["diff"], 0.0, places=1) + self.assertAlmostEqual(child["diff_pct"], 0.0, places=1) + + self.assertEqual(data["stats"]["elided_count"], 0) + self.assertNotIn("elided_flamegraph", data["stats"]) + + def test_diff_flamegraph_new_function(self): + """A function only in current should have diff_pct=100 and baseline=0.""" + baseline_frames = [ + MockInterpreterInfo(0, [ + MockThreadInfo(1, [ + MockFrameInfo("file.py", 10, "func1"), + MockFrameInfo("file.py", 20, "func2"), + ]) + ]) + ] + + diff = make_diff_collector_with_mock_baseline([baseline_frames]) + diff.collect([ + MockInterpreterInfo(0, [ + MockThreadInfo(1, [ + MockFrameInfo("file.py", 30, "new_func"), + MockFrameInfo("file.py", 10, "func1"), + MockFrameInfo("file.py", 20, "func2"), + ]) + ]) + ]) + + data = diff._convert_to_flamegraph_format() + strings = data.get("strings", []) + + children = data.get("children", []) + self.assertEqual(len(children), 1) + func1_node = children[0] + func1_name_index = func1_node.get("name", 0) + func1_name = ( + strings[func1_name_index] + if isinstance(func1_name_index, int) + and 0 <= func1_name_index < len(strings) + else str(func1_name_index) + ) + self.assertIn("func1", func1_name) + + func1_children = func1_node.get("children", []) + self.assertEqual(len(func1_children), 1) + new_func_node = func1_children[0] + new_func_name_index = new_func_node.get("name", 0) + new_func_name = ( + strings[new_func_name_index] + if isinstance(new_func_name_index, int) + and 0 <= new_func_name_index < len(strings) + else str(new_func_name_index) + ) + self.assertIn("new_func", new_func_name) + self.assertEqual(new_func_node["baseline"], 0) + self.assertGreater(new_func_node["self_time"], 0) + self.assertAlmostEqual(new_func_node["diff_pct"], 100.0) + + def test_diff_flamegraph_scale_factor(self): + """Scale factor adjusts when sample counts differ.""" + baseline_frames = [ + MockInterpreterInfo(0, [ + MockThreadInfo(1, [ + MockFrameInfo("file.py", 10, "func1"), + MockFrameInfo("file.py", 20, "func2"), + ]) + ]) + ] + + diff = make_diff_collector_with_mock_baseline([baseline_frames]) + for _ in range(4): + diff.collect(baseline_frames) + + data = diff._convert_to_flamegraph_format() + self.assertAlmostEqual(data["stats"]["baseline_scale"], 4.0) + + def test_diff_flamegraph_elided_stacks(self): + """Paths in baseline but not current produce elided stacks.""" + baseline_frames_1 = [ + MockInterpreterInfo(0, [ + MockThreadInfo(1, [ + MockFrameInfo("file.py", 10, "func1"), + MockFrameInfo("file.py", 20, "func2"), + ]) + ]) + ] + baseline_frames_2 = [ + MockInterpreterInfo(0, [ + MockThreadInfo(1, [ + MockFrameInfo("file.py", 30, "old_func"), + MockFrameInfo("file.py", 20, "func2"), + ]) + ]) + ] + + diff = make_diff_collector_with_mock_baseline([baseline_frames_1, baseline_frames_2]) + for _ in range(2): + diff.collect(baseline_frames_1) + + data = diff._convert_to_flamegraph_format() + + self.assertGreater(data["stats"]["elided_count"], 0) + self.assertIn("elided_flamegraph", data["stats"]) + elided = data["stats"]["elided_flamegraph"] + self.assertTrue(elided["stats"]["is_differential"]) + self.assertIn("baseline_strings", elided) + + elided_strings = elided.get("strings", []) + children = elided.get("children", []) + self.assertEqual(len(children), 1) + child = children[0] + child_name_index = child.get("name", 0) + child_name = ( + elided_strings[child_name_index] + if isinstance(child_name_index, int) + and 0 <= child_name_index < len(elided_strings) + else str(child_name_index) + ) + self.assertIn("old_func", child_name) + + def test_diff_flamegraph_function_matched_despite_line_change(self): + """Functions match by (filename, funcname), ignoring lineno.""" + baseline_frames = [ + MockInterpreterInfo(0, [ + MockThreadInfo(1, [ + MockFrameInfo("file.py", 10, "func1"), + MockFrameInfo("file.py", 20, "func2"), + ]) + ]) + ] + + diff = make_diff_collector_with_mock_baseline([baseline_frames]) + # Same functions but different line numbers + diff.collect([ + MockInterpreterInfo(0, [ + MockThreadInfo(1, [ + MockFrameInfo("file.py", 99, "func1"), + MockFrameInfo("file.py", 55, "func2"), + ]) + ]) + ]) + + data = diff._convert_to_flamegraph_format() + strings = data.get("strings", []) + + # func1 is child of promoted root func2 + children = data.get("children", []) + self.assertEqual(len(children), 1) + child = children[0] + child_name_index = child.get("name", 0) + child_name = ( + strings[child_name_index] + if isinstance(child_name_index, int) + and 0 <= child_name_index < len(strings) + else str(child_name_index) + ) + self.assertIn("func1", child_name) + self.assertGreater(child["baseline"], 0) + self.assertGreater(child["self_time"], 0) + self.assertAlmostEqual(child["diff"], 0.0, places=1) + self.assertAlmostEqual(child["diff_pct"], 0.0, places=1) + + def test_diff_flamegraph_empty_current(self): + """Empty current profile returns early without crashing.""" + baseline_frames = [ + MockInterpreterInfo(0, [ + MockThreadInfo(1, [MockFrameInfo("file.py", 10, "func1")]) + ]) + ] + + diff = make_diff_collector_with_mock_baseline([baseline_frames]) + # Don't collect anything in current + + data = diff._convert_to_flamegraph_format() + self.assertIn("name", data) + self.assertEqual(data["value"], 0) + + def test_diff_flamegraph_export(self): + """DiffFlamegraphCollector export produces differential HTML.""" + test_frames = [ + MockInterpreterInfo(0, [ + MockThreadInfo(1, [ + MockFrameInfo("file.py", 10, "func1"), + MockFrameInfo("file.py", 20, "func2"), + ]) + ]) + ] + + diff = make_diff_collector_with_mock_baseline([test_frames]) + diff.collect(test_frames) + + flamegraph_out = tempfile.NamedTemporaryFile( + suffix=".html", delete=False + ) + self.addCleanup(close_and_unlink, flamegraph_out) + + with captured_stdout(), captured_stderr(): + diff.export(flamegraph_out.name) + + self.assertTrue(os.path.exists(flamegraph_out.name)) + self.assertGreater(os.path.getsize(flamegraph_out.name), 0) + + with open(flamegraph_out.name, "r", encoding="utf-8") as f: + content = f.read() + + self.assertIn("", content.lower()) + self.assertIn("Differential Flamegraph", content) + self.assertIn('"is_differential": true', content) + self.assertIn("d3-flame-graph", content) + self.assertIn('id="diff-legend-section"', content) + self.assertIn("Differential Colors", content) + + def test_diff_flamegraph_preserves_metadata(self): + """Differential mode preserves threads and opcodes metadata.""" + test_frames = [ + MockInterpreterInfo(0, [ + MockThreadInfo(1, [MockFrameInfo("a.py", 10, "func_a", opcode=100)]), + MockThreadInfo(2, [MockFrameInfo("b.py", 20, "func_b", opcode=200)]), + ]) + ] + + diff = make_diff_collector_with_mock_baseline([test_frames]) + diff.collect(test_frames) + + data = diff._convert_to_flamegraph_format() + strings = data.get("strings", []) + + self.assertTrue(data["stats"]["is_differential"]) + + self.assertIn("threads", data) + self.assertEqual(len(data["threads"]), 2) + + children = data.get("children", []) + self.assertEqual(len(children), 2) + + opcodes_found = set() + for child in children: + self.assertIn("diff", child) + self.assertIn("diff_pct", child) + self.assertIn("baseline", child) + self.assertIn("self_time", child) + self.assertIn("threads", child) + + if "opcodes" in child: + opcodes_found.update(child["opcodes"].keys()) + + self.assertIn(100, opcodes_found) + self.assertIn(200, opcodes_found) + + self.assertIn("per_thread_stats", data["stats"]) + per_thread_stats = data["stats"]["per_thread_stats"] + self.assertIn(1, per_thread_stats) + self.assertIn(2, per_thread_stats) + + def test_diff_flamegraph_elided_preserves_metadata(self): + """Elided flamegraph preserves thread_stats, per_thread_stats, and opcodes.""" + baseline_frames_1 = [ + MockInterpreterInfo(0, [ + MockThreadInfo(1, [ + MockFrameInfo("file.py", 10, "func1", opcode=100), + MockFrameInfo("file.py", 20, "func2", opcode=101), + ], status=THREAD_STATUS_HAS_GIL) + ]) + ] + baseline_frames_2 = [ + MockInterpreterInfo(0, [ + MockThreadInfo(1, [ + MockFrameInfo("file.py", 30, "old_func", opcode=200), + MockFrameInfo("file.py", 20, "func2", opcode=101), + ], status=THREAD_STATUS_HAS_GIL) + ]) + ] + + diff = make_diff_collector_with_mock_baseline([baseline_frames_1, baseline_frames_2]) + for _ in range(2): + diff.collect(baseline_frames_1) + + data = diff._convert_to_flamegraph_format() + elided = data["stats"]["elided_flamegraph"] + + self.assertIn("thread_stats", elided["stats"]) + self.assertIn("per_thread_stats", elided["stats"]) + + elided_strings = elided.get("strings", []) + children = elided.get("children", []) + self.assertEqual(len(children), 1) + old_func_node = children[0] + if "opcodes" in old_func_node: + self.assertIn(200, old_func_node["opcodes"]) + class TestRecursiveFunctionHandling(unittest.TestCase): """Tests for correct handling of recursive functions in cumulative stats.""" diff --git a/Misc/NEWS.d/next/Library/2026-03-10-19-50-59.gh-issue-138122.CsoBEo.rst b/Misc/NEWS.d/next/Library/2026-03-10-19-50-59.gh-issue-138122.CsoBEo.rst new file mode 100644 index 00000000000000..2059557e8bb92e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-03-10-19-50-59.gh-issue-138122.CsoBEo.rst @@ -0,0 +1,4 @@ +The ``profiling.sampling`` module now supports differential flamegraph +visualization via ``--diff-flamegraph`` to compare two profiling runs. +Functions are colored red (regressions), blue (improvements), gray (neutral), +or purple (new). Elided stacks show code paths that disappeared between runs.