Source code for flameiq.core.comparator

"""FlameIQ deterministic comparison engine.

This is the most critical module in the codebase. It compares a current
:class:`~flameiq.schema.v1.models.PerformanceSnapshot` against a baseline
and produces a :class:`~flameiq.core.models.ComparisonResult`.

Determinism guarantee
---------------------
Given identical inputs this module **always** produces identical outputs.

- No randomness of any kind.
- No ``datetime.now()`` calls.
- No network I/O.
- Floating-point arithmetic is explicit and documented.
- All rounding uses Python's built-in ``round()`` with fixed precision.

Floating-point policy
---------------------
``change_percent`` is computed as::

    ((current - baseline) / baseline) * 100

rounded to **4 decimal places** for stable threshold comparisons.
Division by zero is guarded — if ``baseline == 0``,
:func:`compute_change_percent` raises a
:class:`~flameiq.core.errors.ComparisonError`, which
:func:`compare_snapshots` catches and logs as a warning before skipping
that metric.
"""

from __future__ import annotations

import logging
from typing import TYPE_CHECKING

from flameiq.core.errors import ComparisonError
from flameiq.core.models import ComparisonResult, MetricDiff, RegressionStatus
from flameiq.core.thresholds import (
    DEFAULT_THRESHOLD_PERCENT,
    build_threshold_map,
    evaluate_threshold,
)

if TYPE_CHECKING:
    from flameiq.schema.v1.models import PerformanceSnapshot

# Module-level logger; compare_snapshots reports skipped metrics at WARNING
# level and detected regressions at INFO level through it.
logger = logging.getLogger(__name__)

# Number of decimal places compute_change_percent passes to round() for
# change_percent — do not change without an RFC.
_CHANGE_PERCENT_PRECISION: int = 4

# Default for compare_snapshots' ``warning_margin_percent`` argument.
# A non-zero change that is not a regression is flagged ``is_warning`` on its
# MetricDiff when its magnitude is within this many percentage points of the
# threshold's magnitude.
_WARNING_MARGIN_PERCENT: float = 5.0


def compute_change_percent(baseline: float, current: float) -> float:
    """Return the signed percent change of *current* relative to *baseline*.

    The result is::

        ((current - baseline) / baseline) * 100

    rounded with the built-in ``round()`` to
    :data:`_CHANGE_PERCENT_PRECISION` decimal places.

    Args:
        baseline: Reference value. It is the divisor, so it must be non-zero.
        current: Measured value compared against the reference.

    Returns:
        The rounded, signed percentage change. With a positive baseline the
        result is positive when ``current`` is larger than ``baseline`` and
        negative when it is smaller; because the divisor is the signed
        baseline, a negative baseline flips that sign.

    Raises:
        :class:`~flameiq.core.errors.ComparisonError`: If ``baseline``
            compares equal to zero.

    Examples::

        compute_change_percent(100.0, 110.0)  # → 10.0
        compute_change_percent(100.0, 90.0)   # → -10.0
        compute_change_percent(100.0, 100.0)  # → 0.0
    """
    # Guard the division up front; the caller is expected to catch this and
    # skip the metric.
    if baseline == 0.0:
        message = (
            "Cannot compute percent change: baseline value is zero "
            f"(current={current}). Metric will be skipped."
        )
        raise ComparisonError(message)

    delta = current - baseline
    percent = (delta / baseline) * 100.0
    return round(percent, _CHANGE_PERCENT_PRECISION)
def compare_snapshots(
    baseline: PerformanceSnapshot,
    current: PerformanceSnapshot,
    threshold_config: dict[str, str | float] | None = None,
    warning_margin_percent: float = _WARNING_MARGIN_PERCENT,
) -> ComparisonResult:
    """Diff *current* against *baseline*, metric by metric.

    Both snapshots are flattened via ``metrics.flat()`` and the baseline's
    metric keys are visited in sorted order, so the resulting diff list has a
    stable ordering. For each baseline metric the engine:

    1. Skips it (with a logged warning) if *current* has no value for it.
    2. Computes ``change_percent`` via :func:`compute_change_percent`; a
       :class:`~flameiq.core.errors.ComparisonError` (zero baseline) is
       logged as a warning and the metric is skipped.
    3. Looks up the metric's threshold, falling back to
       ``DEFAULT_THRESHOLD_PERCENT`` when none is configured.
    4. Asks :func:`~flameiq.core.thresholds.evaluate_threshold` whether the
       change is a regression.
    5. Otherwise flags a warning when the change is non-zero and its
       magnitude is at least ``abs(threshold) - warning_margin_percent``.

    Metrics that exist only in *current* are never visited — they have no
    reference value to compare against.

    Args:
        baseline: The reference snapshot.
        current: The snapshot under evaluation.
        threshold_config: Raw threshold mapping handed to
            :func:`~flameiq.core.thresholds.build_threshold_map`, e.g.
            ``{"latency.p95": "10%"}``. ``None`` (or an empty mapping) is
            treated as ``{}``.
        warning_margin_percent: How close (in percentage points) a
            non-regressing change must come to the threshold magnitude to be
            flagged as a warning.

    Returns:
        A :class:`~flameiq.core.models.ComparisonResult` holding one
        :class:`~flameiq.core.models.MetricDiff` per compared metric, the
        commits of both snapshots, ``statistical_mode=False``, a
        human-readable summary, and an overall status that is ``REGRESSION``
        if any metric regressed and ``PASS`` otherwise.
    """
    threshold_by_metric = build_threshold_map(threshold_config or {})
    reference_values = baseline.metrics.flat()
    observed_values = current.metrics.flat()

    diffs: list[MetricDiff] = []
    found_regression = False

    # Sorted iteration keeps the output order independent of dict ordering.
    for name in sorted(reference_values):
        reference = reference_values[name]
        observed = observed_values.get(name)

        if observed is None:
            logger.warning(
                "Metric '%s' in baseline but missing from current — skipped.",
                name,
            )
            continue

        try:
            percent = compute_change_percent(reference, observed)
        except ComparisonError as err:
            logger.warning("Skipping metric '%s': %s", name, err)
            continue

        limit = threshold_by_metric.get(name, DEFAULT_THRESHOLD_PERCENT)
        regressed = evaluate_threshold(name, percent, limit)

        # A warning means "approaching the threshold without breaching it".
        # NOTE(review): only the magnitude of the change is checked here, so
        # a large change in either direction is flagged — confirm intended.
        if regressed:
            near_threshold = False
        else:
            magnitude = abs(percent)
            near_threshold = (
                magnitude >= abs(limit) - warning_margin_percent
                and magnitude > 0
            )

        diffs.append(
            MetricDiff(
                metric_key=name,
                baseline_value=reference,
                current_value=observed,
                change_percent=percent,
                threshold_percent=limit,
                is_regression=regressed,
                is_warning=near_threshold,
            )
        )

        if regressed:
            found_regression = True
            logger.info(
                "REGRESSION: %s changed %+.2f%% (threshold: %.1f%%)",
                name,
                percent,
                limit,
            )

    # NOTE(review): per-metric warnings are recorded on the diffs only; the
    # overall status is either REGRESSION or PASS.
    if found_regression:
        status = RegressionStatus.REGRESSION
    else:
        status = RegressionStatus.PASS

    regressed_names = [entry.metric_key for entry in diffs if entry.is_regression]
    if regressed_names:
        summary = (
            f"{len(regressed_names)} regression(s) in: {', '.join(regressed_names)}"
        )
    else:
        summary = f"All {len(diffs)} metric(s) within threshold."

    return ComparisonResult(
        status=status,
        diffs=diffs,
        baseline_commit=baseline.metadata.commit,
        current_commit=current.metadata.commit,
        statistical_mode=False,
        summary=summary,
    )