"""FlameIQ deterministic comparison engine.
This is the most critical module in the codebase. It compares a current
:class:`~flameiq.schema.v1.models.PerformanceSnapshot` against a baseline
and produces a :class:`~flameiq.core.models.ComparisonResult`.
Determinism guarantee
---------------------
Given identical inputs this module **always** produces identical outputs.
- No randomness of any kind.
- No ``datetime.now()`` calls.
- No network I/O.
- Floating-point arithmetic is explicit and documented.
- All rounding uses Python's built-in ``round()`` with fixed precision.
Floating-point policy
---------------------
``change_percent`` is computed as::

    ((current - baseline) / baseline) * 100

and is rounded to **4 decimal places** for stable threshold comparisons.
Division by zero is guarded — if ``baseline == 0``,
:func:`compute_change_percent` raises a
:class:`~flameiq.core.errors.ComparisonError`, which
:func:`compare_snapshots` catches and logs as a warning before
skipping the metric.
"""
from __future__ import annotations

import logging
import math
from typing import TYPE_CHECKING

from flameiq.core.errors import ComparisonError
from flameiq.core.models import ComparisonResult, MetricDiff, RegressionStatus
from flameiq.core.thresholds import (
    DEFAULT_THRESHOLD_PERCENT,
    build_threshold_map,
    evaluate_threshold,
)
if TYPE_CHECKING:
from flameiq.schema.v1.models import PerformanceSnapshot
# Module-level logger. compare_snapshots reports skipped metrics at
# WARNING level and detected regressions at INFO level through it.
logger = logging.getLogger(__name__)
# Decimal places for change_percent — do not change without an RFC.
# Passed as the ``ndigits`` argument to round() in compute_change_percent.
_CHANGE_PERCENT_PRECISION: int = 4
# A metric must be within this many percentage points of its threshold
# to trigger a WARNING (rather than PASS).
# Used as the default for compare_snapshots' ``warning_margin_percent``.
_WARNING_MARGIN_PERCENT: float = 5.0
[docs]
def compute_change_percent(baseline: float, current: float) -> float:
    """Compute the signed percentage change from *baseline* to *current*.

    Formula::

        ((current - baseline) / baseline) * 100

    Rounded to :data:`_CHANGE_PERCENT_PRECISION` decimal places.

    Args:
        baseline: Reference value. Must be finite and non-zero.
        current: Measured value. Must be finite.

    Returns:
        Signed percentage change, rounded to 4 d.p.
        For a positive baseline, positive means current is larger than
        baseline. The formula divides by the signed baseline, so a
        negative baseline flips the sign of the result.

    Raises:
        :class:`~flameiq.core.errors.ComparisonError`: If ``baseline``
            is exactly zero, if either input is NaN or infinite, or if
            the computed change overflows to a non-finite value.

    Examples::

        compute_change_percent(100.0, 110.0)   # → 10.0
        compute_change_percent(100.0, 90.0)    # → -10.0
        compute_change_percent(100.0, 100.0)   # → 0.0
    """
    if baseline == 0.0:
        raise ComparisonError(
            f"Cannot compute percent change: baseline value is zero "
            f"(current={current}). Metric will be skipped."
        )
    # NaN/inf inputs would otherwise flow through the arithmetic and come
    # back as NaN/inf. Ordering comparisons against NaN are always False,
    # so a corrupt measurement could pass threshold checks unnoticed.
    if not (math.isfinite(baseline) and math.isfinite(current)):
        raise ComparisonError(
            f"Cannot compute percent change: non-finite value "
            f"(baseline={baseline}, current={current}). Metric will be skipped."
        )
    raw = ((current - baseline) / baseline) * 100.0
    # Finite inputs can still overflow (huge difference or tiny baseline).
    if not math.isfinite(raw):
        raise ComparisonError(
            f"Cannot compute percent change: result is not finite "
            f"(baseline={baseline}, current={current}). Metric will be skipped."
        )
    return round(raw, _CHANGE_PERCENT_PRECISION)
[docs]
def compare_snapshots(
    baseline: PerformanceSnapshot,
    current: PerformanceSnapshot,
    threshold_config: dict[str, str | float] | None = None,
    warning_margin_percent: float = _WARNING_MARGIN_PERCENT,
) -> ComparisonResult:
    """Diff *current* against *baseline*, metric by metric.

    Baseline metrics are visited in sorted key order. For each one the
    engine:

    1. Skips it (with a logged warning) when *current* has no value for
       the key, or when :func:`compute_change_percent` raises
       :class:`~flameiq.core.errors.ComparisonError`.
    2. Otherwise computes ``change_percent``, looks up the metric's
       threshold (falling back to ``DEFAULT_THRESHOLD_PERCENT``), and asks
       :func:`~flameiq.core.thresholds.evaluate_threshold` whether the
       change is a regression.
    3. Flags a non-regressing, non-zero change as a warning when its
       magnitude is at least ``abs(threshold) - warning_margin_percent``.

    Metrics that exist only in *current* are never visited: iteration is
    driven by the baseline's keys.

    Args:
        baseline: The reference snapshot.
        current: The snapshot under evaluation.
        threshold_config: Raw threshold dict from ``flameiq.yaml``,
            e.g. ``{"latency.p95": "10%"}``. ``None`` is treated as an
            empty dict.
        warning_margin_percent: Distance from the threshold, in
            percentage points, at which a change is flagged as a warning.

    Returns:
        A :class:`~flameiq.core.models.ComparisonResult` holding one
        :class:`~flameiq.core.models.MetricDiff` per compared metric. The
        overall status is ``REGRESSION`` if any metric regressed and
        ``PASS`` otherwise; warnings are recorded per diff only.
        ``statistical_mode`` is always ``False``.
    """
    threshold_by_metric = build_threshold_map(threshold_config or {})
    reference_values = baseline.metrics.flat()
    observed_values = current.metrics.flat()

    diffs: list[MetricDiff] = []
    regression_found = False

    # Sorted iteration keeps diff order and log order stable across runs.
    for key in sorted(reference_values.keys()):
        reference = reference_values[key]
        observed = observed_values.get(key)
        if observed is None:
            logger.warning(
                "Metric '%s' in baseline but missing from current — skipped.",
                key,
            )
            continue

        try:
            delta_pct = compute_change_percent(reference, observed)
        except ComparisonError as exc:
            logger.warning("Skipping metric '%s': %s", key, exc)
            continue

        limit = threshold_by_metric.get(key, DEFAULT_THRESHOLD_PERCENT)
        regressed = evaluate_threshold(key, delta_pct, limit)

        # A warning is a non-zero change that is close to the threshold
        # without having been judged a regression.
        if regressed:
            near_threshold = False
        else:
            magnitude = abs(delta_pct)
            near_threshold = (
                magnitude > 0
                and magnitude >= abs(limit) - warning_margin_percent
            )

        diffs.append(
            MetricDiff(
                metric_key=key,
                baseline_value=reference,
                current_value=observed,
                change_percent=delta_pct,
                threshold_percent=limit,
                is_regression=regressed,
                is_warning=near_threshold,
            )
        )

        if regressed:
            regression_found = True
            logger.info(
                "REGRESSION: %s changed %+.2f%% (threshold: %.1f%%)",
                key,
                delta_pct,
                limit,
            )

    if regression_found:
        status = RegressionStatus.REGRESSION
    else:
        status = RegressionStatus.PASS

    regressed_keys = [d.metric_key for d in diffs if d.is_regression]
    if regressed_keys:
        summary = (
            f"{len(regressed_keys)} regression(s) in: {', '.join(regressed_keys)}"
        )
    else:
        summary = f"All {len(diffs)} metric(s) within threshold."

    return ComparisonResult(
        status=status,
        diffs=diffs,
        baseline_commit=baseline.metadata.commit,
        current_commit=current.metadata.commit,
        statistical_mode=False,
        summary=summary,
    )