Source code for flameiq.providers.pytest_provider

"""FlameIQ pytest-benchmark provider.

Adapts output from `pytest-benchmark <https://pytest-benchmark.readthedocs.io/>`_
into a FlameIQ v1 :class:`~flameiq.schema.v1.models.PerformanceSnapshot`.

Generate compatible output with::

    pytest --benchmark-json=benchmark_output.json

Then run::

    flameiq run --metrics benchmark_output.json --provider pytest-benchmark

Mapping
-------

.. list-table::
   :header-rows: 1

   * - pytest-benchmark field
     - FlameIQ field
   * - ``stats.mean`` (s)
     - ``latency.mean`` (ms)
   * - ``stats.median`` (s)
     - ``latency.p50`` (ms)
   * - ``stats.ops``
     - ``throughput`` (ops/s)
   * - ``commit_info.id``
     - ``metadata.commit``
   * - ``commit_info.branch``
     - ``metadata.branch``

When multiple benchmarks are present in a single JSON file, each benchmark
is stored as a separate ``custom.<benchmark_name>`` metric (mean latency in ms),
and the first benchmark's latency percentiles are used for the primary signals.
"""

from __future__ import annotations

import json
from pathlib import Path
from typing import Any

from flameiq.core.errors import MetricsFileNotFoundError, ProviderError
from flameiq.providers.base import MetricProvider
from flameiq.schema.v1.models import (
    LatencyMetrics,
    Metrics,
    PerformanceSnapshot,
    SnapshotMetadata,
)



[docs]
class PytestBenchmarkProvider(MetricProvider):
    """Provider for pytest-benchmark JSON output files."""

    @property
    def name(self) -> str:
        """Return the unique provider name."""
        return "pytest-benchmark"


[docs]
    def collect(self, source: str) -> dict[str, Any]:
        """Collect raw metrics data from a pytest-benchmark JSON file."""
        path = Path(source)
        if not path.exists():
            raise MetricsFileNotFoundError(source)
        try:
            result: dict[str, Any] = json.loads(path.read_text(encoding="utf-8"))
        except json.JSONDecodeError as exc:
            raise ProviderError(f"Invalid JSON in '{source}': {exc}") from exc
        return result



[docs]
    def validate(self, raw: dict[str, Any]) -> bool:
        """Check if *raw* data has the expected pytest-benchmark structure."""
        return (
            isinstance(raw, dict)
            and "benchmarks" in raw
            and isinstance(raw["benchmarks"], list)
            and len(raw["benchmarks"]) > 0
        )



[docs]
    def normalize(self, raw: dict[str, Any]) -> PerformanceSnapshot:
        """Parse *raw* pytest-benchmark data into a ``PerformanceSnapshot``.

        Full type: :class:`~flameiq.schema.v1.models.PerformanceSnapshot`.
        """
        benchmarks: list[dict[str, Any]] = raw.get("benchmarks", [])
        if not benchmarks:
            raise ProviderError("pytest-benchmark output contains no benchmarks.")

        def _ms(val: float | None) -> float | None:
            """Convert seconds → milliseconds."""
            return val * 1000.0 if val is not None else None

        # Primary benchmark latency (first entry)
        primary_stats: dict[str, Any] = benchmarks[0].get("stats", {})
        latency = LatencyMetrics(
            mean=_ms(primary_stats.get("mean")),
            p50=_ms(primary_stats.get("median")),
        )

        # Sum ops/s across all benchmarks
        total_ops = sum(float(b.get("stats", {}).get("ops", 0.0)) for b in benchmarks)

        # Per-benchmark means as custom metrics (useful for multi-benchmark files)
        custom: dict[str, float] = {}
        for bench in benchmarks:
            bench_name: str = bench.get("name", "unknown")
            mean_s: float | None = bench.get("stats", {}).get("mean")
            if mean_s is not None:
                safe_name = bench_name.replace(".", "_").replace(" ", "_")
                custom[safe_name] = mean_s * 1000.0

        commit_info: dict[str, Any] = raw.get("commit_info") or {}
        metadata = SnapshotMetadata(
            commit=commit_info.get("id"),
            branch=commit_info.get("branch"),
        )

        metrics = Metrics(
            latency=latency if (latency.mean or latency.p50) else None,
            throughput=total_ops if total_ops > 0 else None,
            custom=custom,
        )

        return PerformanceSnapshot(
            schema_version=1,
            metadata=metadata,
            metrics=metrics,
        )