Source code for merton.backtest.metrics

r"""Backtest metrics for PD models.

All metrics here use the Wilcoxon-Mann-Whitney / rank-based identities so
there's no sklearn dependency. They match ``sklearn.metrics`` outputs to
machine precision on the binary-classification case.
"""

from __future__ import annotations

import numpy as np
from scipy.stats import rankdata

from .._typing import ArrayLike, FloatArray
from ..exceptions import MertonInputError


def _check_pair(predictions: ArrayLike, defaults: ArrayLike) -> tuple[FloatArray, FloatArray]:
    p = np.asarray(predictions, dtype=np.float64)
    y = np.asarray(defaults, dtype=np.float64)
    if p.shape != y.shape:
        raise MertonInputError(f"predictions shape {p.shape} != defaults shape {y.shape}")
    if p.ndim != 1:
        raise MertonInputError("predictions and defaults must be 1-D")
    if not np.all(np.isin(y, [0.0, 1.0])):
        raise MertonInputError("defaults must be 0/1 indicators")
    return p, y



[docs]
def auc(predictions: ArrayLike, defaults: ArrayLike) -> float:
    r"""Area under the ROC curve.

    Uses the Wilcoxon-Mann-Whitney identity: AUC equals the probability
    that a randomly chosen defaulter has a higher predicted PD than a
    randomly chosen non-defaulter.

    .. math::

        \mathrm{AUC} = \frac{R_+ - n_+(n_+ + 1)/2}{n_+ \cdot n_-}

    where ``R_+`` is the sum of ranks of the positive class, ``n_+`` is
    the number of positives, and ``n_-`` the number of negatives.
    """
    p, y = _check_pair(predictions, defaults)
    n_pos = int(y.sum())
    n_neg = int(y.size - n_pos)
    if n_pos == 0 or n_neg == 0:
        raise MertonInputError("AUC undefined when only one class is present")
    ranks = rankdata(p)
    r_plus = float(ranks[y == 1].sum())
    return (r_plus - n_pos * (n_pos + 1) / 2.0) / (n_pos * n_neg)




[docs]
def accuracy_ratio(predictions: ArrayLike, defaults: ArrayLike) -> float:
    """Gini coefficient ``AR = 2 · AUC − 1``."""
    return 2.0 * auc(predictions, defaults) - 1.0




[docs]
def brier(predictions: ArrayLike, defaults: ArrayLike) -> float:
    """Brier score (mean squared error of probabilistic predictions)."""
    p, y = _check_pair(predictions, defaults)
    return float(np.mean((p - y) ** 2))




[docs]
def ks_statistic(predictions: ArrayLike, defaults: ArrayLike) -> float:
    """Kolmogorov-Smirnov statistic — max gap between the two empirical CDFs."""
    p, y = _check_pair(predictions, defaults)
    if not (y == 1).any() or not (y == 0).any():
        raise MertonInputError("KS undefined when only one class is present")
    grid = np.sort(np.unique(p))
    pos = np.sort(p[y == 1])
    neg = np.sort(p[y == 0])
    # CDF at each grid point.
    f_pos = np.searchsorted(pos, grid, side="right") / pos.size
    f_neg = np.searchsorted(neg, grid, side="right") / neg.size
    return float(np.max(np.abs(f_pos - f_neg)))




[docs]
def hosmer_lemeshow(
    predictions: ArrayLike,
    defaults: ArrayLike,
    *,
    bins: int = 10,
) -> tuple[float, float]:
    """Hosmer-Lemeshow goodness-of-fit χ² and (chi², dof).

    Returns ``(chi_squared, degrees_of_freedom)``. P-values come from
    ``scipy.stats.chi2.sf(chi2, dof)``.
    """
    p, y = _check_pair(predictions, defaults)
    n = p.size
    if n < bins * 5:
        raise MertonInputError(f"need at least {bins * 5} obs for Hosmer-Lemeshow with bins={bins}")
    # Equal-count bins by predicted-PD rank.
    order = np.argsort(p)
    p_sorted = p[order]
    y_sorted = y[order]
    bin_sizes = [n // bins] * bins
    for i in range(n % bins):
        bin_sizes[i] += 1
    chi2 = 0.0
    idx = 0
    for sz in bin_sizes:
        seg_p = p_sorted[idx : idx + sz]
        seg_y = y_sorted[idx : idx + sz]
        observed_pos = float(seg_y.sum())
        expected_pos = float(seg_p.sum())
        observed_neg = sz - observed_pos
        expected_neg = sz - expected_pos
        if expected_pos > 0:
            chi2 += (observed_pos - expected_pos) ** 2 / expected_pos
        if expected_neg > 0:
            chi2 += (observed_neg - expected_neg) ** 2 / expected_neg
        idx += sz
    return float(chi2), float(bins - 2)



__all__ = [
    "accuracy_ratio",
    "auc",
    "brier",
    "hosmer_lemeshow",
    "ks_statistic",
]