empml.metrics

Object            Description
----------------  ------------------------------------------------------------
MSE               Mean Squared Error.
RMSE              Root Mean Squared Error.
MAE               Mean Absolute Error.
MSLE              Mean Squared Logarithmic Error.
RMSLE             Root Mean Squared Logarithmic Error.
MAPE              Mean Absolute Percentage Error.
WMAE              Weighted Mean Absolute Error.
Accuracy          Classification accuracy.
Precision         Precision for binary classification.
Recall            Recall (Sensitivity) for binary classification.
F1Score           F1 Score for binary classification.
Specificity       Specificity (True Negative Rate) for binary classification.
BalancedAccuracy  Balanced Accuracy for binary classification.
ROCAUC            Area Under the ROC Curve for binary classification.
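
All of these objects expose the same interface: compute_metric takes a Polars LazyFrame together with the names of the target and prediction columns and returns a float. A minimal usage sketch; the frame, column names, and values below are illustrative, not part of the library:

import polars as pl

from empml.metrics import MSE, RMSE

# Illustrative data: "y" holds ground-truth values, "y_hat" predictions.
lf = pl.LazyFrame({
    "y": [3.0, 5.0, 2.5, 7.0],
    "y_hat": [2.5, 5.0, 4.0, 8.0],
})

mse = MSE().compute_metric(lf, target="y", preds="y_hat")
rmse = RMSE().compute_metric(lf, target="y", preds="y_hat")
print(mse, rmse)  # 0.875 and 0.875 ** 0.5 ≈ 0.9354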

MSE

Mean Squared Error.

Methods

def compute_metric(self, lf: pl.LazyFrame, target: str, preds: str) -> float:
    metric_expr = (pl.col(target) - pl.col(preds)).pow(2).mean()
    return lf.select(metric_expr).collect().item()

RMSE

Root Mean Squared Error.

Methods

def compute_metric(self, lf: pl.LazyFrame, target: str, preds: str) -> float:
    metric_expr = (pl.col(target) - pl.col(preds)).pow(2).mean().sqrt()
    return lf.select(metric_expr).collect().item()

MAE

Mean Absolute Error.

Methods

def compute_metric(self, lf: pl.LazyFrame, target: str, preds: str) -> float:
    metric_expr = (pl.col(target) - pl.col(preds)).abs().mean()
    return lf.select(metric_expr).collect().item()

MSLE

Mean Squared Logarithmic Error. Uses log1p for numerical stability.

Methods

def compute_metric(self, lf: pl.LazyFrame, target: str, preds: str) -> float:
    metric_expr = (
        (pl.col(target).log1p() - pl.col(preds).log1p()).pow(2).mean()
    )
    return lf.select(metric_expr).collect().item()
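
In formula terms, the expression above computes, with y_i the targets and ŷ_i the predictions:

\mathrm{MSLE} = \frac{1}{n} \sum_{i=1}^{n} \bigl( \log(1 + y_i) - \log(1 + \hat{y}_i) \bigr)^2

RMSLE below is simply its square root.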

RMSLE

Root Mean Squared Logarithmic Error. Uses log1p for numerical stability.

Methods

def compute_metric(self, lf: pl.LazyFrame, target: str, preds: str) -> float:
    metric_expr = (
        (pl.col(target).log1p() - pl.col(preds).log1p()).pow(2).mean().sqrt()
    )
    return lf.select(metric_expr).collect().item()

MAPE

Mean Absolute Percentage Error. Returned as a percentage (the mean ratio multiplied by 100); values above 100 are possible when errors exceed the targets.

Methods

def compute_metric(self, lf: pl.LazyFrame, target: str, preds: str) -> float:
    metric_expr = (
        ((pl.col(target) - pl.col(preds)).abs() / pl.col(target).abs())
        .mean() * 100
    )
    return lf.select(metric_expr).collect().item()
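
Because each term divides by |target|, any row with a zero target makes the metric undefined. A hypothetical guard, continuing the LazyFrame lf from the first sketch, is to filter such rows out first:

from empml.metrics import MAPE

# Hypothetical pre-filter: drop rows with zero targets so the
# |error| / |target| term is defined for every remaining row.
lf_nonzero = lf.filter(pl.col("y") != 0)
mape = MAPE().compute_metric(lf_nonzero, target="y", preds="y_hat")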

WMAE

Weighted Mean Absolute Error. Computed as sum(|errors|) / sum(target).

Methods

def compute_metric(self, lf: pl.LazyFrame, target: str, preds: str) -> float:
    metric_expr = (
        (pl.col(target) - pl.col(preds)).abs().sum() / pl.col(target).sum()
    )
    return lf.select(metric_expr).collect().item()
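
Written out, with y_i the targets and ŷ_i the predictions:

\mathrm{WMAE} = \frac{\sum_{i=1}^{n} \lvert y_i - \hat{y}_i \rvert}{\sum_{i=1}^{n} y_i}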

Accuracy

Classification accuracy. Proportion of correct predictions.

Methods

def compute_metric(self, lf: pl.LazyFrame, target: str, preds: str) -> float:
    metric_expr = (pl.col(target) == pl.col(preds)).mean()
    return lf.select(metric_expr).collect().item()

Precision

Precision for binary classification. TP / (TP + FP).

Methods

def __init__(self, positive_class: int = 1):
    self.positive_class = positive_class

def compute_metric(self, lf: pl.LazyFrame, target: str, preds: str) -> float:
    metric_expr = (
        ((pl.col(preds) == self.positive_class) & (pl.col(target) == self.positive_class)).sum() /
        (pl.col(preds) == self.positive_class).sum()
    )
    return lf.select(metric_expr).collect().item()
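
Precision and the classification metrics that follow take a positive_class constructor argument naming the label to treat as positive; every other label counts as negative. An illustrative sketch (the labels and column names are made up):

import polars as pl

from empml.metrics import Precision, Recall

clf = pl.LazyFrame({
    "label": [1, 0, 1, 1, 0],  # ground truth (illustrative)
    "pred":  [1, 0, 0, 1, 1],  # hard class predictions (illustrative)
})

precision = Precision(positive_class=1).compute_metric(clf, target="label", preds="pred")
recall = Recall(positive_class=1).compute_metric(clf, target="label", preds="pred")
print(precision, recall)  # both 2/3 here: TP=2, FP=1, FN=1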

Recall

Recall (Sensitivity) for binary classification. TP / (TP + FN).

Methods

def __init__(self, positive_class: int = 1):
    self.positive_class = positive_class

def compute_metric(self, lf: pl.LazyFrame, target: str, preds: str) -> float:
    metric_expr = (
        ((pl.col(preds) == self.positive_class) & (pl.col(target) == self.positive_class)).sum() /
        (pl.col(target) == self.positive_class).sum()
    )
    return lf.select(metric_expr).collect().item()

F1Score

F1 Score for binary classification. Harmonic mean of precision and recall.

Methods

def __init__(self, positive_class: int = 1):
    self.positive_class = positive_class

def compute_metric(self, lf: pl.LazyFrame, target: str, preds: str) -> float:
    tp = ((pl.col(preds) == self.positive_class) & (pl.col(target) == self.positive_class)).sum()
    pred_pos = (pl.col(preds) == self.positive_class).sum()
    actual_pos = (pl.col(target) == self.positive_class).sum()

    precision = tp / pred_pos
    recall = tp / actual_pos
    f1 = 2 * (precision * recall) / (precision + recall)

    return lf.select(f1).collect().item()
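
Substituting P = TP / (TP + FP) and R = TP / (TP + FN) into the harmonic mean gives the equivalent count form:

F_1 = \frac{2PR}{P + R} = \frac{2\,\mathrm{TP}}{2\,\mathrm{TP} + \mathrm{FP} + \mathrm{FN}}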

Specificity

Specificity (True Negative Rate) for binary classification. TN / (TN + FP).

Methods

def __init__(self, positive_class: int = 1):
    self.positive_class = positive_class

def compute_metric(self, lf: pl.LazyFrame, target: str, preds: str) -> float:
    metric_expr = (
        ((pl.col(preds) != self.positive_class) & (pl.col(target) != self.positive_class)).sum() /
        (pl.col(target) != self.positive_class).sum()
    )
    return lf.select(metric_expr).collect().item()

BalancedAccuracy

Balanced Accuracy for binary classification. (Recall + Specificity) / 2.

Methods

def __init__(self, positive_class: int = 1):
    self.positive_class = positive_class

def compute_metric(self, lf: pl.LazyFrame, target: str, preds: str) -> float:
    sensitivity = (
        ((pl.col(preds) == self.positive_class) & (pl.col(target) == self.positive_class)).sum() /
        (pl.col(target) == self.positive_class).sum()
    )
    specificity = (
        ((pl.col(preds) != self.positive_class) & (pl.col(target) != self.positive_class)).sum() /
        (pl.col(target) != self.positive_class).sum()
    )
    balanced_acc = (sensitivity + specificity) / 2

    return lf.select(balanced_acc).collect().item()

ROCAUC

Area Under the ROC Curve for binary classification. Requires probability scores.

Methods

def compute_metric(self, lf: pl.LazyFrame, target: str, preds: str) -> float:
    from sklearn.metrics import roc_auc_score

    # ROC AUC has no simple single-expression form in Polars, so collect
    # the two columns and delegate to scikit-learn.
    df = lf.select([pl.col(target), pl.col(preds)]).collect()
    y_true = df[target].to_numpy()
    y_scores = df[preds].to_numpy()
    return float(roc_auc_score(y_true, y_scores))
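
Unlike the hard-label metrics above, the preds column here must contain probability scores for the positive class. A sketch with illustrative values:

import polars as pl

from empml.metrics import ROCAUC

scores = pl.LazyFrame({
    "label": [0, 0, 1, 1],
    "score": [0.1, 0.4, 0.35, 0.8],  # predicted P(positive) per row
})

auc = ROCAUC().compute_metric(scores, target="label", preds="score")
print(auc)  # 0.75 for these values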