empml.metrics

Object            Description
----------------  ------------------------------------------------------------
MSE               Mean Squared Error.
RMSE              Root Mean Squared Error.
MAE               Mean Absolute Error.
MSLE              Mean Squared Logarithmic Error.
RMSLE             Root Mean Squared Logarithmic Error.
MAPE              Mean Absolute Percentage Error.
WMAE              Weighted Mean Absolute Error.
Accuracy          Classification accuracy.
Precision         Precision for binary classification.
Recall            Recall (Sensitivity) for binary classification.
F1Score           F1 Score for binary classification.
Specificity       Specificity (True Negative Rate) for binary classification.
BalancedAccuracy  Balanced Accuracy for binary classification.
ROCAUC            Area Under the ROC Curve for binary classification.
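
All of these objects expose the same interface: compute_metric takes a Polars LazyFrame together with the names of the target and prediction columns and returns a float. A minimal usage sketch; the frame, column names, and values below are illustrative, not part of the library:

import polars as pl

from empml.metrics import MSE, RMSE

# Illustrative data: "y" holds ground-truth values, "y_hat" predictions.
lf = pl.LazyFrame({
    "y": [3.0, 5.0, 2.5, 7.0],
    "y_hat": [2.5, 5.0, 4.0, 8.0],
})

mse = MSE().compute_metric(lf, target="y", preds="y_hat")
rmse = RMSE().compute_metric(lf, target="y", preds="y_hat")
print(mse, rmse)  # 0.875 and 0.875 ** 0.5 ≈ 0.9354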

MSE

Mean Squared Error.

Methods

def compute_metric(self, lf: pl.LazyFrame, target: str, preds: str) -> float:
    metric_expr = (pl.col(target) - pl.col(preds)).pow(2).mean()
    return lf.select(metric_expr).collect().item()

RMSE

Root Mean Squared Error.

Methods

def compute_metric(self, lf: pl.LazyFrame, target: str, preds: str) -> float:
    metric_expr = (pl.col(target) - pl.col(preds)).pow(2).mean().sqrt()
    return lf.select(metric_expr).collect().item()

MAE

Mean Absolute Error.

Methods

def compute_metric(self, lf: pl.LazyFrame, target: str, preds: str) -> float:
    metric_expr = (pl.col(target) - pl.col(preds)).abs().mean()
    return lf.select(metric_expr).collect().item()

MSLE

Mean Squared Logarithmic Error. Uses log1p for numerical stability.

Methods

def compute_metric(self, lf: pl.LazyFrame, target: str, preds: str) -> float:
    metric_expr = (
        (pl.col(target).log1p() - pl.col(preds).log1p()).pow(2).mean()
    )
    return lf.select(metric_expr).collect().item()
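
In formula terms, the expression above computes, with y_i the targets and ŷ_i the predictions:

\mathrm{MSLE} = \frac{1}{n} \sum_{i=1}^{n} \bigl( \log(1 + y_i) - \log(1 + \hat{y}_i) \bigr)^2

RMSLE below is simply its square root.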

RMSLE

Root Mean Squared Logarithmic Error. Uses log1p for numerical stability.

Methods

def compute_metric(self, lf: pl.LazyFrame, target: str, preds: str) -> float:
    metric_expr = (
        (pl.col(target).log1p() - pl.col(preds).log1p()).pow(2).mean().sqrt()
    )
    return lf.select(metric_expr).collect().item()

MAPE

Mean Absolute Percentage Error. Returned as a percentage (the mean ratio multiplied by 100); values above 100 are possible when errors exceed the targets.

Methods

def compute_metric(self, lf: pl.LazyFrame, target: str, preds: str) -> float:
    metric_expr = (
        ((pl.col(target) - pl.col(preds)).abs() / pl.col(target).abs())
        .mean() * 100
    )
    return lf.select(metric_expr).collect().item()
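
Because each term divides by |target|, any row with a zero target makes the metric undefined. A hypothetical guard, continuing the LazyFrame lf from the first sketch, is to filter such rows out first:

from empml.metrics import MAPE

# Hypothetical pre-filter: drop rows with zero targets so the
# |error| / |target| term is defined for every remaining row.
lf_nonzero = lf.filter(pl.col("y") != 0)
mape = MAPE().compute_metric(lf_nonzero, target="y", preds="y_hat")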

WMAE

Weighted Mean Absolute Error. Computed as sum(|errors|) / sum(target).

Methods

def compute_metric(self, lf: pl.LazyFrame, target: str, preds: str) -> float:
    metric_expr = (
        (pl.col(target) - pl.col(preds)).abs().sum() / pl.col(target).sum()
    )
    return lf.select(metric_expr).collect().item()
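
Written out, with y_i the targets and ŷ_i the predictions:

\mathrm{WMAE} = \frac{\sum_{i=1}^{n} \lvert y_i - \hat{y}_i \rvert}{\sum_{i=1}^{n} y_i}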

Accuracy

Classification accuracy. Proportion of correct predictions.

Methods

def compute_metric(self, lf: pl.LazyFrame, target: str, preds: str) -> float:
    metric_expr = (pl.col(target) == pl.col(preds)).mean()
    return lf.select(metric_expr).collect().item()

Precision

Precision for binary classification. TP / (TP + FP).

Methods

def __init__(self, positive_class: int = 1):
    self.positive_class = positive_class

def compute_metric(self, lf: pl.LazyFrame, target: str, preds: str) -> float:
    metric_expr = (
        ((pl.col(preds) == self.positive_class) & (pl.col(target) == self.positive_class)).sum() /
        (pl.col(preds) == self.positive_class).sum()
    )
    return lf.select(metric_expr).collect().item()
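
Precision and the classification metrics that follow take a positive_class constructor argument naming the label to treat as positive; every other label counts as negative. An illustrative sketch (the labels and column names are made up):

import polars as pl

from empml.metrics import Precision, Recall

clf = pl.LazyFrame({
    "label": [1, 0, 1, 1, 0],  # ground truth (illustrative)
    "pred":  [1, 0, 0, 1, 1],  # hard class predictions (illustrative)
})

precision = Precision(positive_class=1).compute_metric(clf, target="label", preds="pred")
recall = Recall(positive_class=1).compute_metric(clf, target="label", preds="pred")
print(precision, recall)  # both 2/3 here: TP=2, FP=1, FN=1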

Recall

Recall (Sensitivity) for binary classification. TP / (TP + FN).

Methods

def __init__(self, positive_class: int = 1):
    self.positive_class = positive_class

def compute_metric(self, lf: pl.LazyFrame, target: str, preds: str) -> float:
    metric_expr = (
        ((pl.col(preds) == self.positive_class) & (pl.col(target) == self.positive_class)).sum() /
        (pl.col(target) == self.positive_class).sum()
    )
    return lf.select(metric_expr).collect().item()

F1Score

F1 Score for binary classification. Harmonic mean of precision and recall.

Methods

def __init__(self, positive_class: int = 1):
    self.positive_class = positive_class

def compute_metric(self, lf: pl.LazyFrame, target: str, preds: str) -> float:
    tp = ((pl.col(preds) == self.positive_class) & (pl.col(target) == self.positive_class)).sum()
    pred_pos = (pl.col(preds) == self.positive_class).sum()
    actual_pos = (pl.col(target) == self.positive_class).sum()

    precision = tp / pred_pos
    recall = tp / actual_pos
    f1 = 2 * (precision * recall) / (precision + recall)

    return lf.select(f1).collect().item()
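
Substituting P = TP / (TP + FP) and R = TP / (TP + FN) into the harmonic mean gives the equivalent count form:

F_1 = \frac{2PR}{P + R} = \frac{2\,\mathrm{TP}}{2\,\mathrm{TP} + \mathrm{FP} + \mathrm{FN}}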

Specificity

Specificity (True Negative Rate) for binary classification. TN / (TN + FP).

Methods

def __init__(self, positive_class: int = 1):
    self.positive_class = positive_class

def compute_metric(self, lf: pl.LazyFrame, target: str, preds: str) -> float:
    metric_expr = (
        ((pl.col(preds) != self.positive_class) & (pl.col(target) != self.positive_class)).sum() /
        (pl.col(target) != self.positive_class).sum()
    )
    return lf.select(metric_expr).collect().item()

BalancedAccuracy

Balanced Accuracy for binary classification. (Recall + Specificity) / 2.

Methods

def __init__(self, positive_class: int = 1):
    self.positive_class = positive_class

def compute_metric(self, lf: pl.LazyFrame, target: str, preds: str) -> float:
    sensitivity = (
        ((pl.col(preds) == self.positive_class) & (pl.col(target) == self.positive_class)).sum() /
        (pl.col(target) == self.positive_class).sum()
    )
    specificity = (
        ((pl.col(preds) != self.positive_class) & (pl.col(target) != self.positive_class)).sum() /
        (pl.col(target) != self.positive_class).sum()
    )
    balanced_acc = (sensitivity + specificity) / 2

    return lf.select(balanced_acc).collect().item()

ROCAUC

Area Under the ROC Curve for binary classification. Requires probability scores.

Methods

def compute_metric(self, lf: pl.LazyFrame, target: str, preds: str) -> float:
    from sklearn.metrics import roc_auc_score

    # ROC AUC has no simple single-expression form in Polars, so collect
    # the two columns and delegate to scikit-learn.
    df = lf.select([pl.col(target), pl.col(preds)]).collect()
    y_true = df[target].to_numpy()
    y_scores = df[preds].to_numpy()
    return float(roc_auc_score(y_true, y_scores))
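
Unlike the hard-label metrics above, the preds column here must contain probability scores for the positive class. A sketch with illustrative values:

import polars as pl

from empml.metrics import ROCAUC

scores = pl.LazyFrame({
    "label": [0, 0, 1, 1],
    "score": [0.1, 0.4, 0.35, 0.8],  # predicted P(positive) per row
})

auc = ROCAUC().compute_metric(scores, target="label", preds="score")
print(auc)  # 0.75 for these values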