Source code for src.evaluate_performance

"""
Evaluate the performance of a model through its predictions.
"""
import math
import logging

import pandas as pd
from sklearn.metrics import max_error, mean_squared_error, median_absolute_error, r2_score

logger = logging.getLogger(__name__)


def evaluate_model(
    results_data: pd.DataFrame,
    y_true_colname: str,
    y_pred_colname: str,
) -> pd.DataFrame:
    """
    Score a set of regression predictions against their ground truth.

    The following metrics are computed, logged at INFO level and returned:
        - ``mse``: mean squared error
        - ``rmse``: square root of the mean squared error
        - ``mad``: median absolute error
        - ``r_squared``: coefficient of determination (R-squared)
        - ``max_err``: maximum error

    Args:
        results_data (:obj:`pandas.DataFrame`): DataFrame holding (at least) the
            ground truth column and the prediction column
        y_true_colname (str): Name of the column with the true values
        y_pred_colname (str): Name of the column with the predicted values

    Returns:
        :obj:`pandas.DataFrame` with one row per metric and two columns:
        ``metric`` (the metric name) and ``performance`` (its value)
    """
    logger.debug("Evaluating model performance")

    actual = results_data[y_true_colname]
    predicted = results_data[y_pred_colname]

    # MSE is computed on its own first because RMSE is derived from it.
    mse = mean_squared_error(actual, predicted)

    # Insertion order matters: it fixes both the order of the log arguments
    # and the row order of the returned DataFrame.
    scores = {
        "mse": mse,
        "rmse": math.sqrt(mse),
        "mad": median_absolute_error(actual, predicted),
        "r_squared": r2_score(actual, predicted),
        "max_err": max_error(actual, predicted),
    }

    # Lazy %-formatting: the values are only interpolated if INFO is enabled.
    logger.info(
        """ MSE:\t\t%0.4f RMSE:\t\t%0.4f MAD:\t\t%0.4f R-squared:\t%0.4f Max error:\t%0.4f""",
        *scores.values()
    )

    # One [name, value] row per metric, in the order defined above.
    rows = [[name, value] for name, value in scores.items()]
    return pd.DataFrame(data=rows, columns=["metric", "performance"])