diff --git a/src/skit/ModelMetrics.py b/src/skit/ModelMetrics.py
deleted file mode 100644
index 8d71f17..0000000
--- a/src/skit/ModelMetrics.py
+++ /dev/null
@@ -1,383 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-MIT License
-
-Copyright (c) 2023 YanSte
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-"""
-
-import pandas as pd
-import numpy as np
-import time
-
-from enum import Enum
-from IPython.display import display
-from skit.show import show_text, show_history
-
-class Metric(Enum):
-    ACCURACY = "accuracy"
-    AUC = "auc"
-    VAL_AUC = "val_auc"
-    VAL_ACCURACY = "val_accuracy"
-
-    @property
-    def train_metric_key(self):
-        """
-        Get the training metric key corresponding to the Metric enum value.
-        """
-        return self.value
-
-    @property
-    def val_metric_key(self):
-        """
-        Get the validation metric key corresponding to the Metric enum value.
-        """
-        if self == Metric.ACCURACY:
-            return "val_accuracy"
-        elif self == Metric.AUC:
-            return "auc"
-        elif self == Metric.VAL_AUC:
-            return "val_auc"
-        elif self == Metric.VAL_ACCURACY:
-            return "val_accuracy"
-
-    @property
-    def plot_labels(self):
-        """
-        Get the curve labels corresponding to the given Metric enum.
-        """
-        if self == Metric.ACCURACY or self == Metric.VAL_ACCURACY:
-            return {
-                'Accuracy': {
-                    'Training Accuracy': 'accuracy',
-                    'Validation Accuracy': 'val_accuracy'
-                },
-                'Loss': {
-                    'Training Loss': 'loss',
-                    'Validation Loss': 'val_loss'
-                }
-            }
-        elif self == Metric.AUC or self == Metric.VAL_AUC:
-            return {
-                'AUC': {
-                    'Training AUC': 'auc',
-                    'Validation AUC': 'val_auc'
-                },
-                'Loss': {
-                    'Training Loss': 'loss',
-                    'Validation Loss': 'val_loss'
-                }
-            }
-
-class ModelMetrics:
-    def __init__(self, versions, metric_to_monitor=Metric.ACCURACY):
-        """
-        Initialize ModelMetrics class.
-
-        Parameters
-        ----------
-        versions : list
-            List of model versions to track.
-        metric_to_monitor : Metric
-            The metric to monitor (default is Accuracy).
-        """
-        self.output = {}
-        self.metric_to_monitor = metric_to_monitor
-        for version in versions:
-            self.output[version] = {
-                "history": None,
-                "duration": None,
-                "best_model_path": None,
-                "board_path": None
-            }
-
-    def reset(self, version=None):
-        """
-        Reset the tracking for a specific version or all versions.
-
-        Parameters
-        ----------
-        version : str, optional
-            The specific version to reset. If None, reset all versions.
- """ - default_dict = { - "history": None, - "duration": None, - "best_model_path": None, - "board_path": None - } - - if version is not None: - self.output[version] = default_dict - else: - # Reset all versions - for version in self.output.keys(): - self.output[version] = default_dict.copy() - - def get_best_metric(self, version): - """ - Get the best training and validation metrics for a specific model version. - - Parameters - ---------- - Args: - version (str): The model version to retrieve metrics for. - - Returns: - dict: Dictionary containing best training and validation metrics. - """ - history = self.output[version]['history'].history - - train_metric_key = self.metric_to_monitor.train_metric_key - val_metric_key = self.metric_to_monitor.val_metric_key - - best_val_index = np.argmax(history[train_metric_key]) - best_train_metric = history[train_metric_key][best_val_index] - best_val_metric = history[val_metric_key][best_val_index] - - return { - f'best_train_{self.metric_to_monitor.name.lower()}': best_train_metric, - f'best_val_{self.metric_to_monitor.name.lower()}': best_val_metric, - } - - def get_best_report(self, version): - """ - Get the best model report for a specific model version. - - Parameters - ---------- - version : str - The model version for which to get the best model report. - - Returns: - dict or None: The best model report containing training and validation metrics, - duration, and paths. Returns None if the specified version is not found in the output. - """ - if version not in self.output: - return None - - metrics = self.get_best_metric(version) - - return { - 'version': version, - f'best_train_{self.metric_to_monitor.name.lower()}': metrics[f'best_train_{self.metric_to_monitor.name.lower()}'], - f'best_val_{self.metric_to_monitor.name.lower()}': metrics[f'best_val_{self.metric_to_monitor.name.lower()}'], - 'duration': self.output[version]['duration'], - 'best_model_path': self.output[version]['best_model_path'], - 'board_path': self.output[version]['board_path'], - } - - def show_report(self): - """ - Display a tabular report of the best model performance. - """ - # Initialize the report DataFrame - columns = ['version', f'best_train_{self.metric_to_monitor.name.lower()}', f'best_val_{self.metric_to_monitor.name.lower()}', 'duration', 'best_model_path', 'board_path'] - - df = pd.DataFrame(columns=columns) - - for version in self.output.keys(): - # Get the best training and validation metric for this version - report = self.get_best_report(version) - - # Add the data to the DataFrame - df = pd.concat([df, pd.DataFrame([report])], ignore_index=True) - - # Set 'version' as the index of the DataFrame - df.set_index('version', inplace=True) - - # Apply formatting to the duration and metric columns - df['duration'] = df['duration'].apply(lambda x: "{:.2f}".format(x)) - - metric_columns = [f'best_train_{self.metric_to_monitor.name.lower()}', f'best_val_{self.metric_to_monitor.name.lower()}'] - df[metric_columns] = df[metric_columns].applymap(lambda x: "{:.2f}".format(x*100) if self.metric_to_monitor != Metric.VAL_ACCURACY else "{:.2f}%".format(x)) - - # Highlight the maximum in the metric column - styled_df = df.style.highlight_max(subset=[f'best_val_{self.metric_to_monitor.name.lower()}'], color='lightgreen') - - # Display the report - display(styled_df) - - - def show_best_result(self, version): - """ - Display the result (best train metric, best validation metric, and duration) for a specific model version. 
-
-        Parameters
-        ----------
-        version : str
-            The model version for which the result will be displayed.
-        """
-        if version not in self.output:
-            show_text("b", f"No result available for {version}")
-
-        result = self.get_best_report(version)
-
-        if result is not None:
-            best_train_metric = result.get(f'best_train_{self.metric_to_monitor.name.lower()}', None)
-            best_val_metric = result.get(f'best_val_{self.metric_to_monitor.name.lower()}', None)
-            duration = result.get('duration', None)
-
-            metric_name = self.metric_to_monitor.name.lower()
-            metric_suffix = '%' if self.metric_to_monitor != Metric.VAL_ACCURACY else ''
-
-            if best_train_metric is not None and best_val_metric is not None and duration is not None:
-                show_text("b", f"Train {metric_name.capitalize()} = {best_train_metric * 100:.2f}{metric_suffix} - Validation {metric_name.capitalize()} = {best_val_metric * 100:.2f}{metric_suffix} - Duration = {duration:.2f}")
-            else:
-                show_text("b", f"Result not available for version {version}")
-        else:
-            show_text("b", f"Version {version} not found in the output")
-
-
-    def start_timer(self, version):
-        """
-        Start the timer for tracking model training or evaluation duration.
-
-        Parameters
-        ----------
-        version : str
-            The name of the model version for which to start the timer.
-        """
-        self.output[version]['duration'] = time.time()
-
-    def stop_timer(self, version):
-        """
-        Stop the timer for tracking model training or evaluation duration.
-
-        Parameters
-        ----------
-        version : str
-            The name of the model version for which to stop the timer.
-        """
-        if self.output[version]['duration'] is not None:
-            duration = time.time() - self.output[version]['duration']
-            self.output[version]['duration'] = duration
-
-    def add_best_model_path(self, version, path):
-        """
-        Add the link of the best model for the specified model version.
-
-        Parameters
-        ----------
-        version : str
-            The name of the model version for which to add the best model link.
-        link : str
-            The link or path to the best model.
-        """
-        self.output[version]['best_model_path'] = path
-
-    def add_board_path(self, version, path):
-        """
-        Add the link of the tensor board for the specified model version.
-
-        Parameters
-        ----------
-        version : str
-            The name of the model version for which to add the tensor board link.
-        link : str
-            The link or path to the tensor board.
-        """
-        self.output[version]['board_path'] = path
-
-    def add_history(self, version, history):
-        """
-        Add the history of the specified model version.
-
-        Parameters
-        ----------
-        version : str
-            The name of the model version for which to add the accuracy score.
-        history : dict
-            The accuracy score to be added.
-        """
-        self.output[version]['history'] = history
-
-    def show_history(
-        self,
-        version,
-        figsize=(8,6)
-    ):
-        """
-        Visualizes the training and validation metrics from the model's history using matplotlib.
-
-        The function generates separate plots for each main category (like 'Accuracy' and 'Loss')
-        defined in the `plot` parameter. For each main category, multiple curves (like 'Training Accuracy'
-        and 'Validation Accuracy') can be plotted based on the nested dictionary values.
-
-        Parameters:
-        -----------
-        history : dict
-            The history object typically returned from the .fit() method of a Keras model. It should
-            have a 'history' attribute containing the training and validation metrics.
-
-        figsize : tuple, optional
-            The width and height in inches for the figure. Defaults to (8,6).
-
-        plot : dict, optional
-            A nested dictionary defining the metrics to be plotted.
-            - The top-level key corresponds to the main category (e.g., 'Accuracy' or 'Loss').
-            - The associated nested dictionary's keys are the curve labels (e.g., 'Training Accuracy')
-              and the values are the corresponding metric names in the 'history' object (e.g., 'accuracy').
-            Defaults to plotting both training and validation accuracy and loss.
-
-        Example:
-        --------
-        show_history(
-            model_history,
-            figsize=(10,8),
-            plot={
-                "Title A": {
-                    "Legend Title 1": "metric_name_1",
-                    "Legend Title 2": "metric_name_2"
-                }
-            }
-        )
-
-        Note:
-        -----
-        The `plot` parameter allows you to customize which metrics to plot and how they are labeled
-        in the generated visualization.
-        """
-        history = self.output[version]['history']
-        plot = self.metric_to_monitor.plot_labels
-        display(show_history(history, figsize=figsize, plot=plot))
-
-    def get_best_model_path(self, version):
-        """
-        Get the path of the best model based on accuracy.
-
-        Parameters
-        ----------
-        version : str
-            The name of the model version for which to get the best model path.
-
-        Returns
-        -------
-        str or None
-            The path of the best model based on the highest accuracy score.
-            Returns None if no model has been added or no best model path is available.
-        """
-        report = self.get_best_report(version)
-        best_model_path = report.get('best_model_path')
-
-        if best_model_path is not None:
-            return best_model_path
-        else:
-            return None
diff --git a/src/skit/show.py b/src/skit/show.py
index d03e2df..fdb3e53 100644
--- a/src/skit/show.py
+++ b/src/skit/show.py
@@ -102,26 +102,59 @@ def show_text(heading_level, text="", add_indent=True):
 # History
 # ==============================
 
+def show_best_history(
+    history,
+    metric="val_acc",
+    add_metric=["acc"]
+):
+    """
+    Summarize the best model performance based on training history.
+
+    Args:
+        history : History
+            The history object returned by the .fit() method of a Keras model.
+        metric : str
+            The primary metric to monitor for the best value (default: "val_acc").
+        add_metric : list
+            List of additional metrics to report alongside the best value (default: ["acc"]).
+    """
+    history = history.history
+
+    if metric not in history:
+        print(f"The metric '{metric}' is not in history.")
+        return
+
+    for m in add_metric:
+        if m not in history:
+            print(f"The metric '{m}' is not in history.")
+            return
+
+    best_index = np.argmax(history[metric])
+    best_value = history[metric][best_index]
+
+    result = f"Best {metric}: {best_value} with "
+
+    for i, m in enumerate(add_metric):
+        best_value = history[m][best_index]
+        result += f"{m}: {best_value}"
+        if i < len(add_metric) - 1:
+            result += ", "
+
+    show_text("b", result)
+
 def show_history(
     history,
-    figsize = (8,6),
-    plot = {
-        'Accuracy': {
-            'Training Accuracy': 'accuracy',
-            'Validation Accuracy': 'val_accuracy'
-        },
-        'Loss': {
-            'Training Loss': 'loss',
-            'Validation Loss': 'val_loss'
-        }
-    }
+    title = "Accuracy",
+    y_label = "Accuracy",
+    metrics = ["acc", "val_acc"],
+    metric_labels = ["Train Accuracy", "Validation Accuracy"],
+    figsize = (8,6)
 ):
     """
     Visualizes the training and validation metrics from the model's history using matplotlib.
 
-    The function generates separate plots for each main category (like 'Accuracy' and 'Loss')
-    defined in the `plot` parameter. For each main category, multiple curves (like 'Training Accuracy'
-    and 'Validation Accuracy') can be plotted based on the nested dictionary values.
+    The function plots each metric listed in the `metrics` parameter on a single figure,
+    using the corresponding labels from `metric_labels` for the plot legend.
 
     Parameters:
     -----------
@@ -129,42 +162,45 @@ def show_history(
         The history object typically returned from the .fit() method of a Keras model. It should
         have a 'history' attribute containing the training and validation metrics.
 
-    figsize : tuple, optional
-        The width and height in inches for the figure. Defaults to (8,6).
+    title : str, optional
+        The title for the plot. Defaults to "Accuracy".
+
+    y_label : str, optional
+        The label for the y-axis. Defaults to "Accuracy".
+
+    metrics : list of str, optional
+        A list of metric names to be plotted. Defaults to ["acc", "val_acc"].
 
-    plot : dict, optional
-        A nested dictionary defining the metrics to be plotted.
-        - The top-level key corresponds to the main category (e.g., 'Accuracy' or 'Loss').
-        - The associated nested dictionary's keys are the curve labels (e.g., 'Training Accuracy')
-          and the values are the corresponding metric names in the 'history' object (e.g., 'accuracy').
-        Defaults to plotting both training and validation accuracy and loss.
+    metric_labels : list of str, optional
+        A list of labels for the plotted metrics. Should have the same length as `metrics`.
+        Defaults to ["Train Accuracy", "Validation Accuracy"].
+
+    figsize : tuple, optional
+        The width and height in inches for the figure. Defaults to (8, 6).
 
     Example:
     --------
     show_history(
         model_history,
-        figsize=(10,8),
-        plot={
-            "Titre A": {
-                "Legend Titre 1": "metric",
-                "Legend Titre 2": "metric"
-            }
-        }
+        title="Training and Validation Loss",
+        y_label="Loss",
+        metrics=["loss", "val_loss"],
+        metric_labels=["Training Loss", "Validation Loss"],
+        figsize=(10, 8)
     )
     """
-    for title, curves in plot.items():
-        plt.figure(figsize=figsize)
-        plt.title(title)
+    history = history.history
+    plt.figure(figsize=figsize)
+    plt.title(title)
+
+    plt.ylabel(y_label)
+    plt.xlabel('Epoch')
 
-        # Extracting the name from the first metric and capitalizing the first letter for ylabel
-        y_label = list(curves.values())[0].capitalize()
-        plt.ylabel(y_label)
-        plt.xlabel('Epoch')
+    for metric_name, metric_label in zip(metrics, metric_labels):
+        plt.plot(history[metric_name], label=metric_label)
 
-        for curve_label, metric_name in curves.items():
-            plt.plot(history.history[metric_name], label=curve_label)
-            plt.legend(loc='upper left')
-            plt.show()
+    plt.legend(loc='upper left')
+    plt.show()
 # ==============================
 # Image
 # ==============================
@@ -470,9 +506,6 @@ def show_histogram(
 
     plt.show()
 
-# ==============================
-# TensorFlow
-# ==============================
 
 if IS_TENSORFLOW_IMPORTED:
     import tensorflow as tf
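Usage sketch (reviewer note, not part of the patch): a minimal, self-contained example of the new skit.show API added above. The `_FakeHistory` class and its metric values are hypothetical stand-ins for a `keras.callbacks.History` object, since both helpers only read its `.history` dict:

    from skit.show import show_best_history, show_history

    class _FakeHistory:
        # Hypothetical stand-in for keras.callbacks.History:
        # the helpers only access the .history dict of metric lists.
        def __init__(self, history):
            self.history = history

    model_history = _FakeHistory({
        "acc":     [0.71, 0.83, 0.88, 0.86],
        "val_acc": [0.68, 0.80, 0.85, 0.84],
    })

    # Report the epoch where val_acc peaked, plus the training accuracy at that epoch.
    show_best_history(model_history, metric="val_acc", add_metric=["acc"])

    # Plot both accuracy curves on a single figure.
    show_history(
        model_history,
        metrics=["acc", "val_acc"],
        metric_labels=["Train Accuracy", "Validation Accuracy"]
    )

Note the design change: the new show_history draws all requested metrics on one figure, whereas the removed plot-dict version produced one figure per category (e.g., Accuracy and Loss). Callers that want both curve families now make one show_history call per figure.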