open-mmlab · ice-tong · Aug 31, 2022 · Sep 1, 2022 · Sep 26, 2022 · Sep 27, 2022
diff --git a/mmseg/evaluation/metrics/iou_metric.py b/mmseg/evaluation/metrics/iou_metric.py
@@ -1,250 +1,92 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from collections import OrderedDict
-from typing import Dict, List, Optional, Sequence
 
+import warnings
 import numpy as np
-import torch
-from mmengine.evaluator import BaseMetric
-from mmengine.logging import MMLogger, print_log
+from typing import Sequence
+from mmengine.logging import print_log
 from prettytable import PrettyTable
+from mmeval.metrics import MeanIoU
 
 from mmseg.registry import METRICS
 
 
 @METRICS.register_module()
-class IoUMetric(BaseMetric):
-    """IoU evaluation metric.
+class IoUMetric(MeanIoU):
+    """A wrapper of ``mmeval.MeanIoU``.
+
+    This wrapper implements the ``process`` method that parses predictions and
+    labels from inputs. This enables ``mmengine.Evaluator`` to handle the data
+    flow of different tasks through a unified interface.
+
+    In addition, this wrapper also implements the ``evaluate`` method that
+    parses metric results and prints a pretty table of metrics per class.
 
     Args:
-        ignore_index (int): Index that will be ignored in evaluation.
-            Default: 255.
-        iou_metrics (list[str] | str): Metrics to be calculated, the options
-            includes 'mIoU', 'mDice' and 'mFscore'.
-        nan_to_num (int, optional): If specified, NaN values will be replaced
-            by the numbers defined by the user. Default: None.
-        beta (int): Determines the weight of recall in the combined score.
-            Default: 1.
-        collect_device (str): Device name used for collecting results from
-            different ranks during distributed training. Must be 'cpu' or
-            'gpu'. Defaults to 'cpu'.
-        prefix (str, optional): The prefix that will be added in the metric
-            names to disambiguate homonymous metrics of different evaluators.
-            If prefix is not provided in the argument, self.default_prefix
-            will be used instead. Defaults to None.
+        dist_backend (str | None): The name of the distributed communication
+            backend. Refer to :class:`mmeval.BaseMetric`.
+            Defaults to 'torch_cuda'.
+        **kwargs: Keyword parameters passed to :class:`mmeval.MeanIoU`.
     """
 
-    def __init__(self,
-                 ignore_index: int = 255,
-                 iou_metrics: List[str] = ['mIoU'],
-                 nan_to_num: Optional[int] = None,
-                 beta: int = 1,
-                 collect_device: str = 'cpu',
-                 prefix: Optional[str] = None) -> None:
-        super().__init__(collect_device=collect_device, prefix=prefix)
+    def __init__(self, dist_backend='torch_cuda', **kwargs):
+        iou_metrics = kwargs.pop('iou_metrics', None)
+        if iou_metrics is not None:
+            warnings.warn(
+                'DeprecationWarning: The `iou_metrics` parameter of '
+                '`IoUMetric` is deprecated, defaults return all metrics now!')
+        collect_device = kwargs.pop('collect_device', None)
+
+        if collect_device is not None:
+            warnings.warn(
+                'DeprecationWarning: The `collect_device` parameter of '
+                '`IoUMetric` is deprecated, use `dist_backend` instead.')
 
-        self.ignore_index = ignore_index
-        self.metrics = iou_metrics
-        self.nan_to_num = nan_to_num
-        self.beta = beta
+        # Changes the default value of `classwise_results` to True.
+        super().__init__(classwise_results=True,
+                         dist_backend=dist_backend,
+                         **kwargs)
 
     def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None:
         """Process one batch of data and data_samples.
 
-        The processed results should be stored in ``self.results``, which will
-        be used to computed the metrics when all batches have been processed.
+        Parse predictions and labels from ``data_samples`` and invoke
+        ``self.add``.
 
         Args:
             data_batch (dict): A batch of data from the dataloader.
             data_samples (Sequence[dict]): A batch of outputs from the model.
         """
-        num_classes = len(self.dataset_meta['classes'])
+        predictions, labels = [], []
         for data_sample in data_samples:
             pred_label = data_sample['pred_sem_seg']['data'].squeeze()
             label = data_sample['gt_sem_seg']['data'].squeeze().to(pred_label)
-            self.results.append(
-                self.intersect_and_union(pred_label, label, num_classes,
-                                         self.ignore_index))
+            predictions.append(pred_label)
+            labels.append(label)
 
-    def compute_metrics(self, results: list) -> Dict[str, float]:
-        """Compute the metrics from processed results.
+        self.add(predictions, labels)
 
-        Args:
-            results (list): The processed results of each batch.
+    def evaluate(self, *args, **kwargs):
+        """Return metric results and print a pretty table of metrics per class.
 
-        Returns:
-            Dict[str, float]: The computed metrics. The keys are the names of
-                the metrics, and the values are corresponding results. The key
-                mainly includes aAcc, mIoU, mAcc, mDice, mFscore, mPrecision,
-                mRecall.
+        This method would be invoked by ``mmengine.Evaluator``.
         """
-        logger: MMLogger = MMLogger.get_current_instance()
-
-        # convert list of tuples to tuple of lists, e.g.
-        # [(A_1, B_1, C_1, D_1), ...,  (A_n, B_n, C_n, D_n)] to
-        # ([A_1, ..., A_n], ..., [D_1, ..., D_n])
-        results = tuple(zip(*results))
-        assert len(results) == 4
-
-        total_area_intersect = sum(results[0])
-        total_area_union = sum(results[1])
-        total_area_pred_label = sum(results[2])
-        total_area_label = sum(results[3])
-        ret_metrics = self.total_area_to_metrics(
-            total_area_intersect, total_area_union, total_area_pred_label,
-            total_area_label, self.metrics, self.nan_to_num, self.beta)
-
-        class_names = self.dataset_meta['classes']
-
-        # summary table
-        ret_metrics_summary = OrderedDict({
-            ret_metric: np.round(np.nanmean(ret_metric_value) * 100, 2)
-            for ret_metric, ret_metric_value in ret_metrics.items()
-        })
-        metrics = dict()
-        for key, val in ret_metrics_summary.items():
-            if key == 'aAcc':
-                metrics[key] = val
-            else:
-                metrics['m' + key] = val
-
-        # each class table
-        ret_metrics.pop('aAcc', None)
-        ret_metrics_class = OrderedDict({
-            ret_metric: np.round(ret_metric_value * 100, 2)
-            for ret_metric, ret_metric_value in ret_metrics.items()
-        })
-        ret_metrics_class.update({'Class': class_names})
-        ret_metrics_class.move_to_end('Class', last=False)
-        class_table_data = PrettyTable()
-        for key, val in ret_metrics_class.items():
-            class_table_data.add_column(key, val)
-
-        print_log('per class results:', logger)
-        print_log('\n' + class_table_data.get_string(), logger=logger)
-
-        return metrics
-
-    @staticmethod
-    def intersect_and_union(pred_label: torch.tensor, label: torch.tensor,
-                            num_classes: int, ignore_index: int):
-        """Calculate Intersection and Union.
-
-        Args:
-            pred_label (torch.tensor): Prediction segmentation map
-                or predict result filename. The shape is (H, W).
-            label (torch.tensor): Ground truth segmentation map
-                or label filename. The shape is (H, W).
-            num_classes (int): Number of categories.
-            ignore_index (int): Index that will be ignored in evaluation.
-
-        Returns:
-            torch.Tensor: The intersection of prediction and ground truth
-                histogram on all classes.
-            torch.Tensor: The union of prediction and ground truth histogram on
-                all classes.
-            torch.Tensor: The prediction histogram on all classes.
-            torch.Tensor: The ground truth histogram on all classes.
-        """
-
-        mask = (label != ignore_index)
-        pred_label = pred_label[mask]
-        label = label[mask]
-
-        intersect = pred_label[pred_label == label]
-        area_intersect = torch.histc(
-            intersect.float(), bins=(num_classes), min=0,
-            max=num_classes - 1).cpu()
-        area_pred_label = torch.histc(
-            pred_label.float(), bins=(num_classes), min=0,
-            max=num_classes - 1).cpu()
-        area_label = torch.histc(
-            label.float(), bins=(num_classes), min=0,
-            max=num_classes - 1).cpu()
-        area_union = area_pred_label + area_label - area_intersect
-        return area_intersect, area_union, area_pred_label, area_label
-
-    @staticmethod
-    def total_area_to_metrics(total_area_intersect: np.ndarray,
-                              total_area_union: np.ndarray,
-                              total_area_pred_label: np.ndarray,
-                              total_area_label: np.ndarray,
-                              metrics: List[str] = ['mIoU'],
-                              nan_to_num: Optional[int] = None,
-                              beta: int = 1):
-        """Calculate evaluation metrics
-        Args:
-            total_area_intersect (np.ndarray): The intersection of prediction
-                and ground truth histogram on all classes.
-            total_area_union (np.ndarray): The union of prediction and ground
-                truth histogram on all classes.
-            total_area_pred_label (np.ndarray): The prediction histogram on
-                all classes.
-            total_area_label (np.ndarray): The ground truth histogram on
-                all classes.
-            metrics (List[str] | str): Metrics to be evaluated, 'mIoU' and
-                'mDice'.
-            nan_to_num (int, optional): If specified, NaN values will be
-                replaced by the numbers defined by the user. Default: None.
-            beta (int): Determines the weight of recall in the combined score.
-                Default: 1.
-        Returns:
-            Dict[str, np.ndarray]: per category evaluation metrics,
-                shape (num_classes, ).
-        """
-
-        def f_score(precision, recall, beta=1):
-            """calculate the f-score value.
-
-            Args:
-                precision (float | torch.Tensor): The precision value.
-                recall (float | torch.Tensor): The recall value.
-                beta (int): Determines the weight of recall in the combined
-                    score. Default: 1.
-
-            Returns:
-                [torch.tensor]: The f-score value.
-            """
-            score = (1 + beta**2) * (precision * recall) / (
-                (beta**2 * precision) + recall)
-            return score
-
-        if isinstance(metrics, str):
-            metrics = [metrics]
-        allowed_metrics = ['mIoU', 'mDice', 'mFscore']
-        if not set(metrics).issubset(set(allowed_metrics)):
-            raise KeyError('metrics {} is not supported'.format(metrics))
-
-        all_acc = total_area_intersect.sum() / total_area_label.sum()
-        ret_metrics = OrderedDict({'aAcc': all_acc})
-        for metric in metrics:
-            if metric == 'mIoU':
-                iou = total_area_intersect / total_area_union
-                acc = total_area_intersect / total_area_label
-                ret_metrics['IoU'] = iou
-                ret_metrics['Acc'] = acc
-            elif metric == 'mDice':
-                dice = 2 * total_area_intersect / (
-                    total_area_pred_label + total_area_label)
-                acc = total_area_intersect / total_area_label
-                ret_metrics['Dice'] = dice
-                ret_metrics['Acc'] = acc
-            elif metric == 'mFscore':
-                precision = total_area_intersect / total_area_pred_label
-                recall = total_area_intersect / total_area_label
-                f_value = torch.tensor([
-                    f_score(x[0], x[1], beta) for x in zip(precision, recall)
-                ])
-                ret_metrics['Fscore'] = f_value
-                ret_metrics['Precision'] = precision
-                ret_metrics['Recall'] = recall
-
-        ret_metrics = {
-            metric: value.numpy()
-            for metric, value in ret_metrics.items()
-        }
-        if nan_to_num is not None:
-            ret_metrics = OrderedDict({
-                metric: np.nan_to_num(metric_value, nan=nan_to_num)
-                for metric, metric_value in ret_metrics.items()
-            })
-        return ret_metrics
+        metric_results = self.compute(*args, **kwargs)
+        self.reset()
+
+        classwise_results = metric_results['classwise_results']
+        del metric_results['classwise_results']
+
+        # Pretty table of the metric results per class.
+        summary_table = PrettyTable()
+        summary_table.add_column('Class', self.dataset_meta['classes'])
+        for key, value in classwise_results.items():
+            value = np.round(value * 100, 2)
+            summary_table.add_column(key, value)
+
+        print_log('per class results:', logger='current')
+        print_log('\n' + summary_table.get_string(), logger='current')
+
+        # Multiply values by 100 to convert to percentages, then round.
+        evaluate_results = {
+            k: round(v * 100, 2) for k, v in metric_results.items()}
+        return evaluate_results