Add support for Detection task type #732

Merged
merged 29 commits on Dec 6, 2022
Changes from 2 commits

Commits
64008c3
add basic support for detection task
djdameln Nov 24, 2022
bcd03d4
use enum for task type
djdameln Nov 24, 2022
ca427fb
formatting
djdameln Nov 25, 2022
69b83b4
small bugfix
djdameln Nov 25, 2022
30c4368
add unit tests for bounding box conversion
djdameln Nov 25, 2022
3c0cfec
update error message
djdameln Nov 28, 2022
037c1e5
use as_tensor
djdameln Nov 28, 2022
abea835
typing and docstring
djdameln Nov 28, 2022
c060333
explicit keyword arguments
djdameln Nov 28, 2022
bf573d1
simplify bbox handling in video dataset
djdameln Nov 28, 2022
b7f1b66
docstring consistency
djdameln Nov 28, 2022
7f60ea2
add missing licenses
djdameln Nov 28, 2022
eb87358
add whitespace for readability
djdameln Nov 28, 2022
4c3a6b1
add missing license
djdameln Nov 28, 2022
cec6138
Update anomalib/data/utils/boxes.py
djdameln Nov 28, 2022
d13ce5b
Revert "Update anomalib/data/utils/boxes.py"
djdameln Nov 28, 2022
0e0dc80
add test case for custom collate function
djdameln Nov 28, 2022
5ead1ad
docstring
djdameln Nov 28, 2022
44812d6
add integration tests for detection dataloading
djdameln Nov 29, 2022
d9304aa
extend and clean up datamodules tests
djdameln Nov 29, 2022
caf0867
add detection task type to visualizer tests
djdameln Nov 29, 2022
67312fc
Merge branch 'feature/datamodules' into da/detection-task-type
djdameln Nov 29, 2022
d63a7b7
only show pred_boxes during inference
djdameln Nov 30, 2022
7ec5fa4
add detection support for torch inference
djdameln Nov 30, 2022
d74bf41
add detection support for openvino inference
djdameln Nov 30, 2022
39cf0ac
test inference for all task types
djdameln Dec 1, 2022
f3d00d8
pylint
djdameln Dec 1, 2022
9962e8c
merge latest changes
djdameln Dec 5, 2022
5a055f2
merge feature branch
djdameln Dec 6, 2022
2 changes: 2 additions & 0 deletions anomalib/data/__init__.py
Expand Up @@ -14,6 +14,7 @@
from .folder import Folder
from .inference import InferenceDataset
from .mvtec import MVTec
from .task_type import TaskType
Contributor

would this be used by models and other components as well? If so, would it be an idea to move it to a place that would be accessible by other components?

Contributor Author

Yeah, I'm not super happy with the location of TaskType but couldn't think of a better place. Maybe somewhere in anomalib/config would work? @ashwinvaidya17 any ideas?
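
To make the concern concrete, here is a minimal sketch of how a non-data component would consume the enum in its current location; `configure_visualizer` is a hypothetical consumer invented for illustration, not part of this PR:

```python
from anomalib.data import TaskType  # the enum currently lives under the data package


def configure_visualizer(task: TaskType) -> None:
    """Hypothetical consumer outside anomalib.data that branches on the task type."""
    if task == TaskType.DETECTION:
        print("will draw predicted boxes")
    elif task == TaskType.SEGMENTATION:
        print("will draw predicted masks")
    else:
        print("will show image-level scores only")
```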

from .ucsd_ped import UCSDped

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -126,4 +127,5 @@ def get_datamodule(config: Union[DictConfig, ListConfig]) -> AnomalibDataModule:
"MVTec",
"Avenue",
"UCSDped",
"TaskType",
]
9 changes: 5 additions & 4 deletions anomalib/data/avenue.py
Expand Up @@ -27,6 +27,7 @@
from torch import Tensor

from anomalib.data.base import AnomalibDataModule, VideoAnomalibDataset
from anomalib.data.task_type import TaskType
from anomalib.data.utils import DownloadProgressBar, Split, ValSplitMode, hash_check
from anomalib.data.utils.video import ClipsIndexer
from anomalib.pre_processing import PreProcessor
Expand Down Expand Up @@ -124,7 +125,7 @@ class AvenueDataset(VideoAnomalibDataset):
"""Avenue Dataset class.

Args:
task (str): Task type, either 'classification' or 'segmentation'
task (TaskType): Task type, 'classification', 'detection' or 'segmentation'
root (str): Path to the root of the dataset
gt_dir (str): Path to the ground truth files
pre_process (PreProcessor): Pre-processor object
Expand All @@ -135,7 +136,7 @@ class AvenueDataset(VideoAnomalibDataset):

def __init__(
self,
task: str,
task: TaskType,
root: Union[Path, str],
gt_dir: str,
pre_process: PreProcessor,
Expand Down Expand Up @@ -163,7 +164,7 @@ class Avenue(AnomalibDataModule):
gt_dir (str): Path to the ground truth files
clip_length_in_frames (int, optional): Number of video frames in each clip.
frames_between_clips (int, optional): Number of frames between each consecutive video clip.
task (str): Task type, either 'classification' or 'segmentation'
task (TaskType): Task type, 'classification', 'detection' or 'segmentation'
image_size (Optional[Union[int, Tuple[int, int]]], optional): Size of the input image.
Defaults to None.
train_batch_size (int, optional): Training batch size. Defaults to 32.
Expand All @@ -184,7 +185,7 @@ def __init__(
gt_dir: str,
clip_length_in_frames: int = 1,
frames_between_clips: int = 1,
task: str = "segmentation",
task: TaskType = TaskType.SEGMENTATION,
image_size: Optional[Union[int, Tuple[int, int]]] = None,
train_batch_size: int = 32,
eval_batch_size: int = 32,
Expand Down
32 changes: 29 additions & 3 deletions anomalib/data/base/datamodule.py
Expand Up @@ -12,14 +12,28 @@
from pandas import DataFrame
from pytorch_lightning import LightningDataModule
from pytorch_lightning.utilities.types import EVAL_DATALOADERS, TRAIN_DATALOADERS
from torch.utils.data import DataLoader
from torch.utils.data import DataLoader, default_collate

from anomalib.data.base.dataset import AnomalibDataset
from anomalib.data.utils import ValSplitMode, random_split

logger = logging.getLogger(__name__)


def collate_fn(batch):
    """Custom collate function that collates bounding boxes as lists."""
    elem = batch[0]
    out_dict = {}
    if isinstance(elem, dict):
        if "boxes" in elem.keys():
            # collate boxes as list
            out_dict["boxes"] = [item.pop("boxes") for item in batch]
        # collate other data normally
        out_dict.update({key: default_collate([item[key] for item in batch]) for key in elem})
        return out_dict
    return default_collate(batch)


class AnomalibDataModule(LightningDataModule, ABC):
"""Base Anomalib data module.

Expand Down Expand Up @@ -101,8 +115,20 @@ def train_dataloader(self) -> TRAIN_DATALOADERS:

def val_dataloader(self) -> EVAL_DATALOADERS:
"""Get validation dataloader."""
return DataLoader(self.val_data, shuffle=False, batch_size=self.eval_batch_size, num_workers=self.num_workers)
return DataLoader(
self.val_data,
shuffle=False,
batch_size=self.eval_batch_size,
num_workers=self.num_workers,
collate_fn=collate_fn,
)

def test_dataloader(self) -> EVAL_DATALOADERS:
"""Get test dataloader."""
return DataLoader(self.test_data, shuffle=False, batch_size=self.eval_batch_size, num_workers=self.num_workers)
return DataLoader(
self.test_data,
shuffle=False,
batch_size=self.eval_batch_size,
num_workers=self.num_workers,
collate_fn=collate_fn,
)
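
A minimal usage sketch of the custom `collate_fn` added in this file (the batch contents below are made up for illustration): `boxes` stays a plain Python list with one `(N_i, 4)` tensor per image, since box counts differ between images, while every other key goes through `default_collate` as usual.

```python
import torch

from anomalib.data.base.datamodule import collate_fn

batch = [
    {"image": torch.rand(3, 256, 256), "label": 1, "boxes": torch.tensor([[10.0, 10.0, 50.0, 60.0]])},
    {"image": torch.rand(3, 256, 256), "label": 0, "boxes": torch.empty((0, 4))},
]

collated = collate_fn(batch)
print(type(collated["boxes"]), len(collated["boxes"]))  # <class 'list'> 2
print(collated["image"].shape)  # torch.Size([2, 3, 256, 256])
print(collated["label"])  # tensor([1, 0])
```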
22 changes: 14 additions & 8 deletions anomalib/data/base/dataset.py
Expand Up @@ -18,14 +18,16 @@
from torch import Tensor
from torch.utils.data import Dataset

from anomalib.data.utils import read_image
from anomalib.data.task_type import TaskType
from anomalib.data.utils import masks_to_boxes, read_image
from anomalib.pre_processing import PreProcessor

_EXPECTED_COLS_CLASSIFICATION = ["image_path", "split"]
_EXPECTED_COLS_SEGMENTATION = _EXPECTED_COLS_CLASSIFICATION + ["mask_path"]
_EXPECTED_COLS_PERTASK = {
"classification": _EXPECTED_COLS_CLASSIFICATION,
"segmentation": _EXPECTED_COLS_SEGMENTATION,
"detection": _EXPECTED_COLS_SEGMENTATION,
}

logger = logging.getLogger(__name__)
Expand All @@ -34,7 +36,7 @@
class AnomalibDataset(Dataset, ABC):
"""Anomalib dataset."""

def __init__(self, task: str, pre_process: PreProcessor):
def __init__(self, task: TaskType, pre_process: PreProcessor):
super().__init__()
self.task = task
self.pre_process = pre_process
Expand Down Expand Up @@ -107,16 +109,16 @@ def __getitem__(self, index: int) -> Dict[str, Union[str, Tensor]]:
"""

image_path = self._samples.iloc[index].image_path
image = read_image(image_path)
mask_path = self._samples.iloc[index].mask_path
label_index = self._samples.iloc[index].label_index

image = read_image(image_path)
item = dict(image_path=image_path, label=label_index)

if self.task == "classification":
if self.task == TaskType.CLASSIFICATION:
pre_processed = self.pre_process(image=image)
elif self.task == "segmentation":
mask_path = self._samples.iloc[index].mask_path

item["image"] = pre_processed["image"]
elif self.task in [TaskType.DETECTION, TaskType.SEGMENTATION]:
# Only Anomalous (1) images have masks in anomaly datasets
# Therefore, create empty mask for Normal (0) images.
if label_index == 0:
Expand All @@ -126,11 +128,15 @@ def __getitem__(self, index: int) -> Dict[str, Union[str, Tensor]]:

pre_processed = self.pre_process(image=image, mask=mask)

item["image"] = pre_processed["image"]
item["mask_path"] = mask_path
item["mask"] = pre_processed["mask"]

if self.task == TaskType.DETECTION:
# create boxes from masks for detection task
item["boxes"] = masks_to_boxes(item["mask"])[0]
else:
raise ValueError(f"Unknown task type: {self.task}")
item["image"] = pre_processed["image"]

return item

Expand Down
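
For detection, `__getitem__` derives `item["boxes"]` from the ground-truth mask via `masks_to_boxes`. The sketch below only illustrates the underlying idea (one axis-aligned box per connected anomalous region); it is not anomalib's actual `masks_to_boxes` implementation, whose return format also differs (the dataset indexes `[0]` into its result).

```python
import numpy as np
import torch
from scipy import ndimage


def mask_to_boxes_sketch(mask: torch.Tensor) -> torch.Tensor:
    """Return an (N, 4) tensor of (x1, y1, x2, y2) boxes, one per connected anomalous region."""
    labeled, num_regions = ndimage.label(mask.cpu().numpy() > 0)
    boxes = []
    for region_id in range(1, num_regions + 1):
        ys, xs = np.where(labeled == region_id)
        boxes.append([xs.min(), ys.min(), xs.max(), ys.max()])
    return torch.as_tensor(boxes, dtype=torch.float32) if boxes else torch.empty((0, 4))
```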
14 changes: 12 additions & 2 deletions anomalib/data/base/video.py
Expand Up @@ -7,6 +7,8 @@
from torch import Tensor

from anomalib.data.base.dataset import AnomalibDataset
from anomalib.data.task_type import TaskType
from anomalib.data.utils import masks_to_boxes
from anomalib.data.utils.video import ClipsIndexer
from anomalib.pre_processing import PreProcessor

Expand All @@ -21,7 +23,9 @@ class VideoAnomalibDataset(AnomalibDataset, ABC):
frames_between_clips (int): Number of frames between each consecutive video clip.
"""

def __init__(self, task: str, pre_process: PreProcessor, clip_length_in_frames: int, frames_between_clips: int):
def __init__(
self, task: TaskType, pre_process: PreProcessor, clip_length_in_frames: int, frames_between_clips: int
):
super().__init__(task, pre_process)

self.clip_length_in_frames = clip_length_in_frames
Expand Down Expand Up @@ -74,9 +78,15 @@ def __getitem__(self, index: int) -> Dict[str, Union[str, Tensor]]:
self.pre_process(image=frame.numpy(), mask=mask) for frame, mask in zip(item["image"], item["mask"])
]
item["image"] = torch.stack([item["image"] for item in processed_frames]).squeeze(0)
mask = item["mask"]
mask = Tensor(item["mask"])
item["mask"] = torch.stack([item["mask"] for item in processed_frames]).squeeze(0)
item["label"] = Tensor([1 in frame for frame in mask]).int().squeeze(0)
if self.task == TaskType.DETECTION:
item["boxes"] = [
torch.empty((0, 4)) if frame.max() == 0 else masks_to_boxes(frame)
for frame in item["mask"].view((-1, 1) + item["mask"].shape[-2:])
]
item["boxes"] = item["boxes"][0] if len(item["boxes"]) == 1 else item["boxes"]
else:
item["image"] = torch.stack(
[self.pre_process(image=frame.numpy())["image"] for frame in item["image"]]
Expand Down
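
The video dataset handles clips rather than single frames, so the clip-level mask is first reshaped into one `(1, H, W)` mask per frame and boxes are computed frame by frame, with an empty `(0, 4)` tensor for frames that contain no anomaly. A rough sketch of that reshape-then-convert step, using torchvision's `masks_to_boxes` as a stand-in for anomalib's own helper (which may differ, e.g. by splitting connected components):

```python
import torch
from torchvision.ops import masks_to_boxes

clip_mask = torch.zeros(4, 256, 256)  # 4-frame clip, all normal to start
clip_mask[2, 100:120, 50:80] = 1  # synthetic anomalous region in frame 2

per_frame = clip_mask.view((-1, 1) + clip_mask.shape[-2:])  # (4, 1, H, W): one mask per frame
boxes = [
    torch.empty((0, 4)) if frame.max() == 0 else masks_to_boxes(frame.bool())
    for frame in per_frame
]
print([tuple(b.shape) for b in boxes])  # [(0, 4), (0, 4), (1, 4), (0, 4)]
```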
9 changes: 5 additions & 4 deletions anomalib/data/btech.py
Expand Up @@ -24,6 +24,7 @@
from tqdm import tqdm

from anomalib.data.base import AnomalibDataModule, AnomalibDataset
from anomalib.data.task_type import TaskType
from anomalib.data.utils import DownloadProgressBar, Split, ValSplitMode, hash_check
from anomalib.pre_processing import PreProcessor

Expand Down Expand Up @@ -114,7 +115,7 @@ def __init__(
category: str,
pre_process: PreProcessor,
split: Optional[Union[Split, str]] = None,
task: str = "segmentation",
task: TaskType = TaskType.SEGMENTATION,
) -> None:
"""Btech Dataset class.

Expand All @@ -123,7 +124,7 @@ def __init__(
category: Name of the BTech category.
pre_process: List of pre_processing object containing albumentation compose.
split: 'train', 'val' or 'test'
task: ``classification`` or ``segmentation``
task: ``classification``, ``detection`` or ``segmentation``
create_validation_set: Create a validation subset in addition to the train and test subsets

Examples:
Expand Down Expand Up @@ -177,7 +178,7 @@ def __init__(
train_batch_size: int = 32,
eval_batch_size: int = 32,
num_workers: int = 8,
task: str = "segmentation",
task: TaskType = TaskType.SEGMENTATION,
transform_config_train: Optional[Union[str, A.Compose]] = None,
transform_config_eval: Optional[Union[str, A.Compose]] = None,
val_split_mode: ValSplitMode = ValSplitMode.SAME_AS_TEST,
Expand All @@ -192,7 +193,7 @@ def __init__(
train_batch_size: Training batch size.
test_batch_size: Testing batch size.
num_workers: Number of workers.
task: ``classification`` or ``segmentation``
task: ``classification``, ``detection`` or ``segmentation``
transform_config_train: Config for pre-processing during training.
transform_config_val: Config for pre-processing during validation.
create_validation_set: Create a validation subset in addition to the train and test subsets
Expand Down
11 changes: 6 additions & 5 deletions anomalib/data/folder.py
Expand Up @@ -14,6 +14,7 @@
from torchvision.datasets.folder import IMG_EXTENSIONS

from anomalib.data.base import AnomalibDataModule, AnomalibDataset
from anomalib.data.task_type import TaskType
from anomalib.data.utils import Split, ValSplitMode, random_split
from anomalib.pre_processing.pre_process import PreProcessor

Expand Down Expand Up @@ -141,7 +142,7 @@ class FolderDataset(AnomalibDataset):
"""Folder dataset.

Args:
task (str): Task type. (classification or segmentation).
task (TaskType): Task type (classification, detection or segmentation).
pre_process (PreProcessor): Image Pre-processor to apply transform.
split (Optional[Union[Split, str]]): Fixed subset split that follows from folder structure on file system.
Choose from [Split.FULL, Split.TRAIN, Split.TEST]
Expand All @@ -165,7 +166,7 @@ class FolderDataset(AnomalibDataset):

def __init__(
self,
task: str,
task: TaskType,
pre_process: PreProcessor,
root: Union[str, Path],
normal_dir: Union[str, Path],
Expand Down Expand Up @@ -222,8 +223,8 @@ class Folder(AnomalibDataModule):
train_batch_size (int, optional): Training batch size. Defaults to 32.
test_batch_size (int, optional): Test batch size. Defaults to 32.
num_workers (int, optional): Number of workers. Defaults to 8.
task (str, optional): Task type. Could be either classification or segmentation.
Defaults to "classification".
task (TaskType, optional): Task type. Could be classification, detection or segmentation.
Defaults to segmentation.
transform_config_train (Optional[Union[str, A.Compose]], optional): Config for pre-processing
during training.
Defaults to None.
Expand All @@ -248,7 +249,7 @@ def __init__(
train_batch_size: int = 32,
eval_batch_size: int = 32,
num_workers: int = 8,
task: str = "segmentation",
task: TaskType = TaskType.SEGMENTATION,
transform_config_train: Optional[Union[str, A.Compose]] = None,
transform_config_eval: Optional[Union[str, A.Compose]] = None,
val_split_mode: ValSplitMode = ValSplitMode.FROM_TEST,
Expand Down
7 changes: 4 additions & 3 deletions anomalib/data/mvtec.py
Expand Up @@ -33,6 +33,7 @@
from pandas import DataFrame

from anomalib.data.base import AnomalibDataModule, AnomalibDataset
from anomalib.data.task_type import TaskType
from anomalib.data.utils import DownloadProgressBar, Split, ValSplitMode, hash_check
from anomalib.pre_processing import PreProcessor

Expand Down Expand Up @@ -123,7 +124,7 @@ class MVTecDataset(AnomalibDataset):
"""MVTec dataset class.

Args:
task (str): Task type, either 'classification' or 'segmentation'
task (TaskType): Task type, 'classification', 'detection' or 'segmentation'
pre_process (PreProcessor): Pre-processor object
split (Optional[Union[Split, str]]): Split of the dataset, usually Split.TRAIN or Split.TEST
root (str): Path to the root of the dataset
Expand All @@ -132,7 +133,7 @@ class MVTecDataset(AnomalibDataset):

def __init__(
self,
task: str,
task: TaskType,
pre_process: PreProcessor,
root: str,
category: str,
Expand All @@ -158,7 +159,7 @@ def __init__(
train_batch_size: int = 32,
eval_batch_size: int = 32,
num_workers: int = 8,
task: str = "segmentation",
task: TaskType = TaskType.SEGMENTATION,
transform_config_train: Optional[Union[str, A.Compose]] = None,
transform_config_eval: Optional[Union[str, A.Compose]] = None,
val_split_mode: ValSplitMode = ValSplitMode.SAME_AS_TEST,
Expand Down
11 changes: 11 additions & 0 deletions anomalib/data/task_type.py
@@ -0,0 +1,11 @@
"""Task type enum."""

from enum import Enum


class TaskType(str, Enum):
"""Task type used when generating predictions on the dataset."""

CLASSIFICATION = "classification"
DETECTION = "detection"
SEGMENTATION = "segmentation"
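
Because `TaskType` subclasses `str`, it compares equal to the plain strings used in existing configs, so string-based call sites and YAML values keep working. A small sketch, assuming the enum is exported from `anomalib.data` as done in this PR:

```python
from anomalib.data import TaskType

assert TaskType.SEGMENTATION == "segmentation"  # str-Enum compares equal to its value
assert TaskType("detection") is TaskType.DETECTION  # parsing a raw config string
```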
9 changes: 5 additions & 4 deletions anomalib/data/ucsd_ped.py
Expand Up @@ -16,6 +16,7 @@

from anomalib.data.base import AnomalibDataModule
from anomalib.data.base.video import VideoAnomalibDataset
from anomalib.data.task_type import TaskType
from anomalib.data.utils import (
DownloadProgressBar,
Split,
Expand Down Expand Up @@ -138,7 +139,7 @@ class UCSDpedDataset(VideoAnomalibDataset):
"""UCSDped Dataset class.

Args:
task (str): Task type, either 'classification' or 'segmentation'
task (TaskType): Task type, 'classification', 'detection' or 'segmentation'
root (str): Path to the root of the dataset
category (str): Sub-category of the dataset, e.g. 'bottle'
pre_process (PreProcessor): Pre-processor object
Expand All @@ -149,7 +150,7 @@ class UCSDpedDataset(VideoAnomalibDataset):

def __init__(
self,
task: str,
task: TaskType,
root: Union[Path, str],
category: str,
pre_process: PreProcessor,
Expand All @@ -176,7 +177,7 @@ class UCSDped(AnomalibDataModule):
category (str): Sub-category of the dataset, e.g. 'bottle'
clip_length_in_frames (int, optional): Number of video frames in each clip.
frames_between_clips (int, optional): Number of frames between each consecutive video clip.
task (str): Task type, either 'classification' or 'segmentation'
task (TaskType): Task type, 'classification', 'detection' or 'segmentation'
image_size (Optional[Union[int, Tuple[int, int]]], optional): Size of the input image.
Defaults to None.
train_batch_size (int, optional): Training batch size. Defaults to 32.
Expand All @@ -197,7 +198,7 @@ def __init__(
category: str,
clip_length_in_frames: int = 1,
frames_between_clips: int = 1,
task: str = "segmentation",
task: TaskType = TaskType.SEGMENTATION,
image_size: Optional[Union[int, Tuple[int, int]]] = None,
train_batch_size: int = 32,
eval_batch_size: int = 32,
Expand Down