
Deprecate PreProcessor #795

Merged (25 commits) on Dec 19, 2022
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -17,6 +17,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

### Changed

- Make normalization and center cropping configurable through config (<https://github.com/openvinotoolkit/anomalib/pull/795>)
- Switch to new [changelog format](https://keepachangelog.com/en/1.0.0/). (<https://github.com/openvinotoolkit/anomalib/pull/777>)
- Rename feature to task (<https://github.com/openvinotoolkit/anomalib/pull/769>)
- make device configurable in OpenVINO inference (<https://github.com/openvinotoolkit/anomalib/pull/755>)
@@ -28,6 +29,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- 📊 Update DFM results (<https://github.com/openvinotoolkit/anomalib/pull/674>)
- Optimize anomaly score calculation for PatchCore (<https://github.com/openvinotoolkit/anomalib/pull/633>)

### Deprecated

- Deprecated PreProcessor class (<https://github.com/openvinotoolkit/anomalib/pull/795>)

### Fixed

- Fix PatchCore performance deterioration by reverting changes to Average Pooling layer (<https://github.com/openvinotoolkit/anomalib/pull/791>)
26 changes: 21 additions & 5 deletions anomalib/config/config.py
@@ -32,11 +32,27 @@ def update_input_size_config(config: Union[DictConfig, ListConfig]) -> Union[DictConfig, ListConfig]:
Returns:
Union[DictConfig, ListConfig]: Configurable parameters with updated values
"""
# handle image size
if isinstance(config.dataset.image_size, int):
config.dataset.image_size = (config.dataset.image_size,) * 2

config.model.input_size = config.dataset.image_size
# Image size: Ensure value is in the form [height, width]
image_size = config.dataset.get("image_size")
if isinstance(image_size, int):
config.dataset.image_size = (image_size,) * 2
elif isinstance(image_size, ListConfig):
assert len(image_size) == 2, "image_size must be a single integer or a sequence of length 2 for height and width."
else:
raise ValueError(f"image_size must be either int or ListConfig, got {type(image_size)}")

# Center crop: Ensure value is in the form [height, width], and update input_size
center_crop = config.dataset.get("center_crop")
if center_crop is None:
config.model.input_size = config.dataset.image_size
elif isinstance(center_crop, int):
config.dataset.center_crop = (center_crop,) * 2
config.model.input_size = config.dataset.center_crop
elif isinstance(center_crop, ListConfig):
assert len(center_crop) == 2, "center_crop must be a single integer or a sequence of length 2 for height and width."
config.model.input_size = center_crop
else:
raise ValueError(f"center_crop must be either int or ListConfig, got {type(center_crop)}")

if "tiling" in config.dataset.keys() and config.dataset.tiling.apply:
if isinstance(config.dataset.tiling.tile_size, int):
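For reference, a minimal sketch of how the resolution above plays out, assuming `update_input_size_config` can be imported from `anomalib.config.config` and called on a bare OmegaConf config (the values below are made up):

```python
from omegaconf import OmegaConf

from anomalib.config.config import update_input_size_config

# Made-up minimal config exercising both image_size and center_crop.
config = OmegaConf.create({"dataset": {"image_size": 256, "center_crop": 224}, "model": {}})
config = update_input_size_config(config)

print(config.dataset.image_size)   # [256, 256] - single int expanded to (height, width)
print(config.dataset.center_crop)  # [224, 224]
print(config.model.input_size)     # [224, 224] - the crop size wins when center_crop is set
```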
15 changes: 15 additions & 0 deletions anomalib/data/__init__.py
@@ -33,11 +33,18 @@ def get_datamodule(config: Union[DictConfig, ListConfig]) -> AnomalibDataModule:

datamodule: AnomalibDataModule

# convert center crop to tuple
center_crop = config.dataset.get("center_crop")
if center_crop is not None:
center_crop = (center_crop[0], center_crop[1])

if config.dataset.format.lower() == "mvtec":
datamodule = MVTec(
root=config.dataset.path,
category=config.dataset.category,
image_size=(config.dataset.image_size[0], config.dataset.image_size[1]),
center_crop=center_crop,
normalization=config.dataset.normalization,
train_batch_size=config.dataset.train_batch_size,
eval_batch_size=config.dataset.eval_batch_size,
num_workers=config.dataset.num_workers,
@@ -54,6 +61,8 @@ def get_datamodule(config: Union[DictConfig, ListConfig]) -> AnomalibDataModule:
root=config.dataset.path,
category=config.dataset.category,
image_size=(config.dataset.image_size[0], config.dataset.image_size[1]),
center_crop=center_crop,
normalization=config.dataset.normalization,
train_batch_size=config.dataset.train_batch_size,
eval_batch_size=config.dataset.eval_batch_size,
num_workers=config.dataset.num_workers,
@@ -75,6 +84,8 @@ def get_datamodule(config: Union[DictConfig, ListConfig]) -> AnomalibDataModule:
mask_dir=config.dataset.mask_dir,
extensions=config.dataset.extensions,
image_size=(config.dataset.image_size[0], config.dataset.image_size[1]),
center_crop=center_crop,
normalization=config.dataset.normalization,
train_batch_size=config.dataset.train_batch_size,
eval_batch_size=config.dataset.eval_batch_size,
num_workers=config.dataset.num_workers,
@@ -93,6 +104,8 @@ def get_datamodule(config: Union[DictConfig, ListConfig]) -> AnomalibDataModule:
clip_length_in_frames=config.dataset.clip_length_in_frames,
frames_between_clips=config.dataset.frames_between_clips,
image_size=(config.dataset.image_size[0], config.dataset.image_size[1]),
center_crop=center_crop,
normalization=config.dataset.normalization,
transform_config_train=config.dataset.transform_config.train,
transform_config_eval=config.dataset.transform_config.eval,
train_batch_size=config.dataset.train_batch_size,
@@ -109,6 +122,8 @@ def get_datamodule(config: Union[DictConfig, ListConfig]) -> AnomalibDataModule:
clip_length_in_frames=config.dataset.clip_length_in_frames,
frames_between_clips=config.dataset.frames_between_clips,
image_size=(config.dataset.image_size[0], config.dataset.image_size[1]),
center_crop=center_crop,
normalization=config.dataset.normalization,
transform_config_train=config.dataset.transform_config.train,
transform_config_eval=config.dataset.transform_config.eval,
train_batch_size=config.dataset.train_batch_size,
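For context, a hedged sketch of passing the new arguments when building a datamodule directly instead of through `get_datamodule`; the root path and category here are placeholders, and the remaining constructor defaults are assumed to be acceptable:

```python
from anomalib.data import MVTec

# Placeholder paths and values; other constructor arguments keep their defaults.
datamodule = MVTec(
    root="./datasets/MVTec",
    category="bottle",
    image_size=(256, 256),
    center_crop=(224, 224),     # new: center-crop applied after resizing
    normalization="imagenet",   # new: plain string or InputNormalizationMethod.IMAGENET
    train_batch_size=32,
    eval_batch_size=32,
    num_workers=8,
)
```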
39 changes: 30 additions & 9 deletions anomalib/data/avenue.py
@@ -28,9 +28,15 @@

from anomalib.data.base import AnomalibVideoDataModule, AnomalibVideoDataset
from anomalib.data.task_type import TaskType
from anomalib.data.utils import DownloadProgressBar, Split, ValSplitMode, hash_check
from anomalib.data.utils import (
DownloadProgressBar,
InputNormalizationMethod,
Split,
ValSplitMode,
get_transforms,
hash_check,
)
from anomalib.data.utils.video import ClipsIndexer
from anomalib.pre_processing import PreProcessor

logger = logging.getLogger(__name__)

@@ -128,7 +134,7 @@ class AvenueDataset(AnomalibVideoDataset):
task (TaskType): Task type, 'classification', 'detection' or 'segmentation'
root (str): Path to the root of the dataset
gt_dir (str): Path to the ground truth files
pre_process (PreProcessor): Pre-processor object
transform (A.Compose): Albumentations Compose object describing the transforms that are applied to the inputs.
split (Optional[Union[Split, str]]): Split of the dataset, usually Split.TRAIN or Split.TEST
clip_length_in_frames (int, optional): Number of video frames in each clip.
frames_between_clips (int, optional): Number of frames between each consecutive video clip.
@@ -139,12 +145,12 @@ def __init__(
task: TaskType,
root: Union[Path, str],
gt_dir: str,
pre_process: PreProcessor,
transform: A.Compose,
split: Split,
clip_length_in_frames: int = 1,
frames_between_clips: int = 1,
):
super().__init__(task, pre_process, clip_length_in_frames, frames_between_clips)
super().__init__(task, transform, clip_length_in_frames, frames_between_clips)

self.root = root
self.gt_dir = gt_dir
@@ -167,6 +173,9 @@ class Avenue(AnomalibVideoDataModule):
task (TaskType): Task type, 'classification', 'detection' or 'segmentation'
image_size (Optional[Union[int, Tuple[int, int]]], optional): Size of the input image.
Defaults to None.
center_crop (Optional[Union[int, Tuple[int, int]]], optional): When provided, the images will be center-cropped
to the provided dimensions.
normalization (Union[InputNormalizationMethod, str]): Normalization method applied to the input images. Defaults to imagenet.
train_batch_size (int, optional): Training batch size. Defaults to 32.
eval_batch_size (int, optional): Test batch size. Defaults to 32.
num_workers (int, optional): Number of workers. Defaults to 8.
@@ -189,6 +198,8 @@ def __init__(
frames_between_clips: int = 1,
task: TaskType = TaskType.SEGMENTATION,
image_size: Optional[Union[int, Tuple[int, int]]] = None,
center_crop: Optional[Union[int, Tuple[int, int]]] = None,
normalization: Union[InputNormalizationMethod, str] = InputNormalizationMethod.IMAGENET,
Contributor:

picky comment: could you define InputNormalizationType = Union[InputNormalizationMethod, str] somewhere?

to be consistent with

FeatureExtractorParams = Union[TimmFeatureExtractorParams, TorchFXFeatureExtractorParams]

in #748

i'm actually wondering if this is necessary since InputNormalizationMethod inherits from str and Enum, but in any case it would be nice to avoid repetitive Union[...] for maintenance purposes

Contributor Author (@djdameln), Dec 19, 2022:

> i'm actually wondering if this is necessary since InputNormalizationMethod inherits from str and Enum

This is a good point, actually. This works fine:

>>> isinstance(InputNormalizationMethod.IMAGENET, InputNormalizationMethod)
True
>>> isinstance(InputNormalizationMethod.IMAGENET, str)
True

I guess the only question is if we want to allow users to instantiate the class with a string instead of the enum. So, do we want to allow something like this:

mvtec = MVTec(..., normalize="imagenet", ...)

If we do, then we need the Union typing.

Contributor:

> So, do we want to allow something like this:
>
> mvtec = MVTec(..., normalize="imagenet", ...)

Unless it makes handling things harder somehow, I'd say yes :)

Contributor Author (@djdameln):

In that case we need to keep the str typing. For now we'll merge it as is, so with the Union typing. We could consider switching to custom types later if needed.
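As a standalone aside on the str-and-Enum point above (not part of this diff; the member values are assumed to mirror anomalib's enum):

```python
from enum import Enum


class InputNormalizationMethod(str, Enum):
    """Assumed shape of the enum under discussion."""

    NONE = "none"
    IMAGENET = "imagenet"


# Members are real strings, so they pass str isinstance checks...
assert isinstance(InputNormalizationMethod.IMAGENET, str)
# ...and a plain user-supplied string converts to a member by value, which is what
# InputNormalizationMethod(normalization) does inside the datamodules.
assert InputNormalizationMethod("imagenet") is InputNormalizationMethod.IMAGENET
```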

train_batch_size: int = 32,
eval_batch_size: int = 32,
num_workers: int = 8,
@@ -210,12 +221,22 @@ def __init__(
self.root = Path(root)
self.gt_dir = Path(gt_dir)

pre_process_train = PreProcessor(config=transform_config_train, image_size=image_size)
pre_process_eval = PreProcessor(config=transform_config_eval, image_size=image_size)
transform_train = get_transforms(
config=transform_config_train,
image_size=image_size,
center_crop=center_crop,
normalization=InputNormalizationMethod(normalization),
)
transform_eval = get_transforms(
config=transform_config_eval,
image_size=image_size,
center_crop=center_crop,
normalization=InputNormalizationMethod(normalization),
)

self.train_data = AvenueDataset(
task=task,
pre_process=pre_process_train,
transform=transform_train,
clip_length_in_frames=clip_length_in_frames,
frames_between_clips=frames_between_clips,
root=root,
Expand All @@ -225,7 +246,7 @@ def __init__(

self.test_data = AvenueDataset(
task=task,
pre_process=pre_process_eval,
transform=transform_eval,
clip_length_in_frames=clip_length_in_frames,
frames_between_clips=frames_between_clips,
root=root,
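For illustration, a hedged sketch of building a transform the same way the datamodule above does; the sizes are arbitrary, and config=None is assumed to fall back to the default pipeline, as when no transform_config is provided:

```python
from anomalib.data.utils import InputNormalizationMethod, get_transforms

# Illustrative sizes; config=None is assumed to build the default pipeline.
transform_eval = get_transforms(
    config=None,
    image_size=(256, 256),
    center_crop=(224, 224),
    normalization=InputNormalizationMethod.IMAGENET,
)
```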
10 changes: 5 additions & 5 deletions anomalib/data/base/dataset.py
@@ -11,6 +11,7 @@
from pathlib import Path
from typing import Dict, Sequence, Union

import albumentations as A
import cv2
import numpy as np
import pandas as pd
@@ -20,7 +21,6 @@

from anomalib.data.task_type import TaskType
from anomalib.data.utils import masks_to_boxes, read_image
from anomalib.pre_processing import PreProcessor

_EXPECTED_COLS_CLASSIFICATION = ["image_path", "split"]
_EXPECTED_COLS_SEGMENTATION = _EXPECTED_COLS_CLASSIFICATION + ["mask_path"]
@@ -36,10 +36,10 @@
class AnomalibDataset(Dataset, ABC):
"""Anomalib dataset."""

def __init__(self, task: TaskType, pre_process: PreProcessor):
def __init__(self, task: TaskType, transform: A.Compose):
super().__init__()
self.task = task
self.pre_process = pre_process
self.transform = transform
self._samples: DataFrame = None

def __len__(self) -> int:
@@ -116,7 +116,7 @@ def __getitem__(self, index: int) -> Dict[str, Union[str, Tensor]]:
item = dict(image_path=image_path, label=label_index)

if self.task == TaskType.CLASSIFICATION:
pre_processed = self.pre_process(image=image)
pre_processed = self.transform(image=image)
item["image"] = pre_processed["image"]
elif self.task in [TaskType.DETECTION, TaskType.SEGMENTATION]:
# Only Anomalous (1) images have masks in anomaly datasets
@@ -126,7 +126,7 @@ def __getitem__(self, index: int) -> Dict[str, Union[str, Tensor]]:
else:
mask = cv2.imread(mask_path, flags=0) / 255.0

pre_processed = self.pre_process(image=image, mask=mask)
pre_processed = self.transform(image=image, mask=mask)

item["image"] = pre_processed["image"]
item["mask_path"] = mask_path
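To make the transform call concrete, a small albumentations sketch; the pipeline below is illustrative and not necessarily anomalib's exact default:

```python
import albumentations as A
import numpy as np
from albumentations.pytorch import ToTensorV2

# Illustrative pipeline: resize -> center crop -> ImageNet normalization -> tensor.
transform = A.Compose([A.Resize(256, 256), A.CenterCrop(224, 224), A.Normalize(), ToTensorV2()])

image = np.random.randint(0, 255, (900, 900, 3), dtype=np.uint8)  # fake RGB image
mask = np.zeros((900, 900), dtype=np.float32)                     # fake segmentation mask

out = transform(image=image, mask=mask)
# out["image"] is a (3, 224, 224) float tensor; out["mask"] is a (224, 224) tensor.
```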
14 changes: 6 additions & 8 deletions anomalib/data/base/video.py
@@ -3,6 +3,7 @@
from abc import ABC
from typing import Callable, Dict, Optional, Union

import albumentations as A
import torch
from torch import Tensor

@@ -11,7 +12,6 @@
from anomalib.data.task_type import TaskType
from anomalib.data.utils import ValSplitMode, masks_to_boxes
from anomalib.data.utils.video import ClipsIndexer
from anomalib.pre_processing import PreProcessor


class AnomalibVideoDataset(AnomalibDataset, ABC):
@@ -24,14 +24,12 @@ class AnomalibVideoDataset(AnomalibDataset, ABC):
frames_between_clips (int): Number of frames between each consecutive video clip.
"""

def __init__(
self, task: TaskType, pre_process: PreProcessor, clip_length_in_frames: int, frames_between_clips: int
):
super().__init__(task, pre_process)
def __init__(self, task: TaskType, transform: A.Compose, clip_length_in_frames: int, frames_between_clips: int):
super().__init__(task, transform)

self.clip_length_in_frames = clip_length_in_frames
self.frames_between_clips = frames_between_clips
self.pre_process = pre_process
self.transform = transform

self.indexer: Optional[ClipsIndexer] = None
self.indexer_cls: Optional[Callable] = None
@@ -76,7 +74,7 @@ def __getitem__(self, index: int) -> Dict[str, Union[str, Tensor]]:
# apply transforms
if "mask" in item and item["mask"] is not None:
processed_frames = [
self.pre_process(image=frame.numpy(), mask=mask) for frame, mask in zip(item["image"], item["mask"])
self.transform(image=frame.numpy(), mask=mask) for frame, mask in zip(item["image"], item["mask"])
]
item["image"] = torch.stack([item["image"] for item in processed_frames]).squeeze(0)
mask = torch.as_tensor(item["mask"])
@@ -87,7 +85,7 @@ def __getitem__(self, index: int) -> Dict[str, Union[str, Tensor]]:
item["boxes"] = item["boxes"][0] if len(item["boxes"]) == 1 else item["boxes"]
else:
item["image"] = torch.stack(
[self.pre_process(image=frame.numpy())["image"] for frame in item["image"]]
[self.transform(image=frame.numpy())["image"] for frame in item["image"]]
).squeeze(0)

if item["mask"] is None:
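Similarly, a small sketch of the per-frame video path shown above, using a fake clip of NumPy frames with arbitrary shapes:

```python
import albumentations as A
import numpy as np
import torch
from albumentations.pytorch import ToTensorV2

transform = A.Compose([A.Resize(256, 256), A.Normalize(), ToTensorV2()])

# Fake clip of 4 RGB frames, roughly what the clips indexer yields before transforming.
frames = [np.random.randint(0, 255, (360, 640, 3), dtype=np.uint8) for _ in range(4)]
clip = torch.stack([transform(image=frame)["image"] for frame in frames])  # (4, 3, 256, 256)
```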