openvinotoolkit · ashwinvaidya17 · Aug 17, 2023 · Aug 14, 2023 · Aug 14, 2023 · Aug 14, 2023
@@ -8,6 +8,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 
 ### Added
 
+- Add support for receiving dataset paths as a list by @harimkang in https://github.com/openvinotoolkit/anomalib/pull/1265
+
 ### Changed
 
 - Enable training with only normal images for MVTec in https://github.com/openvinotoolkit/anomalib/pull/1241

@@ -107,6 +107,20 @@ Let's choose `Padim algorithm <https://arxiv.org/pdf/2011.08785.pdf>`_, copy the
         ...
 
 
+.. note::
+
+    Each dir value can take multiple folder inputs in the form of a list as shown below.
+
+    .. code-block:: yaml
+
+        dataset:
+            normal_dir: ["normal_1", "normal_2"] # Reads the images from every folder in the list.
+            abnormal_dir: # The standard YAML block-list syntax can also be used.
+                - abnormal_1
+                - abnormal_2
+            mask_dir: ["mask_1", "mask_2"] # optional
+            normal_test_dir: null # optional
+
+
 Step 5: Run Training
 --------------------
 

@@ -8,6 +8,7 @@
 from __future__ import annotations
 
 from pathlib import Path
+from typing import List, Sequence
 
 import albumentations as A
 from pandas import DataFrame
@@ -27,23 +28,23 @@
 
 
 def make_folder_dataset(
-    normal_dir: str | Path,
+    normal_dir: str | Path | Sequence[str | Path],
     root: str | Path | None = None,
-    abnormal_dir: str | Path | None = None,
-    normal_test_dir: str | Path | None = None,
-    mask_dir: str | Path | None = None,
+    abnormal_dir: str | Path | Sequence[str | Path] | None = None,
+    normal_test_dir: str | Path | Sequence[str | Path] | None = None,
+    mask_dir: str | Path | Sequence[str | Path] | None = None,
     split: str | Split | None = None,
     extensions: tuple[str, ...] | None = None,
 ) -> DataFrame:
     """Make Folder Dataset.
     Args:
-        normal_dir (str | Path): Path to the directory containing normal images.
+        normal_dir (str | Path | Sequence): Path to the directory containing normal images.
         root (str | Path | None): Path to the root directory of the dataset.
-        abnormal_dir (str | Path | None, optional): Path to the directory containing abnormal images.
-        normal_test_dir (str | Path | None, optional): Path to the directory containing
+        abnormal_dir (str | Path | Sequence | None, optional): Path to the directory containing abnormal images.
+        normal_test_dir (str | Path | Sequence | None, optional): Path to the directory containing
             normal images for the test dataset. Normal test images will be a split of `normal_dir`
             if `None`. Defaults to None.
-        mask_dir (str | Path | None, optional): Path to the directory containing
+        mask_dir (str | Path | Sequence | None, optional): Path to the directory containing
             the mask annotations. Defaults to None.
         split (str | Split | None, optional): Dataset split (ie., Split.FULL, Split.TRAIN or Split.TEST).
             Defaults to None.
@@ -52,11 +53,24 @@ def make_folder_dataset(
     Returns:
         DataFrame: an output dataframe containing samples for the requested split (ie., train or test)
     """
-    normal_dir = _resolve_path(normal_dir, root)
-    abnormal_dir = _resolve_path(abnormal_dir, root) if abnormal_dir is not None else None
-    normal_test_dir = _resolve_path(normal_test_dir, root) if normal_test_dir is not None else None
-    mask_dir = _resolve_path(mask_dir, root) if mask_dir is not None else None
-    assert normal_dir.is_dir(), "A folder location must be provided in normal_dir."
+
+    def _path_to_list_config_with_resolve_path(path: str | Path | Sequence[str | Path] | None) -> List[Path]:
+        """Convert a single path or a sequence of paths into a list of resolved paths.
+        Args:
+            path (str | Path | Sequence | None): A single path, a sequence of paths, or None.
+        Returns:
+            List[Path]: Every given path passed through ``_resolve_path`` with ``root``;
+                an empty list if ``path`` is None.
+        """
+        if isinstance(path, Sequence) and not isinstance(path, str):
+            return [_resolve_path(dir_path, root) for dir_path in path]
+        return [_resolve_path(path, root)] if path is not None else []
+
+    # All paths are changed to the List[Path] type and used.
+    normal_dir = _path_to_list_config_with_resolve_path(normal_dir)
+    abnormal_dir = _path_to_list_config_with_resolve_path(abnormal_dir)
+    normal_test_dir = _path_to_list_config_with_resolve_path(normal_test_dir)
+    mask_dir = _path_to_list_config_with_resolve_path(mask_dir)
+    assert len(normal_dir) > 0, "A folder location must be provided in normal_dir."
 
     filenames = []
     labels = []
@@ -71,10 +85,11 @@ def make_folder_dataset(
     if mask_dir:
         dirs = {**dirs, **{DirType.MASK: mask_dir}}
 
-    for dir_type, path in dirs.items():
-        filename, label = _prepare_files_labels(path, dir_type, extensions)
-        filenames += filename
-        labels += label
+    for dir_type, paths in dirs.items():
+        for path in paths:
+            filename, label = _prepare_files_labels(path, dir_type, extensions)
+            filenames += filename
+            labels += label
 
     samples = DataFrame({"image_path": filenames, "label": labels})
     samples = samples.sort_values(by="image_path", ignore_index=True)
@@ -88,7 +103,7 @@ def make_folder_dataset(
 
     # If a path to mask is provided, add it to the sample dataframe.
 
-    if mask_dir is not None and abnormal_dir is not None:
+    if len(mask_dir) > 0 and len(abnormal_dir) > 0:
         samples.loc[samples.label == DirType.ABNORMAL, "mask_path"] = samples.loc[
             samples.label == DirType.MASK
         ].image_path.values
@@ -136,12 +151,12 @@ class FolderDataset(AnomalibDataset):
         transform (A.Compose): Albumentations Compose object describing the transforms that are applied to the inputs.
         split (str | Split | None): Fixed subset split that follows from folder structure on file system.
             Choose from [Split.FULL, Split.TRAIN, Split.TEST]
-        normal_dir (str | Path): Path to the directory containing normal images.
+        normal_dir (str | Path | Sequence): Path to the directory containing normal images.
         root (str | Path | None): Root folder of the dataset.
-        abnormal_dir (str | Path | None, optional): Path to the directory containing abnormal images.
-        normal_test_dir (str | Path | None, optional): Path to the directory containing
+        abnormal_dir (str | Path | Sequence | None, optional): Path to the directory containing abnormal images.
+        normal_test_dir (str | Path | Sequence | None, optional): Path to the directory containing
             normal images for the test dataset. Defaults to None.
-        mask_dir (str | Path | None, optional): Path to the directory containing
+        mask_dir (str | Path | Sequence | None, optional): Path to the directory containing
             the mask annotations. Defaults to None.
         extensions (tuple[str, ...] | None, optional): Type of the image extensions to read from the
             directory.
@@ -155,11 +170,11 @@ def __init__(
         self,
         task: TaskType,
         transform: A.Compose,
-        normal_dir: str | Path,
+        normal_dir: str | Path | Sequence[str | Path],
         root: str | Path | None = None,
-        abnormal_dir: str | Path | None = None,
-        normal_test_dir: str | Path | None = None,
-        mask_dir: str | Path | None = None,
+        abnormal_dir: str | Path | Sequence[str | Path] | None = None,
+        normal_test_dir: str | Path | Sequence[str | Path] | None = None,
+        mask_dir: str | Path | Sequence[str | Path] | None = None,
         split: str | Split | None = None,
         extensions: tuple[str, ...] | None = None,
     ) -> None:
@@ -189,14 +204,14 @@ def _setup(self) -> None:
 class Folder(AnomalibDataModule):
     """Folder DataModule.
     Args:
-        normal_dir (str | Path): Name of the directory containing normal images.
+        normal_dir (str | Path | Sequence): Name of the directory containing normal images.
             Defaults to "normal".
         root (str | Path | None): Path to the root folder containing normal and abnormal dirs.
-        abnormal_dir (str | Path | None): Name of the directory containing abnormal images.
+        abnormal_dir (str | Path | Sequence | None): Name of the directory containing abnormal images.
             Defaults to "abnormal".
-        normal_test_dir (str | Path | None, optional): Path to the directory containing
+        normal_test_dir (str | Path | Sequence | None, optional): Path to the directory containing
             normal images for the test dataset. Defaults to None.
-        mask_dir (str | Path | None, optional): Path to the directory containing
+        mask_dir (str | Path | Sequence | None, optional): Path to the directory containing
             the mask annotations. Defaults to None.
         normal_split_ratio (float, optional): Ratio to split normal training images and add to the
             test set in case test set doesn't contain any normal images.
@@ -228,11 +243,11 @@ class Folder(AnomalibDataModule):
 
     def __init__(
         self,
-        normal_dir: str | Path,
+        normal_dir: str | Path | Sequence[str | Path],
         root: str | Path | None = None,
-        abnormal_dir: str | Path | None = None,
-        normal_test_dir: str | Path | None = None,
-        mask_dir: str | Path | None = None,
+        abnormal_dir: str | Path | Sequence[str | Path] | None = None,
+        normal_test_dir: str | Path | Sequence[str | Path] | None = None,
+        mask_dir: str | Path | Sequence[str | Path] | None = None,
         normal_split_ratio: float = 0.2,
         extensions: tuple[str] | None = None,
         image_size: int | tuple[int, int] | None = None,

@@ -16,6 +16,7 @@
     Visa,
     get_datamodule,
 )
+from anomalib.data.utils import DirType
 from anomalib.pre_processing.transforms import Denormalize, ToNumpy
 from tests.helpers.config import get_test_configurable_parameters
 from tests.helpers.dataset import TestDataset, get_dataset_path
@@ -249,6 +250,18 @@ def test_equal_splits(self, make_data_module, dataset):
             data_module.val_data.samples["image_path"].values == data_module.test_data.samples["image_path"].values
         )
 
+    def test_folder_sequence_inputs(self, make_data_module, dataset):
+        """This test ensures that a list of abnormal dirs yields the abnormal samples of every listed dir."""
+        if dataset == "folder":
+            _large = make_data_module(dataset=dataset, abnormal_dir="broken_large")
+            len_large = len(_large.val_data.samples.loc[_large.val_data.samples.label == DirType.ABNORMAL])
+            _small = make_data_module(dataset=dataset, abnormal_dir="broken_small")
+            len_small = len(_small.val_data.samples.loc[_small.val_data.samples.label == DirType.ABNORMAL])
+
+            data_module = make_data_module(dataset=dataset, abnormal_dir=["broken_large", "broken_small"])
+            len_broken = len(data_module.val_data.samples.loc[data_module.val_data.samples.label == DirType.ABNORMAL])
+            assert len_broken == len_large + len_small
+
 
 class TestDenormalize:
     """Test Denormalize Util."""