Skip to content

Commit

Permalink
Add support for receiving folder dataset paths as a list (#1265)
Browse files Browse the repository at this point in the history
* Add List input as folder path

* Modify CHANGELOG.md

* Update if not to if

* Update ListConfig to Sequence

* Add test case for folder dataset with list input.

* Rename function & reflect reviews

* Change test dataset bottle to hazelnut_toy

* Change docstring

* Fix test function docstring

* Recheck test dataset paths

* mask_dir to None

---------

Co-authored-by: Samet Akcay <samet.akcay@intel.com>
  • Loading branch information
harimkang and samet-akcay authored Aug 17, 2023
1 parent 2332aae commit 74b1c6a
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 35 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

### Added

- Add support for receiving dataset paths as a list by @harimkang in https://github.com/openvinotoolkit/anomalib/pull/1265

### Changed

- Enable training with only normal images for MVTecv in https://github.com/openvinotoolkit/anomalib/pull/1241
Expand Down
14 changes: 14 additions & 0 deletions docs/source/how_to_guides/train_custom_data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,20 @@ Let's choose `Padim algorithm <https://arxiv.org/pdf/2011.08785.pdf>`_, copy the
...
.. note::

    Each ``*_dir`` value can also take multiple folders in the form of a list, as shown below.

    .. code-block:: yaml

        dataset:
            normal_dir: ["normal_1", "normal_2"] # Reads the images from every folder in the list.
            abnormal_dir: # Standard YAML block-list syntax can also be used.
                - abnormal_1
                - abnormal_2
            mask_dir: ["mask_1", "mask_2"] # optional
            normal_test_dir: null # optional

Step 5: Run Training
--------------------

Expand Down
88 changes: 54 additions & 34 deletions src/anomalib/data/folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from __future__ import annotations

from pathlib import Path
from typing import Sequence

import albumentations as A
from pandas import DataFrame
Expand All @@ -27,23 +28,23 @@


def make_folder_dataset(
normal_dir: str | Path,
normal_dir: str | Path | Sequence[str | Path],
root: str | Path | None = None,
abnormal_dir: str | Path | None = None,
normal_test_dir: str | Path | None = None,
mask_dir: str | Path | None = None,
abnormal_dir: str | Path | Sequence[str | Path] | None = None,
normal_test_dir: str | Path | Sequence[str | Path] | None = None,
mask_dir: str | Path | Sequence[str | Path] | None = None,
split: str | Split | None = None,
extensions: tuple[str, ...] | None = None,
) -> DataFrame:
"""Make Folder Dataset.
Args:
normal_dir (str | Path): Path to the directory containing normal images.
normal_dir (str | Path | Sequence): Path to the directory containing normal images.
root (str | Path | None): Path to the root directory of the dataset.
abnormal_dir (str | Path | None, optional): Path to the directory containing abnormal images.
normal_test_dir (str | Path | None, optional): Path to the directory containing
abnormal_dir (str | Path | Sequence | None, optional): Path to the directory containing abnormal images.
normal_test_dir (str | Path | Sequence | None, optional): Path to the directory containing
normal images for the test dataset. Normal test images will be a split of `normal_dir`
if `None`. Defaults to None.
mask_dir (str | Path | None, optional): Path to the directory containing
mask_dir (str | Path | Sequence | None, optional): Path to the directory containing
the mask annotations. Defaults to None.
split (str | Split | None, optional): Dataset split (ie., Split.FULL, Split.TRAIN or Split.TEST).
Defaults to None.
Expand All @@ -52,11 +53,29 @@ def make_folder_dataset(
Returns:
DataFrame: an output dataframe containing samples for the requested split (ie., train or test)
"""
normal_dir = _resolve_path(normal_dir, root)
abnormal_dir = _resolve_path(abnormal_dir, root) if abnormal_dir is not None else None
normal_test_dir = _resolve_path(normal_test_dir, root) if normal_test_dir is not None else None
mask_dir = _resolve_path(mask_dir, root) if mask_dir is not None else None
assert normal_dir.is_dir(), "A folder location must be provided in normal_dir."

def _resolve_path_and_convert_to_list(path: str | Path | Sequence[str | Path] | None) -> list[Path]:
"""Convert path to list of paths.
Args:
path (str | Path | Sequence | None): Path to replace with Sequence[str | Path].
Examples:
>>> _resolve_path_and_convert_to_list("dir")
[Path("path/to/dir")]
>>> _resolve_path_and_convert_to_list(["dir1", "dir2"])
[Path("path/to/dir1"), Path("path/to/dir2")]
Returns:
list[Path]: The result of path replaced by Sequence[str | Path].
"""
if isinstance(path, Sequence) and not isinstance(path, str):
return [_resolve_path(dir_path, root) for dir_path in path]
return [_resolve_path(path, root)] if path is not None else []

# Normalise every directory argument to a list[Path] before it is used below.
normal_dir = _resolve_path_and_convert_to_list(normal_dir)
abnormal_dir = _resolve_path_and_convert_to_list(abnormal_dir)
normal_test_dir = _resolve_path_and_convert_to_list(normal_test_dir)
mask_dir = _resolve_path_and_convert_to_list(mask_dir)
assert len(normal_dir) > 0, "A folder location must be provided in normal_dir."

filenames = []
labels = []
Expand All @@ -71,10 +90,11 @@ def make_folder_dataset(
if mask_dir:
dirs = {**dirs, **{DirType.MASK: mask_dir}}

for dir_type, path in dirs.items():
filename, label = _prepare_files_labels(path, dir_type, extensions)
filenames += filename
labels += label
for dir_type, paths in dirs.items():
for path in paths:
filename, label = _prepare_files_labels(path, dir_type, extensions)
filenames += filename
labels += label

samples = DataFrame({"image_path": filenames, "label": labels})
samples = samples.sort_values(by="image_path", ignore_index=True)
Expand All @@ -88,7 +108,7 @@ def make_folder_dataset(

# If a path to mask is provided, add it to the sample dataframe.

if mask_dir is not None and abnormal_dir is not None:
if len(mask_dir) > 0 and len(abnormal_dir) > 0:
samples.loc[samples.label == DirType.ABNORMAL, "mask_path"] = samples.loc[
samples.label == DirType.MASK
].image_path.values
Expand Down Expand Up @@ -136,12 +156,12 @@ class FolderDataset(AnomalibDataset):
transform (A.Compose): Albumentations Compose object describing the transforms that are applied to the inputs.
split (str | Split | None): Fixed subset split that follows from folder structure on file system.
Choose from [Split.FULL, Split.TRAIN, Split.TEST]
normal_dir (str | Path): Path to the directory containing normal images.
normal_dir (str | Path | Sequence): Path to the directory containing normal images.
root (str | Path | None): Root folder of the dataset.
abnormal_dir (str | Path | None, optional): Path to the directory containing abnormal images.
normal_test_dir (str | Path | None, optional): Path to the directory containing
abnormal_dir (str | Path | Sequence | None, optional): Path to the directory containing abnormal images.
normal_test_dir (str | Path | Sequence | None, optional): Path to the directory containing
normal images for the test dataset. Defaults to None.
mask_dir (str | Path | None, optional): Path to the directory containing
mask_dir (str | Path | Sequence | None, optional): Path to the directory containing
the mask annotations. Defaults to None.
extensions (tuple[str, ...] | None, optional): Type of the image extensions to read from the
directory.
Expand All @@ -155,11 +175,11 @@ def __init__(
self,
task: TaskType,
transform: A.Compose,
normal_dir: str | Path,
normal_dir: str | Path | Sequence[str | Path],
root: str | Path | None = None,
abnormal_dir: str | Path | None = None,
normal_test_dir: str | Path | None = None,
mask_dir: str | Path | None = None,
abnormal_dir: str | Path | Sequence[str | Path] | None = None,
normal_test_dir: str | Path | Sequence[str | Path] | None = None,
mask_dir: str | Path | Sequence[str | Path] | None = None,
split: str | Split | None = None,
extensions: tuple[str, ...] | None = None,
) -> None:
Expand Down Expand Up @@ -189,14 +209,14 @@ def _setup(self) -> None:
class Folder(AnomalibDataModule):
"""Folder DataModule.
Args:
normal_dir (str | Path): Name of the directory containing normal images.
normal_dir (str | Path | Sequence): Name of the directory containing normal images.
Defaults to "normal".
root (str | Path | None): Path to the root folder containing normal and abnormal dirs.
abnormal_dir (str | Path | None): Name of the directory containing abnormal images.
abnormal_dir (str | Path | None | Sequence): Name of the directory containing abnormal images.
Defaults to "abnormal".
normal_test_dir (str | Path | None, optional): Path to the directory containing
normal_test_dir (str | Path | Sequence | None, optional): Path to the directory containing
normal images for the test dataset. Defaults to None.
mask_dir (str | Path | None, optional): Path to the directory containing
mask_dir (str | Path | Sequence | None, optional): Path to the directory containing
the mask annotations. Defaults to None.
normal_split_ratio (float, optional): Ratio to split normal training images and add to the
test set in case test set doesn't contain any normal images.
Expand Down Expand Up @@ -228,11 +248,11 @@ class Folder(AnomalibDataModule):

def __init__(
self,
normal_dir: str | Path,
normal_dir: str | Path | Sequence[str | Path],
root: str | Path | None = None,
abnormal_dir: str | Path | None = None,
normal_test_dir: str | Path | None = None,
mask_dir: str | Path | None = None,
abnormal_dir: str | Path | Sequence[str | Path] | None = None,
normal_test_dir: str | Path | Sequence[str | Path] | None = None,
mask_dir: str | Path | Sequence[str | Path] | None = None,
normal_split_ratio: float = 0.2,
extensions: tuple[str] | None = None,
image_size: int | tuple[int, int] | None = None,
Expand Down
34 changes: 33 additions & 1 deletion tests/pre_merge/datasets/test_datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
Visa,
get_datamodule,
)
from anomalib.data.utils import DirType
from anomalib.pre_processing.transforms import Denormalize, ToNumpy
from tests.helpers.config import get_test_configurable_parameters
from tests.helpers.dataset import TestDataset, get_dataset_path
Expand Down Expand Up @@ -90,9 +91,10 @@ def make_folder_data_module(
abnormal_dir="broken_large",
normal_test_dir="good_test",
mask_dir="ground_truth/broken_large",
dataset_name="bottle",
):
"""Create Folder Data Module."""
root = get_dataset_path(dataset="bottle")
root = get_dataset_path(dataset=dataset_name)
data_module = Folder(
root=root,
normal_dir=normal_dir,
Expand Down Expand Up @@ -249,6 +251,36 @@ def test_equal_splits(self, make_data_module, dataset):
data_module.val_data.samples["image_path"].values == data_module.test_data.samples["image_path"].values
)

def test_folder_sequence_inputs(self, make_data_module, dataset):
    """Check that a list of abnormal folders yields as many abnormal samples as the folders do separately."""
    if dataset != "folder":
        return

    def count_abnormal(abnormal_dir):
        """Build a data module for ``abnormal_dir`` and count its abnormal validation samples."""
        module = make_data_module(
            dataset=dataset,
            abnormal_dir=abnormal_dir,
            dataset_name="hazelnut_toy",
            normal_test_dir="good",
            mask_dir=None,
        )
        return len(module.val_data.samples.loc[module.val_data.samples.label == DirType.ABNORMAL])

    len_colour = count_abnormal("colour")
    len_crack = count_abnormal("crack")
    len_merged = count_abnormal(["colour", "crack"])
    assert len_merged == len_colour + len_crack


class TestDenormalize:
"""Test Denormalize Util."""
Expand Down

0 comments on commit 74b1c6a

Please sign in to comment.