Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for receiving folder dataset paths as a list #1265

Merged
Merged
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

### Added

- Add support for receiving dataset paths as a list by @harimkang in https://github.com/openvinotoolkit/anomalib/pull/1265

### Changed

- Enable training with only normal images for MVTec in https://github.com/openvinotoolkit/anomalib/pull/1241
Expand Down
14 changes: 14 additions & 0 deletions docs/source/how_to_guides/train_custom_data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,20 @@ Let's choose `Padim algorithm <https://arxiv.org/pdf/2011.08785.pdf>`_, copy the
...
.. note::

Each dir value can take multiple folder inputs in the form of a list as shown below.

.. code-block:: yaml
dataset:
normal_dir: ["normal_1", "normal_2"] # Reads the images in the folders in the list.
abnormal_dir: # Standard YAML block-style lists can also be used.
- abnormal_1
- abnormal_2
mask_dir: ["mask_1", "mask_2"] # optional
normal_test_dir: null # optional
Step 5: Run Training
--------------------

Expand Down
83 changes: 49 additions & 34 deletions src/anomalib/data/folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from __future__ import annotations

from pathlib import Path
from typing import List, Sequence

import albumentations as A
from pandas import DataFrame
Expand All @@ -27,23 +28,23 @@


def make_folder_dataset(
normal_dir: str | Path,
normal_dir: str | Path | Sequence[str | Path],
root: str | Path | None = None,
abnormal_dir: str | Path | None = None,
normal_test_dir: str | Path | None = None,
mask_dir: str | Path | None = None,
abnormal_dir: str | Path | Sequence[str | Path] | None = None,
normal_test_dir: str | Path | Sequence[str | Path] | None = None,
mask_dir: str | Path | Sequence[str | Path] | None = None,
split: str | Split | None = None,
extensions: tuple[str, ...] | None = None,
) -> DataFrame:
"""Make Folder Dataset.
Args:
normal_dir (str | Path): Path to the directory containing normal images.
normal_dir (str | Path | Sequence): Path to the directory containing normal images.
root (str | Path | None): Path to the root directory of the dataset.
abnormal_dir (str | Path | None, optional): Path to the directory containing abnormal images.
normal_test_dir (str | Path | None, optional): Path to the directory containing
abnormal_dir (str | Path | Sequence | None, optional): Path to the directory containing abnormal images.
normal_test_dir (str | Path | Sequence | None, optional): Path to the directory containing
normal images for the test dataset. Normal test images will be a split of `normal_dir`
if `None`. Defaults to None.
mask_dir (str | Path | None, optional): Path to the directory containing
mask_dir (str | Path | Sequence | None, optional): Path to the directory containing
the mask annotations. Defaults to None.
split (str | Split | None, optional): Dataset split (ie., Split.FULL, Split.TRAIN or Split.TEST).
Defaults to None.
Expand All @@ -52,11 +53,24 @@ def make_folder_dataset(
Returns:
DataFrame: an output dataframe containing samples for the requested split (ie., train or test)
"""
normal_dir = _resolve_path(normal_dir, root)
abnormal_dir = _resolve_path(abnormal_dir, root) if abnormal_dir is not None else None
normal_test_dir = _resolve_path(normal_test_dir, root) if normal_test_dir is not None else None
mask_dir = _resolve_path(mask_dir, root) if mask_dir is not None else None
assert normal_dir.is_dir(), "A folder location must be provided in normal_dir."

def _path_to_list_config_with_resolve_path(path: str | Path | Sequence[str | Path] | None) -> List[Path]:
samet-akcay marked this conversation as resolved.
Show resolved Hide resolved
samet-akcay marked this conversation as resolved.
Show resolved Hide resolved
"""Convert a single path or a sequence of paths into a list of resolved paths.
Args:
path (str | Path | Sequence | None): A single path, a sequence of paths, or None.
Returns:
List[Path]: Each given path passed through ``_resolve_path`` with ``root``; an empty list if ``path`` is None.
"""
samet-akcay marked this conversation as resolved.
Show resolved Hide resolved
if isinstance(path, Sequence) and not isinstance(path, str):
return [_resolve_path(dir_path, root) for dir_path in path]
return [_resolve_path(path, root)] if path is not None else []

# All paths are changed to the List[Path] type and used.
normal_dir = _path_to_list_config_with_resolve_path(normal_dir)
abnormal_dir = _path_to_list_config_with_resolve_path(abnormal_dir)
normal_test_dir = _path_to_list_config_with_resolve_path(normal_test_dir)
mask_dir = _path_to_list_config_with_resolve_path(mask_dir)
assert len(normal_dir) > 0, "A folder location must be provided in normal_dir."

filenames = []
labels = []
Expand All @@ -71,10 +85,11 @@ def make_folder_dataset(
if mask_dir:
dirs = {**dirs, **{DirType.MASK: mask_dir}}

for dir_type, path in dirs.items():
filename, label = _prepare_files_labels(path, dir_type, extensions)
filenames += filename
labels += label
for dir_type, paths in dirs.items():
for path in paths:
filename, label = _prepare_files_labels(path, dir_type, extensions)
filenames += filename
labels += label

samples = DataFrame({"image_path": filenames, "label": labels})
samples = samples.sort_values(by="image_path", ignore_index=True)
Expand All @@ -88,7 +103,7 @@ def make_folder_dataset(

# If a path to mask is provided, add it to the sample dataframe.

if mask_dir is not None and abnormal_dir is not None:
if len(mask_dir) > 0 and len(abnormal_dir) > 0:
samples.loc[samples.label == DirType.ABNORMAL, "mask_path"] = samples.loc[
samples.label == DirType.MASK
].image_path.values
Expand Down Expand Up @@ -136,12 +151,12 @@ class FolderDataset(AnomalibDataset):
transform (A.Compose): Albumentations Compose object describing the transforms that are applied to the inputs.
split (str | Split | None): Fixed subset split that follows from folder structure on file system.
Choose from [Split.FULL, Split.TRAIN, Split.TEST]
normal_dir (str | Path): Path to the directory containing normal images.
normal_dir (str | Path | Sequence): Path to the directory containing normal images.
root (str | Path | None): Root folder of the dataset.
abnormal_dir (str | Path | None, optional): Path to the directory containing abnormal images.
normal_test_dir (str | Path | None, optional): Path to the directory containing
abnormal_dir (str | Path | Sequence | None, optional): Path to the directory containing abnormal images.
normal_test_dir (str | Path | Sequence | None, optional): Path to the directory containing
normal images for the test dataset. Defaults to None.
mask_dir (str | Path | None, optional): Path to the directory containing
mask_dir (str | Path | Sequence | None, optional): Path to the directory containing
the mask annotations. Defaults to None.
extensions (tuple[str, ...] | None, optional): Type of the image extensions to read from the
directory.
Expand All @@ -155,11 +170,11 @@ def __init__(
self,
task: TaskType,
transform: A.Compose,
normal_dir: str | Path,
normal_dir: str | Path | Sequence[str | Path],
root: str | Path | None = None,
abnormal_dir: str | Path | None = None,
normal_test_dir: str | Path | None = None,
mask_dir: str | Path | None = None,
abnormal_dir: str | Path | Sequence[str | Path] | None = None,
normal_test_dir: str | Path | Sequence[str | Path] | None = None,
mask_dir: str | Path | Sequence[str | Path] | None = None,
split: str | Split | None = None,
extensions: tuple[str, ...] | None = None,
) -> None:
Expand Down Expand Up @@ -189,14 +204,14 @@ def _setup(self) -> None:
class Folder(AnomalibDataModule):
"""Folder DataModule.
Args:
normal_dir (str | Path): Name of the directory containing normal images.
normal_dir (str | Path | Sequence): Name of the directory containing normal images.
Defaults to "normal".
root (str | Path | None): Path to the root folder containing normal and abnormal dirs.
abnormal_dir (str | Path | None): Name of the directory containing abnormal images.
abnormal_dir (str | Path | Sequence | None): Name of the directory containing abnormal images.
Defaults to "abnormal".
normal_test_dir (str | Path | None, optional): Path to the directory containing
normal_test_dir (str | Path | Sequence | None, optional): Path to the directory containing
normal images for the test dataset. Defaults to None.
mask_dir (str | Path | None, optional): Path to the directory containing
mask_dir (str | Path | Sequence | None, optional): Path to the directory containing
the mask annotations. Defaults to None.
normal_split_ratio (float, optional): Ratio to split normal training images and add to the
test set in case test set doesn't contain any normal images.
Expand Down Expand Up @@ -228,11 +243,11 @@ class Folder(AnomalibDataModule):

def __init__(
self,
normal_dir: str | Path,
normal_dir: str | Path | Sequence[str | Path],
root: str | Path | None = None,
abnormal_dir: str | Path | None = None,
normal_test_dir: str | Path | None = None,
mask_dir: str | Path | None = None,
abnormal_dir: str | Path | Sequence[str | Path] | None = None,
normal_test_dir: str | Path | Sequence[str | Path] | None = None,
mask_dir: str | Path | Sequence[str | Path] | None = None,
normal_split_ratio: float = 0.2,
extensions: tuple[str] | None = None,
image_size: int | tuple[int, int] | None = None,
Expand Down
13 changes: 13 additions & 0 deletions tests/pre_merge/datasets/test_datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
Visa,
get_datamodule,
)
from anomalib.data.utils import DirType
from anomalib.pre_processing.transforms import Denormalize, ToNumpy
from tests.helpers.config import get_test_configurable_parameters
from tests.helpers.dataset import TestDataset, get_dataset_path
Expand Down Expand Up @@ -249,6 +250,18 @@ def test_equal_splits(self, make_data_module, dataset):
data_module.val_data.samples["image_path"].values == data_module.test_data.samples["image_path"].values
)

def test_folder_sequence_inputs(self, make_data_module, dataset):
"""This test ensures that passing a list of abnormal directories yields as many abnormal validation samples as the individual directories combined."""
if dataset == "folder":
_large = make_data_module(dataset=dataset, abnormal_dir="broken_large")
len_large = len(_large.val_data.samples.loc[_large.val_data.samples.label == DirType.ABNORMAL])
_small = make_data_module(dataset=dataset, abnormal_dir="broken_small")
len_small = len(_small.val_data.samples.loc[_small.val_data.samples.label == DirType.ABNORMAL])

data_module = make_data_module(dataset=dataset, abnormal_dir=["broken_large", "broken_small"])
len_broken = len(data_module.val_data.samples.loc[data_module.val_data.samples.label == DirType.ABNORMAL])
assert len_broken == len_large + len_small


class TestDenormalize:
"""Test Denormalize Util."""
Expand Down
Loading