Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for receiving folder dataset paths as a list #1265

Merged
Merged
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

### Added

- Add support for receiving dataset paths as a list by @harimkang in https://github.com/openvinotoolkit/anomalib/pull/1265

### Changed

- Enable training with only normal images for MVTec in https://github.com/openvinotoolkit/anomalib/pull/1241
Expand Down
89 changes: 52 additions & 37 deletions src/anomalib/data/folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from pathlib import Path

import albumentations as A
from omegaconf import ListConfig
from pandas import DataFrame

from anomalib.data.base import AnomalibDataModule, AnomalibDataset
Expand All @@ -27,23 +28,23 @@


def make_folder_dataset(
normal_dir: str | Path,
root: str | Path | None = None,
abnormal_dir: str | Path | None = None,
normal_test_dir: str | Path | None = None,
mask_dir: str | Path | None = None,
normal_dir: str | Path | ListConfig,
samet-akcay marked this conversation as resolved.
Show resolved Hide resolved
root: str | Path | ListConfig | None = None,
abnormal_dir: str | Path | ListConfig | None = None,
normal_test_dir: str | Path | ListConfig | None = None,
mask_dir: str | Path | ListConfig | None = None,
split: str | Split | None = None,
extensions: tuple[str, ...] | None = None,
) -> DataFrame:
"""Make Folder Dataset.
Args:
normal_dir (str | Path): Path to the directory containing normal images.
root (str | Path | None): Path to the root directory of the dataset.
abnormal_dir (str | Path | None, optional): Path to the directory containing abnormal images.
normal_test_dir (str | Path | None, optional): Path to the directory containing
normal_dir (str | Path | ListConfig): Path to the directory containing normal images.
root (str | Path | ListConfig | None): Path to the root directory of the dataset.
abnormal_dir (str | Path | ListConfig | None, optional): Path to the directory containing abnormal images.
normal_test_dir (str | Path | ListConfig | None, optional): Path to the directory containing
normal images for the test dataset. Normal test images will be a split of `normal_dir`
if `None`. Defaults to None.
mask_dir (str | Path | None, optional): Path to the directory containing
mask_dir (str | Path | ListConfig | None, optional): Path to the directory containing
the mask annotations. Defaults to None.
split (str | Split | None, optional): Dataset split (i.e., Split.FULL, Split.TRAIN or Split.TEST).
Defaults to None.
Expand All @@ -52,11 +53,24 @@ def make_folder_dataset(
Returns:
DataFrame: an output dataframe containing samples for the requested split (i.e., train or test)
"""
normal_dir = _resolve_path(normal_dir, root)
abnormal_dir = _resolve_path(abnormal_dir, root) if abnormal_dir is not None else None
normal_test_dir = _resolve_path(normal_test_dir, root) if normal_test_dir is not None else None
mask_dir = _resolve_path(mask_dir, root) if mask_dir is not None else None
assert normal_dir.is_dir(), "A folder location must be provided in normal_dir."

def _path_to_list_config_with_resolve_path(path: str | Path | ListConfig | None) -> ListConfig:
"""Function for changing path to ListConfig.
Args:
path (str | Path | ListConfig | None): Path, or list of paths, to convert to a ListConfig.
Returns:
ListConfig: The resolved path(s) wrapped in a ListConfig; empty if `path` is None.
"""
if isinstance(path, ListConfig):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should allow other data types here as well, since only supporting ListConfig is not very API-friendly. The following should probably work:

>>> from anomalib.data import Folder
>>> data_module = Folder(
...     root="datasets/hazelnut_toy/",
...     normal_dir="good",
...     abnormal_dir=["colour", "crack"],
...     image_size=256,
... )
>>> data_module.setup()

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for the comment. I have fixed this. (to Sequence)

return ListConfig(content=[_resolve_path(dir_path, root) for dir_path in path])
return ListConfig(content=[_resolve_path(path, root)] if path is not None else [])

# All paths are changed to the ListConfig type and used.
normal_dir = _path_to_list_config_with_resolve_path(normal_dir)
abnormal_dir = _path_to_list_config_with_resolve_path(abnormal_dir)
normal_test_dir = _path_to_list_config_with_resolve_path(normal_test_dir)
mask_dir = _path_to_list_config_with_resolve_path(mask_dir)
assert len(normal_dir) > 0, "A folder location must be provided in normal_dir."

filenames = []
labels = []
Expand All @@ -71,10 +85,11 @@ def make_folder_dataset(
if mask_dir:
dirs = {**dirs, **{DirType.MASK: mask_dir}}

for dir_type, path in dirs.items():
filename, label = _prepare_files_labels(path, dir_type, extensions)
filenames += filename
labels += label
for dir_type, paths in dirs.items():
for path in paths:
filename, label = _prepare_files_labels(path, dir_type, extensions)
filenames += filename
labels += label

samples = DataFrame({"image_path": filenames, "label": labels})
samples = samples.sort_values(by="image_path", ignore_index=True)
Expand All @@ -88,7 +103,7 @@ def make_folder_dataset(

# If a path to mask is provided, add it to the sample dataframe.

if mask_dir is not None and abnormal_dir is not None:
if len(mask_dir) > 0 and len(abnormal_dir) > 0:
samples.loc[samples.label == DirType.ABNORMAL, "mask_path"] = samples.loc[
samples.label == DirType.MASK
].image_path.values
Expand Down Expand Up @@ -136,12 +151,12 @@ class FolderDataset(AnomalibDataset):
transform (A.Compose): Albumentations Compose object describing the transforms that are applied to the inputs.
split (str | Split | None): Fixed subset split that follows from folder structure on file system.
Choose from [Split.FULL, Split.TRAIN, Split.TEST]
normal_dir (str | Path): Path to the directory containing normal images.
normal_dir (str | Path | ListConfig): Path to the directory containing normal images.
root (str | Path | None): Root folder of the dataset.
abnormal_dir (str | Path | None, optional): Path to the directory containing abnormal images.
normal_test_dir (str | Path | None, optional): Path to the directory containing
abnormal_dir (str | Path | ListConfig | None, optional): Path to the directory containing abnormal images.
normal_test_dir (str | Path | ListConfig | None, optional): Path to the directory containing
normal images for the test dataset. Defaults to None.
mask_dir (str | Path | None, optional): Path to the directory containing
mask_dir (str | Path | ListConfig | None, optional): Path to the directory containing
the mask annotations. Defaults to None.
extensions (tuple[str, ...] | None, optional): Type of the image extensions to read from the
directory.
Expand All @@ -155,11 +170,11 @@ def __init__(
self,
task: TaskType,
transform: A.Compose,
normal_dir: str | Path,
root: str | Path | None = None,
abnormal_dir: str | Path | None = None,
normal_test_dir: str | Path | None = None,
mask_dir: str | Path | None = None,
normal_dir: str | Path | ListConfig,
root: str | Path | ListConfig | None = None,
abnormal_dir: str | Path | ListConfig | None = None,
normal_test_dir: str | Path | ListConfig | None = None,
mask_dir: str | Path | ListConfig | None = None,
split: str | Split | None = None,
extensions: tuple[str, ...] | None = None,
) -> None:
Expand Down Expand Up @@ -189,14 +204,14 @@ def _setup(self) -> None:
class Folder(AnomalibDataModule):
"""Folder DataModule.
Args:
normal_dir (str | Path): Name of the directory containing normal images.
normal_dir (str | Path | ListConfig): Name of the directory containing normal images.
Defaults to "normal".
root (str | Path | None): Path to the root folder containing normal and abnormal dirs.
abnormal_dir (str | Path | None): Name of the directory containing abnormal images.
abnormal_dir (str | Path | ListConfig | None): Name of the directory containing abnormal images.
Defaults to "abnormal".
normal_test_dir (str | Path | None, optional): Path to the directory containing
normal_test_dir (str | Path | ListConfig | None, optional): Path to the directory containing
normal images for the test dataset. Defaults to None.
mask_dir (str | Path | None, optional): Path to the directory containing
mask_dir (str | Path | ListConfig | None, optional): Path to the directory containing
the mask annotations. Defaults to None.
normal_split_ratio (float, optional): Ratio to split normal training images and add to the
test set in case test set doesn't contain any normal images.
Expand Down Expand Up @@ -228,11 +243,11 @@ class Folder(AnomalibDataModule):

def __init__(
self,
normal_dir: str | Path,
normal_dir: str | Path | ListConfig,
root: str | Path | None = None,
abnormal_dir: str | Path | None = None,
normal_test_dir: str | Path | None = None,
mask_dir: str | Path | None = None,
abnormal_dir: str | Path | ListConfig | None = None,
normal_test_dir: str | Path | ListConfig | None = None,
mask_dir: str | Path | ListConfig | None = None,
normal_split_ratio: float = 0.2,
extensions: tuple[str] | None = None,
image_size: int | tuple[int, int] | None = None,
Expand Down
Loading