From 7ac7a73480ec0856720dd9c877745791b77f5b87 Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Tue, 27 Feb 2024 13:51:38 +0000 Subject: [PATCH] =?UTF-8?q?=F0=9F=94=92=20Replace=20`md5`=20with=20`sha-25?= =?UTF-8?q?6`=20(#1680)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix metadata path * Ignore hidden directories in folder dataset * Add check for mask_dir for segmentation tasks in Folder dataset * Limit the gradio version to <4 * Replace md5 with sha256, Signed off: Samet Akcay samet.akcay@intel.com * Rename checksum to hashsum since we use cryptographic algorithms * Update btech hashsum * update kolektor hashsum * Update hashsum for mvtec download info * Update hashsum in Visa image download info * Update hashsums for Avenue Dataset and Annotations * Update ucsd hashsum * Update shanghai tech hashsum * Update dsr weight hashsum * Update efficient ad weight hashsum * Fix albumentation tests * Fix albumentation tests --- src/anomalib/data/depth/mvtec_3d.py | 2 +- src/anomalib/data/image/btech.py | 2 +- src/anomalib/data/image/kolektor.py | 2 +- src/anomalib/data/image/mvtec.py | 2 +- src/anomalib/data/image/visa.py | 2 +- src/anomalib/data/utils/download.py | 50 ++++++++++++++++--- src/anomalib/data/video/avenue.py | 4 +- src/anomalib/data/video/shanghaitech.py | 2 +- src/anomalib/data/video/ucsd_ped.py | 2 +- .../models/image/dsr/lightning_model.py | 2 +- .../image/efficient_ad/lightning_model.py | 8 +-- 11 files changed, 56 insertions(+), 22 deletions(-) diff --git a/src/anomalib/data/depth/mvtec_3d.py b/src/anomalib/data/depth/mvtec_3d.py index 33a28eea23..a41411a547 100644 --- a/src/anomalib/data/depth/mvtec_3d.py +++ b/src/anomalib/data/depth/mvtec_3d.py @@ -50,7 +50,7 @@ name="mvtec_3d", url="https://www.mydrive.ch/shares/45920/dd1eb345346df066c63b5c95676b961b/download/428824485-1643285832" "/mvtec_3d_anomaly_detection.tar.xz", - checksum="d8bb2800fbf3ac88e798da6ae10dc819", + hashsum="d8bb2800fbf3ac88e798da6ae10dc819", ) CATEGORIES = ("bagel", "cable_gland", "carrot", "cookie", "dowel", "foam", "peach", "potato", "rope", "tire") diff --git a/src/anomalib/data/image/btech.py b/src/anomalib/data/image/btech.py index 147cf67019..af1287ee7f 100644 --- a/src/anomalib/data/image/btech.py +++ b/src/anomalib/data/image/btech.py @@ -38,7 +38,7 @@ DOWNLOAD_INFO = DownloadInfo( name="btech", url="https://avires.dimi.uniud.it/papers/btad/btad.zip", - checksum="c1fa4d56ac50dd50908ce04e81037a8e", + hashsum="461c9387e515bfed41ecaae07c50cf6b10def647b36c9e31d239ab2736b10d2a", ) CATEGORIES = ("01", "02", "03") diff --git a/src/anomalib/data/image/kolektor.py b/src/anomalib/data/image/kolektor.py index 1b6e7e55c4..b7cc42cf97 100644 --- a/src/anomalib/data/image/kolektor.py +++ b/src/anomalib/data/image/kolektor.py @@ -46,7 +46,7 @@ DOWNLOAD_INFO = DownloadInfo( name="kolektor", url="https://go.vicos.si/kolektorsdd", - checksum="2b094030343c1cd59df02203ac6c57a0", + hashsum="65dc621693418585de9c4467d1340ea7958a6181816f0dc2883a1e8b61f9d4dc", filename="KolektorSDD.zip", ) diff --git a/src/anomalib/data/image/mvtec.py b/src/anomalib/data/image/mvtec.py index 2892732b9e..2ff4b56147 100644 --- a/src/anomalib/data/image/mvtec.py +++ b/src/anomalib/data/image/mvtec.py @@ -56,7 +56,7 @@ name="mvtec", url="https://www.mydrive.ch/shares/38536/3830184030e49fe74747669442f0f282/download/420938113-1629952094" "/mvtec_anomaly_detection.tar.xz", - checksum="eefca59f2cede9c3fc5b6befbfec275e", + hashsum="cf4313b13603bec67abb49ca959488f7eedce2a9f7795ec54446c649ac98cd3d", ) CATEGORIES = ( diff --git a/src/anomalib/data/image/visa.py b/src/anomalib/data/image/visa.py index f7aed601e8..8dd70c3b4e 100644 --- a/src/anomalib/data/image/visa.py +++ b/src/anomalib/data/image/visa.py @@ -51,7 +51,7 @@ DOWNLOAD_INFO = DownloadInfo( name="VisA", url="https://amazon-visual-anomaly.s3.us-west-2.amazonaws.com/VisA_20220922.tar", - checksum="ef908989b6dc701fc218f643c127a4de", + hashsum="2eb8690c803ab37de0324772964100169ec8ba1fa3f7e94291c9ca673f40f362", ) CATEGORIES = ( diff --git a/src/anomalib/data/utils/download.py b/src/anomalib/data/utils/download.py index 4d12c2f280..558768b654 100644 --- a/src/anomalib/data/utils/download.py +++ b/src/anomalib/data/utils/download.py @@ -28,7 +28,7 @@ class DownloadInfo: name: str url: str - checksum: str + hashsum: str filename: str | None = None @@ -233,17 +233,51 @@ def safe_extract(tar_file: TarFile, root: Path, members: list[TarInfo]) -> None: tar_file.extract(member, root) -def hash_check(file_path: Path, expected_hash: str) -> None: - """Raise assert error if hash does not match the calculated hash of the file. +def generate_hash(file_path: str | Path, algorithm: str = "sha256") -> str: + """Generate a hash of a file using the specified algorithm. + + Args: + file_path (str | Path): Path to the file to hash. + algorithm (str): The hashing algorithm to use (e.g., 'sha256', 'sha3_512'). + + Returns: + str: The hexadecimal hash string of the file. + + Raises: + ValueError: If the specified hashing algorithm is not supported. + """ + # Get the hashing algorithm. + try: + hasher = getattr(hashlib, algorithm)() + except AttributeError as err: + msg = f"Unsupported hashing algorithm: {algorithm}" + raise ValueError(msg) from err + + # Read the file in chunks to avoid loading it all into memory + with Path(file_path).open("rb") as file: + for chunk in iter(lambda: file.read(4096), b""): + hasher.update(chunk) + + # Return the computed hash value in hexadecimal format + return hasher.hexdigest() + + +def check_hash(file_path: Path, expected_hash: str, algorithm: str = "sha256") -> None: + """Raise value error if hash does not match the calculated hash of the file. Args: file_path (Path): Path to file. expected_hash (str): Expected hash of the file. + algorithm (str): Hashing algorithm to use ('sha256', 'sha3_512', etc.). """ - with file_path.open("rb") as hash_file: - assert ( - hashlib.new(name="md5", data=hash_file.read(), usedforsecurity=False).hexdigest() == expected_hash - ), f"Downloaded file {file_path} does not match the required hash." + # Compare the calculated hash with the expected hash + calculated_hash = generate_hash(file_path, algorithm) + if calculated_hash != expected_hash: + msg = ( + f"Calculated hash {calculated_hash} of downloaded file {file_path} does not match the required hash " + f"{expected_hash}." + ) + raise ValueError(msg) def extract(file_name: Path, root: Path) -> None: @@ -303,7 +337,7 @@ def download_and_extract(root: Path, info: DownloadInfo) -> None: reporthook=progress_bar.update_to, ) logger.info("Checking the hash of the downloaded file.") - hash_check(downloaded_file_path, info.checksum) + check_hash(downloaded_file_path, info.hashsum) else: msg = f"Invalid URL to download dataset. Supported 'http://' or 'https://' but '{info.url}' is requested" raise RuntimeError(msg) diff --git a/src/anomalib/data/video/avenue.py b/src/anomalib/data/video/avenue.py index e7109466d9..baa41d3d0c 100644 --- a/src/anomalib/data/video/avenue.py +++ b/src/anomalib/data/video/avenue.py @@ -50,12 +50,12 @@ DATASET_DOWNLOAD_INFO = DownloadInfo( name="Avenue Dataset", url="http://www.cse.cuhk.edu.hk/leojia/projects/detectabnormal/Avenue_Dataset.zip", - checksum="b7a34b212ecdd30efbd989a6dcb1aceb", + hashsum="fc9cb8432a11ca79c18aa180c72524011411b69d3b0ff27c8816e41c0de61531", ) ANNOTATIONS_DOWNLOAD_INFO = DownloadInfo( name="Avenue Annotations", url="http://www.cse.cuhk.edu.hk/leojia/projects/detectabnormal/ground_truth_demo.zip", - checksum="e8e3bff99195b6b511534083b9dbe1f5", + hashsum="60fec1728ec8f73a58aad3aeb5729d70a805a47e0b8eb4bf91ab67ef06386d77", ) diff --git a/src/anomalib/data/video/shanghaitech.py b/src/anomalib/data/video/shanghaitech.py index 9dccff8863..d4b05f1bfd 100644 --- a/src/anomalib/data/video/shanghaitech.py +++ b/src/anomalib/data/video/shanghaitech.py @@ -49,7 +49,7 @@ DATASET_DOWNLOAD_INFO = DownloadInfo( name="ShanghaiTech Dataset", url="http://101.32.75.151:8181/dataset/shanghaitech.tar.gz", - checksum="08494decd30fb0fa213b519a9c555040", + hashsum="c13a827043b259ccf8493c9d9130486872992153a9d714fe229e523cd4c94116", ) diff --git a/src/anomalib/data/video/ucsd_ped.py b/src/anomalib/data/video/ucsd_ped.py index d2e304dde9..05dbba8b8e 100644 --- a/src/anomalib/data/video/ucsd_ped.py +++ b/src/anomalib/data/video/ucsd_ped.py @@ -38,7 +38,7 @@ DOWNLOAD_INFO = DownloadInfo( name="UCSD Pedestrian", url="http://www.svcl.ucsd.edu/projects/anomaly/UCSD_Anomaly_Dataset.tar.gz", - checksum="5006421b89885f45a6f93b041145f2eb", + hashsum="2329af326951f5097fdd114c50e853957d3e569493a49d22fc082a9fd791915b", ) CATEGORIES = ("UCSDped1", "UCSDped2") diff --git a/src/anomalib/models/image/dsr/lightning_model.py b/src/anomalib/models/image/dsr/lightning_model.py index c4b4963409..29f3ad9454 100644 --- a/src/anomalib/models/image/dsr/lightning_model.py +++ b/src/anomalib/models/image/dsr/lightning_model.py @@ -29,7 +29,7 @@ WEIGHTS_DOWNLOAD_INFO = DownloadInfo( name="vq_model_pretrained_128_4096.pckl", url="https://github.com/openvinotoolkit/anomalib/releases/download/dsr_pretrained_weights/dsr_vq_model_pretrained.zip", - checksum="927f6b40841a7c885d12217c922b2bba", + hashsum="52fe7504ec8e9df70b4382f287ab26269dcfe000cd7a7e146a52c6f146f34afb", ) diff --git a/src/anomalib/models/image/efficient_ad/lightning_model.py b/src/anomalib/models/image/efficient_ad/lightning_model.py index 4fcce26d1c..25ef59ad7b 100644 --- a/src/anomalib/models/image/efficient_ad/lightning_model.py +++ b/src/anomalib/models/image/efficient_ad/lightning_model.py @@ -31,13 +31,13 @@ IMAGENETTE_DOWNLOAD_INFO = DownloadInfo( name="imagenette2.tgz", url="https://s3.amazonaws.com/fast-ai-imageclas/imagenette2.tgz", - checksum="fe2fc210e6bb7c5664d602c3cd71e612", + hashsum="6cbfac238434d89fe99e651496f0812ebc7a10fa62bd42d6874042bf01de4efd", ) WEIGHTS_DOWNLOAD_INFO = DownloadInfo( name="efficientad_pretrained_weights.zip", url="https://github.com/openvinotoolkit/anomalib/releases/download/efficientad_pretrained_weights/efficientad_pretrained_weights.zip", - checksum="ec6113d728969cd233271eeed7d692f2", + hashsum="c09aeaa2b33f244b3261a5efdaeae8f8284a949470a4c5a526c61275fe62684a", ) @@ -171,8 +171,8 @@ def teacher_channel_mean_std(self, dataloader: DataLoader) -> dict[str, torch.Te if not arrays_defined: _, num_channels, _, _ = y.shape n = torch.zeros((num_channels,), dtype=torch.int64, device=y.device) - chanel_sum = torch.zeros((num_channels,), dtype=torch.float64, device=y.device) - chanel_sum_sqr = torch.zeros((num_channels,), dtype=torch.float64, device=y.device) + chanel_sum = torch.zeros((num_channels,), dtype=torch.float32, device=y.device) + chanel_sum_sqr = torch.zeros((num_channels,), dtype=torch.float32, device=y.device) arrays_defined = True n += y[:, 0].numel()