From 7ac7a73480ec0856720dd9c877745791b77f5b87 Mon Sep 17 00:00:00 2001
From: Samet Akcay <samet.akcay@intel.com>
Date: Tue, 27 Feb 2024 13:51:38 +0000
Subject: [PATCH] =?UTF-8?q?=F0=9F=94=92=20Replace=20`md5`=20with=20`sha-25?=
 =?UTF-8?q?6`=20(#1680)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Fix metadata path

* Ignore hidden directories in folder dataset

* Add check for mask_dir for segmentation tasks in Folder dataset

* Limit the gradio version to <4

* Replace md5 with sha256. Signed-off-by: Samet Akcay <samet.akcay@intel.com>

* Rename checksum to hashsum since we use cryptographic algorithms

* Update btech hashsum

* Update kolektor hashsum

* Update hashsum for mvtec download info

* Update hashsum in Visa image download info

* Update hashsums for Avenue Dataset and Annotations

* Update ucsd hashsum

* Update shanghai tech hashsum

* Update dsr weight hashsum

* Update efficient ad weight hashsum

* Fix albumentations tests

* Fix albumentations tests
---
 src/anomalib/data/depth/mvtec_3d.py           |  2 +-
 src/anomalib/data/image/btech.py              |  2 +-
 src/anomalib/data/image/kolektor.py           |  2 +-
 src/anomalib/data/image/mvtec.py              |  2 +-
 src/anomalib/data/image/visa.py               |  2 +-
 src/anomalib/data/utils/download.py           | 50 ++++++++++++++++---
 src/anomalib/data/video/avenue.py             |  4 +-
 src/anomalib/data/video/shanghaitech.py       |  2 +-
 src/anomalib/data/video/ucsd_ped.py           |  2 +-
 .../models/image/dsr/lightning_model.py       |  2 +-
 .../image/efficient_ad/lightning_model.py     |  8 +--
 11 files changed, 56 insertions(+), 22 deletions(-)

diff --git a/src/anomalib/data/depth/mvtec_3d.py b/src/anomalib/data/depth/mvtec_3d.py
index 33a28eea23..a41411a547 100644
--- a/src/anomalib/data/depth/mvtec_3d.py
+++ b/src/anomalib/data/depth/mvtec_3d.py
@@ -50,7 +50,7 @@
     name="mvtec_3d",
     url="https://www.mydrive.ch/shares/45920/dd1eb345346df066c63b5c95676b961b/download/428824485-1643285832"
     "/mvtec_3d_anomaly_detection.tar.xz",
-    checksum="d8bb2800fbf3ac88e798da6ae10dc819",
+    hashsum="d8bb2800fbf3ac88e798da6ae10dc819",  # NOTE(review): 32 hex chars (MD5 length) but check_hash defaults to sha256
 )
 
 CATEGORIES = ("bagel", "cable_gland", "carrot", "cookie", "dowel", "foam", "peach", "potato", "rope", "tire")
diff --git a/src/anomalib/data/image/btech.py b/src/anomalib/data/image/btech.py
index 147cf67019..af1287ee7f 100644
--- a/src/anomalib/data/image/btech.py
+++ b/src/anomalib/data/image/btech.py
@@ -38,7 +38,7 @@
 DOWNLOAD_INFO = DownloadInfo(
     name="btech",
     url="https://avires.dimi.uniud.it/papers/btad/btad.zip",
-    checksum="c1fa4d56ac50dd50908ce04e81037a8e",
+    hashsum="461c9387e515bfed41ecaae07c50cf6b10def647b36c9e31d239ab2736b10d2a",
 )
 
 CATEGORIES = ("01", "02", "03")
diff --git a/src/anomalib/data/image/kolektor.py b/src/anomalib/data/image/kolektor.py
index 1b6e7e55c4..b7cc42cf97 100644
--- a/src/anomalib/data/image/kolektor.py
+++ b/src/anomalib/data/image/kolektor.py
@@ -46,7 +46,7 @@
 DOWNLOAD_INFO = DownloadInfo(
     name="kolektor",
     url="https://go.vicos.si/kolektorsdd",
-    checksum="2b094030343c1cd59df02203ac6c57a0",
+    hashsum="65dc621693418585de9c4467d1340ea7958a6181816f0dc2883a1e8b61f9d4dc",
     filename="KolektorSDD.zip",
 )
 
diff --git a/src/anomalib/data/image/mvtec.py b/src/anomalib/data/image/mvtec.py
index 2892732b9e..2ff4b56147 100644
--- a/src/anomalib/data/image/mvtec.py
+++ b/src/anomalib/data/image/mvtec.py
@@ -56,7 +56,7 @@
     name="mvtec",
     url="https://www.mydrive.ch/shares/38536/3830184030e49fe74747669442f0f282/download/420938113-1629952094"
     "/mvtec_anomaly_detection.tar.xz",
-    checksum="eefca59f2cede9c3fc5b6befbfec275e",
+    hashsum="cf4313b13603bec67abb49ca959488f7eedce2a9f7795ec54446c649ac98cd3d",
 )
 
 CATEGORIES = (
diff --git a/src/anomalib/data/image/visa.py b/src/anomalib/data/image/visa.py
index f7aed601e8..8dd70c3b4e 100644
--- a/src/anomalib/data/image/visa.py
+++ b/src/anomalib/data/image/visa.py
@@ -51,7 +51,7 @@
 DOWNLOAD_INFO = DownloadInfo(
     name="VisA",
     url="https://amazon-visual-anomaly.s3.us-west-2.amazonaws.com/VisA_20220922.tar",
-    checksum="ef908989b6dc701fc218f643c127a4de",
+    hashsum="2eb8690c803ab37de0324772964100169ec8ba1fa3f7e94291c9ca673f40f362",
 )
 
 CATEGORIES = (
diff --git a/src/anomalib/data/utils/download.py b/src/anomalib/data/utils/download.py
index 4d12c2f280..558768b654 100644
--- a/src/anomalib/data/utils/download.py
+++ b/src/anomalib/data/utils/download.py
@@ -28,7 +28,7 @@ class DownloadInfo:
 
     name: str
     url: str
-    checksum: str
+    hashsum: str
     filename: str | None = None
 
 
@@ -233,17 +233,51 @@ def safe_extract(tar_file: TarFile, root: Path, members: list[TarInfo]) -> None:
         tar_file.extract(member, root)
 
 
-def hash_check(file_path: Path, expected_hash: str) -> None:
-    """Raise assert error if hash does not match the calculated hash of the file.
+def generate_hash(file_path: str | Path, algorithm: str = "sha256") -> str:
+    """Generate a hash of a file using the specified algorithm.
+
+    Args:
+        file_path (str | Path): Path to the file to hash.
+        algorithm (str): Algorithm name accepted by ``hashlib.new`` (e.g., 'sha256', 'sha3_512').
+
+    Returns:
+        str: The hexadecimal hash string of the file.
+
+    Raises:
+        ValueError: If the specified hashing algorithm is not supported.
+    """
+    # Resolve the name through ``hashlib.new`` so only real hash constructors can be selected.
+    try:
+        hasher = hashlib.new(algorithm)
+    except (TypeError, ValueError) as err:
+        msg = f"Unsupported hashing algorithm: {algorithm}"
+        raise ValueError(msg) from err
+
+    # Read the file in chunks to avoid loading it all into memory
+    with Path(file_path).open("rb") as file:
+        for chunk in iter(lambda: file.read(4096), b""):
+            hasher.update(chunk)
+
+    # Return the computed hash value in hexadecimal format
+    return hasher.hexdigest()
+
+
+def check_hash(file_path: Path, expected_hash: str, algorithm: str = "sha256") -> None:
+    """Raise value error if hash does not match the calculated hash of the file.
 
     Args:
         file_path (Path): Path to file.
         expected_hash (str): Expected hash of the file.
+        algorithm (str): Hashing algorithm to use ('sha256', 'sha3_512', etc.).
     """
-    with file_path.open("rb") as hash_file:
-        assert (
-            hashlib.new(name="md5", data=hash_file.read(), usedforsecurity=False).hexdigest() == expected_hash
-        ), f"Downloaded file {file_path} does not match the required hash."
+    # Compare the calculated hash with the expected hash
+    calculated_hash = generate_hash(file_path, algorithm)
+    if calculated_hash != expected_hash:
+        msg = (
+            f"Calculated hash {calculated_hash} of downloaded file {file_path} does not match the required hash "
+            f"{expected_hash}."
+        )
+        raise ValueError(msg)
 
 
 def extract(file_name: Path, root: Path) -> None:
@@ -303,7 +337,7 @@ def download_and_extract(root: Path, info: DownloadInfo) -> None:
                     reporthook=progress_bar.update_to,
                 )
             logger.info("Checking the hash of the downloaded file.")
-            hash_check(downloaded_file_path, info.checksum)
+            check_hash(downloaded_file_path, info.hashsum)
         else:
             msg = f"Invalid URL to download dataset. Supported 'http://' or 'https://' but '{info.url}' is requested"
             raise RuntimeError(msg)
diff --git a/src/anomalib/data/video/avenue.py b/src/anomalib/data/video/avenue.py
index e7109466d9..baa41d3d0c 100644
--- a/src/anomalib/data/video/avenue.py
+++ b/src/anomalib/data/video/avenue.py
@@ -50,12 +50,12 @@
 DATASET_DOWNLOAD_INFO = DownloadInfo(
     name="Avenue Dataset",
     url="http://www.cse.cuhk.edu.hk/leojia/projects/detectabnormal/Avenue_Dataset.zip",
-    checksum="b7a34b212ecdd30efbd989a6dcb1aceb",
+    hashsum="fc9cb8432a11ca79c18aa180c72524011411b69d3b0ff27c8816e41c0de61531",
 )
 ANNOTATIONS_DOWNLOAD_INFO = DownloadInfo(
     name="Avenue Annotations",
     url="http://www.cse.cuhk.edu.hk/leojia/projects/detectabnormal/ground_truth_demo.zip",
-    checksum="e8e3bff99195b6b511534083b9dbe1f5",
+    hashsum="60fec1728ec8f73a58aad3aeb5729d70a805a47e0b8eb4bf91ab67ef06386d77",
 )
 
 
diff --git a/src/anomalib/data/video/shanghaitech.py b/src/anomalib/data/video/shanghaitech.py
index 9dccff8863..d4b05f1bfd 100644
--- a/src/anomalib/data/video/shanghaitech.py
+++ b/src/anomalib/data/video/shanghaitech.py
@@ -49,7 +49,7 @@
 DATASET_DOWNLOAD_INFO = DownloadInfo(
     name="ShanghaiTech Dataset",
     url="http://101.32.75.151:8181/dataset/shanghaitech.tar.gz",
-    checksum="08494decd30fb0fa213b519a9c555040",
+    hashsum="c13a827043b259ccf8493c9d9130486872992153a9d714fe229e523cd4c94116",
 )
 
 
diff --git a/src/anomalib/data/video/ucsd_ped.py b/src/anomalib/data/video/ucsd_ped.py
index d2e304dde9..05dbba8b8e 100644
--- a/src/anomalib/data/video/ucsd_ped.py
+++ b/src/anomalib/data/video/ucsd_ped.py
@@ -38,7 +38,7 @@
 DOWNLOAD_INFO = DownloadInfo(
     name="UCSD Pedestrian",
     url="http://www.svcl.ucsd.edu/projects/anomaly/UCSD_Anomaly_Dataset.tar.gz",
-    checksum="5006421b89885f45a6f93b041145f2eb",
+    hashsum="2329af326951f5097fdd114c50e853957d3e569493a49d22fc082a9fd791915b",
 )
 
 CATEGORIES = ("UCSDped1", "UCSDped2")
diff --git a/src/anomalib/models/image/dsr/lightning_model.py b/src/anomalib/models/image/dsr/lightning_model.py
index c4b4963409..29f3ad9454 100644
--- a/src/anomalib/models/image/dsr/lightning_model.py
+++ b/src/anomalib/models/image/dsr/lightning_model.py
@@ -29,7 +29,7 @@
 WEIGHTS_DOWNLOAD_INFO = DownloadInfo(
     name="vq_model_pretrained_128_4096.pckl",
     url="https://github.com/openvinotoolkit/anomalib/releases/download/dsr_pretrained_weights/dsr_vq_model_pretrained.zip",
-    checksum="927f6b40841a7c885d12217c922b2bba",
+    hashsum="52fe7504ec8e9df70b4382f287ab26269dcfe000cd7a7e146a52c6f146f34afb",
 )
 
 
diff --git a/src/anomalib/models/image/efficient_ad/lightning_model.py b/src/anomalib/models/image/efficient_ad/lightning_model.py
index 4fcce26d1c..25ef59ad7b 100644
--- a/src/anomalib/models/image/efficient_ad/lightning_model.py
+++ b/src/anomalib/models/image/efficient_ad/lightning_model.py
@@ -31,13 +31,13 @@
 IMAGENETTE_DOWNLOAD_INFO = DownloadInfo(
     name="imagenette2.tgz",
     url="https://s3.amazonaws.com/fast-ai-imageclas/imagenette2.tgz",
-    checksum="fe2fc210e6bb7c5664d602c3cd71e612",
+    hashsum="6cbfac238434d89fe99e651496f0812ebc7a10fa62bd42d6874042bf01de4efd",
 )
 
 WEIGHTS_DOWNLOAD_INFO = DownloadInfo(
     name="efficientad_pretrained_weights.zip",
     url="https://github.com/openvinotoolkit/anomalib/releases/download/efficientad_pretrained_weights/efficientad_pretrained_weights.zip",
-    checksum="ec6113d728969cd233271eeed7d692f2",
+    hashsum="c09aeaa2b33f244b3261a5efdaeae8f8284a949470a4c5a526c61275fe62684a",
 )
 
 
@@ -171,8 +171,8 @@ def teacher_channel_mean_std(self, dataloader: DataLoader) -> dict[str, torch.Te
             if not arrays_defined:
                 _, num_channels, _, _ = y.shape
                 n = torch.zeros((num_channels,), dtype=torch.int64, device=y.device)
-                chanel_sum = torch.zeros((num_channels,), dtype=torch.float64, device=y.device)
-                chanel_sum_sqr = torch.zeros((num_channels,), dtype=torch.float64, device=y.device)
+                chanel_sum = torch.zeros((num_channels,), dtype=torch.float32, device=y.device)
+                chanel_sum_sqr = torch.zeros((num_channels,), dtype=torch.float32, device=y.device)
                 arrays_defined = True
 
             n += y[:, 0].numel()