openvinotoolkit · samet-akcay · May 16, 2024 · Apr 11, 2024 · Apr 11, 2024 · Apr 11, 2024
@@ -10,6 +10,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 
 ### Changed
 
+- WinCLIP: set device in text embedding collection and apply forward pass with no grad, by @djdameln in https://github.com/openvinotoolkit/anomalib/pull/1984
 - 🔨Rename OptimalF1 to F1Max for consistency with the literature, by @samet-akcay in https://github.com/openvinotoolkit/anomalib/pull/1980
 - 🐞Update OptimalF1 score to use BinaryPrecisionRecallCurve and remove num_classes, by @ashwinvaidya17 in https://github.com/openvinotoolkit/anomalib/pull/1972
 

@@ -222,6 +222,7 @@ def _get_window_embeddings(self, feature_map: torch.Tensor, masks: torch.Tensor)
 
         return pooled.reshape((n_masks, batch_size, -1)).permute(1, 0, 2)
 
+    @torch.no_grad
     def forward(self, batch: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
         """Forward-pass through the model to obtain image and pixel scores.
 
@@ -325,15 +326,17 @@ def _collect_text_embeddings(self, class_name: str) -> None:
         Args:
             class_name (str): The name of the object class used in the prompt ensemble.
         """
+        # get the model's device so the prompt tokens can be moved to it and the text embeddings are computed on the same device as the model
+        device = next(self.parameters()).device
         # collect prompt ensemble
         normal_prompts, anomalous_prompts = create_prompt_ensemble(class_name)
         # tokenize prompts
         normal_tokens = tokenize(normal_prompts)
         anomalous_tokens = tokenize(anomalous_prompts)
         # encode tokens to obtain prompt embeddings
         with torch.no_grad():
-            normal_embeddings = self.clip.encode_text(normal_tokens)
-            anomalous_embeddings = self.clip.encode_text(anomalous_tokens)
+            normal_embeddings = self.clip.encode_text(normal_tokens.to(device))
+            anomalous_embeddings = self.clip.encode_text(anomalous_tokens.to(device))
         # average prompt embeddings
         normal_embeddings = torch.mean(normal_embeddings, dim=0, keepdim=True)
         anomalous_embeddings = torch.mean(anomalous_embeddings, dim=0, keepdim=True)