From 52707c6328b25961b27be9af443e74f8e5883d87 Mon Sep 17 00:00:00 2001
From: "Zian(Andy) Zheng" <62330719+Orion-Zheng@users.noreply.github.com>
Date: Fri, 13 Oct 2023 16:46:33 +0800
Subject: [PATCH] Update flash_attention_patch.py

To be compatible with the new change in the Transformers library, where a new
argument 'padding_mask' was added to the forward function of the attention layer.
https://github.com/huggingface/transformers/pull/25598
---
 .../colossal_llama2/utils/flash_attention_patch.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/applications/Colossal-LLaMA-2/colossal_llama2/utils/flash_attention_patch.py b/applications/Colossal-LLaMA-2/colossal_llama2/utils/flash_attention_patch.py
index 6c58c59307a6..111659b2d928 100644
--- a/applications/Colossal-LLaMA-2/colossal_llama2/utils/flash_attention_patch.py
+++ b/applications/Colossal-LLaMA-2/colossal_llama2/utils/flash_attention_patch.py
@@ -65,6 +65,7 @@ def attention_forward(
     past_key_value: Optional[Tuple[torch.Tensor]] = None,
     output_attentions: bool = False,
     use_cache: bool = False,
+    **kwargs
 ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
     """
     Re-define LLaMA-2 `LlamaAttention` forward method using flash-attention.
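
Editor's note: the sketch below is not part of the patch. It is a minimal,
self-contained illustration of why adding a trailing **kwargs keeps the patched
forward compatible: newer Transformers versions pass an extra `padding_mask`
keyword to the attention layer, which a fixed signature rejects but **kwargs
silently absorbs. The function names and simplified signatures here are
hypothetical stand-ins for the real attention_forward.

    # Hypothetical simplified signatures, for illustration only.
    from typing import Optional

    import torch


    def attention_forward_old(hidden_states: torch.Tensor,
                              attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
        # Pre-patch style signature: unknown keyword arguments raise TypeError.
        return hidden_states


    def attention_forward_new(hidden_states: torch.Tensor,
                              attention_mask: Optional[torch.Tensor] = None,
                              **kwargs) -> torch.Tensor:
        # Post-patch style signature: `padding_mask` (and any future keyword
        # Transformers may add) is accepted and simply ignored.
        return hidden_states


    x = torch.zeros(1, 4, 8)
    attention_forward_new(x, padding_mask=None)   # works with the extra keyword
    try:
        attention_forward_old(x, padding_mask=None)
    except TypeError as exc:
        print("old signature fails:", exc)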