From 6ecf3189e00a1e8e737a78b6d10e1d7006e050a2 Mon Sep 17 00:00:00 2001
From: alwqx
Date: Thu, 2 May 2024 23:56:41 +0800
Subject: [PATCH 1/2] chore: fix typo in llama.cpp (#7032)

Co-authored-by: Jared Van Bortel
---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index 18d6297ce1dfd..18b49ec20909e 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2359,7 +2359,7 @@ static bool llama_kv_cache_init(
     cache.recurrent = model.arch == LLM_ARCH_MAMBA;
     cache.v_trans   = !cparams.flash_attn;
 
-    // TODO: support mixed reccurent Transformer architectues
+    // TODO: support mixed recurrent Transformer architectures
     // NOTE: (!a || b) is a logical implication (a -> b)
     GGML_ASSERT(!cache.recurrent || n_embd_k_gqa == hparams.n_embd_k_s());
     GGML_ASSERT(!cache.recurrent || n_embd_v_gqa == hparams.n_embd_v_s());

From 60325fa56f61c228464c9f065db3aa6a61f2156e Mon Sep 17 00:00:00 2001
From: Bartowski
Date: Thu, 2 May 2024 19:49:09 -0400
Subject: [PATCH 2/2] Remove .attention from skipped tensors to match more
 accurately (#7051)

---
 convert-hf-to-gguf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 2f146d7302a78..612aea173644b 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -1427,7 +1427,7 @@ def write_tensors(self):
         experts = dict()
         for name, data_torch in self.get_tensors():
             # we don't need these
-            if name.endswith((".attention.masked_bias", ".attention.bias", ".attention.rotary_emb.inv_freq")):
+            if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
                 continue
 
             old_dtype = data_torch.dtype
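For context on the second patch (an illustration only, not part of the series): Python's str.endswith accepts a tuple and returns True if the string ends with any of its elements, so dropping the leading ".attention" from ".rotary_emb.inv_freq" lets the skip filter also catch rotary inverse-frequency buffers whose parent module is not named "attention". A minimal sketch of that behavior, using hypothetical tensor names:

    # Hypothetical tensor names; only the suffix tuples mirror the patch.
    OLD = (".attention.masked_bias", ".attention.bias", ".attention.rotary_emb.inv_freq")
    NEW = (".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")

    names = [
        "model.layers.0.self_attn.rotary_emb.inv_freq",   # skipped only by NEW
        "transformer.h.0.attention.rotary_emb.inv_freq",  # skipped by both
    ]
    for n in names:
        print(n, "old:", n.endswith(OLD), "new:", n.endswith(NEW))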