From 6ecf3189e00a1e8e737a78b6d10e1d7006e050a2 Mon Sep 17 00:00:00 2001
From: alwqx
Date: Thu, 2 May 2024 23:56:41 +0800
Subject: [PATCH 1/2] chore: fix typo in llama.cpp (#7032)

Co-authored-by: Jared Van Bortel
---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index 18d6297ce1dfd..18b49ec20909e 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2359,7 +2359,7 @@ static bool llama_kv_cache_init(
     cache.recurrent = model.arch == LLM_ARCH_MAMBA;
     cache.v_trans   = !cparams.flash_attn;
 
-    // TODO: support mixed reccurent Transformer architectues
+    // TODO: support mixed recurrent Transformer architectures
     // NOTE: (!a || b) is a logical implication (a -> b)
     GGML_ASSERT(!cache.recurrent || n_embd_k_gqa == hparams.n_embd_k_s());
     GGML_ASSERT(!cache.recurrent || n_embd_v_gqa == hparams.n_embd_v_s());

From 60325fa56f61c228464c9f065db3aa6a61f2156e Mon Sep 17 00:00:00 2001
From: Bartowski
Date: Thu, 2 May 2024 19:49:09 -0400
Subject: [PATCH 2/2] Remove .attention from skipped tensors to match more
 accurately (#7051)

---
 convert-hf-to-gguf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 2f146d7302a78..612aea173644b 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -1427,7 +1427,7 @@ def write_tensors(self):
         experts = dict()
         for name, data_torch in self.get_tensors():
             # we don't need these
-            if name.endswith((".attention.masked_bias", ".attention.bias", ".attention.rotary_emb.inv_freq")):
+            if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
                 continue
 
             old_dtype = data_torch.dtype
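For context on the second patch (an illustration only, not part of the series): Python's str.endswith accepts a tuple and returns True if the string ends with any of its elements, so dropping the leading ".attention" from ".rotary_emb.inv_freq" lets the skip filter also catch rotary inverse-frequency buffers whose parent module is not named "attention". A minimal sketch of that behavior, using hypothetical tensor names:

    # Hypothetical tensor names; only the suffix tuples mirror the patch.
    OLD = (".attention.masked_bias", ".attention.bias", ".attention.rotary_emb.inv_freq")
    NEW = (".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")

    names = [
        "model.layers.0.self_attn.rotary_emb.inv_freq",   # skipped only by NEW
        "transformer.h.0.attention.rotary_emb.inv_freq",  # skipped by both
    ]
    for n in names:
        print(n, "old:", n.endswith(OLD), "new:", n.endswith(NEW))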