Skip to content

Commit

Permalink
Fixed issues
Browse files Browse the repository at this point in the history
  • Loading branch information
dragnil1 committed Apr 15, 2024
1 parent 6c80b3c commit cc8d529
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 18 deletions.
36 changes: 20 additions & 16 deletions llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4076,24 +4076,26 @@ static void llm_load_vocab(
if (add_space_prefix_keyidx != -1) {
vocab.add_space_prefix = gguf_get_val_bool(ctx, add_space_prefix_keyidx);
} // The default value of add_space_prefix is true.
} else if (tokenizer_name == "bert") {
vocab.type = LLAMA_VOCAB_TYPE_WPM;

// default special tokens
vocab.special_bos_id = -1;
vocab.special_eos_id = -1;
vocab.special_unk_id = 100;
vocab.special_sep_id = 102;
vocab.special_pad_id = 0;
vocab.special_cls_id = 101;
vocab.special_mask_id = 103;
vocab.add_space_prefix = false;
} else {
if (tokenizer_name == "gpt2") {
vocab.type = LLAMA_VOCAB_TYPE_BPE;
} else if (tokenizer_name == "deepseek_coder") {
vocab.type = LLAMA_VOCAB_TYPE_DEEPSEEKCODER;
} else if (tokenizer_name == "deepseek_llm") {
vocab.type = LLAMA_VOCAB_TYPE_DEEPSEEKLLM;
} else if (tokenizer_name == "bert") {
vocab.type = LLAMA_VOCAB_TYPE_WPM;

// default special tokens
vocab.special_bos_id = 101;
vocab.special_eos_id = 102;
vocab.special_unk_id = 100;
vocab.special_sep_id = -1;
vocab.special_pad_id = -1;
vocab.add_space_prefix = false;
} else {
} else {
LLAMA_LOG_WARN("%s: unknown tokenizer: '%s'", __func__, tokenizer_name.c_str());
LLAMA_LOG_WARN("%s: using default tokenizer: 'llama'", __func__);
vocab.type = LLAMA_VOCAB_TYPE_SPM;
Expand Down Expand Up @@ -4125,11 +4127,13 @@ static void llm_load_vocab(
}

// default special tokens
vocab.special_bos_id = 11;
vocab.special_eos_id = 11;
vocab.special_unk_id = -1;
vocab.special_sep_id = -1;
vocab.special_pad_id = -1;
vocab.special_bos_id = 11;
vocab.special_eos_id = 11;
vocab.special_unk_id = -1;
vocab.special_sep_id = -1;
vocab.special_pad_id = -1;
vocab.special_cls_id = -1;
vocab.special_mask_id = -1;
}
}

Expand Down
Loading

0 comments on commit cc8d529

Please sign in to comment.