🚀 test(examples): 3 LLMs examples (#130)
jean-francoisreboud committed Jul 15, 2024
1 parent c3a8ade commit 723b021
Showing 21 changed files with 1,894 additions and 882 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file.

## [unreleased]

🚀 **examples**: 3 LLMs examples ([#130](https://github.com/owkin/GrAIdient/pull/130))\
📚 **docs:** LLM doc & split tests ([129](https://github.com/owkin/GrAIdient/pull/129))\
**layer_seq:** LLM generate ([128](https://github.com/owkin/GrAIdient/pull/128))\
**layer_seq:** MultiplySeq, SiLU & LLM test ([127](https://github.com/owkin/GrAIdient/pull/127))\
2 changes: 1 addition & 1 deletion Docs/Examples/EXAMPLES.md
@@ -12,4 +12,4 @@ The following examples are currently available:
- [VGG](VGG.md)
- [Vision Transformer](VisionTransformer.md)
- [Auto Encoder](AutoEncoder.md)
- [NLP](NLP.md)
- [LLM](LLM.md)
24 changes: 15 additions & 9 deletions Docs/Examples/NLP.md → Docs/Examples/LLM.md
@@ -1,7 +1,7 @@
# 🚀 NLP Example
# 🚀 LLM Example

This is the documentation for running
[LLMs](../../Tests/GrAIExamples/NLPExample.swift) on the GPU.
[LLMs](../../Tests/GrAIExamples/LLMExample.swift) on the GPU.

## Setup

@@ -17,13 +17,17 @@ pip install -e .

Then:
- download weights from
[MistralAI](https://docs.mistral.ai/getting-started/open_weight_models/).
- Update `_modelPath` in the
[NLPExample](../../Tests/GrAIExamples/NLPExample.swift) file with the
[MistralAI](https://docs.mistral.ai/getting-started/open_weight_models/)
and / or
[Llama](https://llama.meta.com/llama-downloads/)
- Update `_modelPathMistral`, `_modelPathLlama2`, `_modelPathLlama3` in the
[LLMExample](../../Tests/GrAIExamples/LLMExample.swift) file with the
previous downloaded weights.
- Optionally update `_prompt`.
- Rename `_testGenerate` into `testGenerate`.
- Run the test.
- Rename `_testGenerateMistral`, `_testGenerateLlama2` and `_testGenerateLlama3`
into
`testGenerateMistral`, `testGenerateLlama2` and `testGenerateLlama3`.
- Run the tests.

It is finally possible to clean the environment 🌍

@@ -34,12 +38,14 @@ conda env remove --name graiexamples

## Steps

1. Generate text from a prompt.
1. Generate text from a prompt with Mistral 7B Instruct model.
1. Generate text from a prompt with Llama 2 7B Chat model.
1. Generate text from a prompt with Llama 3 8B Instruct model.

## Further tests

Further tests are available at
[NLPExampleTests](../../Tests/GrAIExamples/NLPExampleTests.swift).
[LLMExampleTests](../../Tests/GrAIExamples/LLMExampleTests.swift).
In order to run them, rename
`_testPredict1` and `_testPredict32` into `testPredict1` and `testPredict32`.

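The setup above assumes each downloaded checkpoint directory is complete before the paths in `LLMExample.swift` are updated. Below is a minimal sketch of such a sanity check, assuming the Mistral/Llama-style layout (`consolidated.00.pth`, `tokenizer.model`, `params.json`) that the Python helpers in this commit read; the directory paths are placeholders and may differ for your downloads.

```python
from pathlib import Path

# Files the python_lib helpers expect in a checkpoint directory
# (weights, tokenizer and config). Layout assumed from generate.py.
EXPECTED_FILES = ["consolidated.00.pth", "tokenizer.model", "params.json"]


def check_model_dir(model_path: str) -> list:
    """Return the list of expected files missing from a checkpoint directory."""
    return [
        name for name in EXPECTED_FILES
        if not (Path(model_path) / name).exists()
    ]


if __name__ == "__main__":
    # Placeholder paths: use the same values as _modelPathMistral,
    # _modelPathLlama2 and _modelPathLlama3 in LLMExample.swift.
    for path in ["/path/to/mistral-7B-Instruct", "/path/to/llama-2-7b-chat"]:
        missing = check_model_dir(path)
        print(path, "OK" if not missing else f"missing {missing}")
```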
42 changes: 33 additions & 9 deletions Tests/GrAIExamples/Base/python_lib/__init__.py
@@ -5,29 +5,53 @@
next_data_CIFAR,
)
from python_lib.weight import (
extract_state_key,
load_simple_auto_encoder_weights,
load_llm_weights,
load_mistral_state,
load_llama_state,
)
from python_lib.trainer import (
train_simple_auto_encoder,
step_simple_auto_encoder,
)
from python_lib.nlp.generate import (
predict,
encode,
decode,
from python_lib.nlp.mistral.generate import (
predict_mistral,
load_mistral_tokenizer,
encode_mistral,
decode_mistral,
)
from python_lib.nlp.llama2.generate import (
load_llama2_tokenizer,
encode_llama2,
decode_llama2,
)
from python_lib.nlp.llama3.generate import (
load_llama3_tokenizer,
load_llama3_formatter,
encode_llama3,
decode_llama3
)

__all__ = [
"load_CIFAR_train",
"load_CIFAR_test",
"iter_CIFAR",
"next_data_CIFAR",
"extract_state_key",
"load_simple_auto_encoder_weights",
"load_llm_weights",
"load_mistral_state",
"load_llama_state",
"train_simple_auto_encoder",
"step_simple_auto_encoder",
"predict",
"encode",
"decode",
"predict_mistral",
"load_mistral_tokenizer",
"encode_mistral",
"decode_mistral",
"load_llama2_tokenizer",
"encode_llama2",
"decode_llama2",
"load_llama3_tokenizer",
"load_llama3_formatter",
"encode_llama3",
"decode_llama3",
]
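The generic `predict` / `encode` / `decode` exports are replaced by model-specific variants. Below is a minimal sketch of a tokenizer round trip with the Mistral helpers, assuming they keep the `(prompt, model_path)` signature of the removed generic functions in `generate.py` below; the checkpoint path is a placeholder.

```python
# Assumed signatures: encode_mistral(prompt, model_path) -> list of token ids,
# decode_mistral(prompt, model_path) -> str, mirroring the removed generic
# encode / decode helpers. The path below is a placeholder.
from python_lib import decode_mistral, encode_mistral

model_path = "/path/to/mistral-7B-Instruct"

tokens = encode_mistral(prompt="How do you do?", model_path=model_path)
text = decode_mistral(prompt=tokens, model_path=model_path)
print(tokens, text)
```

The Llama 2 and Llama 3 helpers (`encode_llama2`, `decode_llama2`, `encode_llama3`, `decode_llama3`) presumably follow the same pattern against their respective checkpoints.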
218 changes: 4 additions & 214 deletions Tests/GrAIExamples/Base/python_lib/nlp/generate.py
@@ -1,14 +1,10 @@
import json
import torch
import numpy as np
from pathlib import Path
from typing import Generator, List, Optional
from typing import Generator, Optional

from python_lib.nlp.tokenizer import Tokenizer
from python_lib.nlp.model import Transformer, TransformerArgs
from python_lib.nlp.model import Transformer


def _predict_no_cache(
def predict_no_cache(
prompt: torch.Tensor,
model: Transformer,
temp: float = 0.0,
@@ -47,7 +43,7 @@ def sample(logits: torch.Tensor) -> torch.Tensor:
return sample(logits)


def _generate_with_cache(
def generate_with_cache(
prompt: torch.Tensor, model: Transformer, temp: float = 0.0
) -> Generator[torch.Tensor, None, None]:
"""
@@ -84,209 +80,3 @@ def sample(logits: torch.Tensor) -> torch.Tensor:
logits = logits[:, -1, :]
y = sample(logits)
yield y


def _generate(
prompt: str,
model_path: str,
temp: float = 0,
max_tokens: int = 128
):
"""
Generate text based on the given prompt and model.
Parameters
----------
prompt: torch.Tensor
The input prompt.
model_path: str
Path to the model on the disk.
temp: float
The temperature for sampling. If temp is 0, use max sampling.
max_tokens: int
The maximal number of generated tokens.
"""
state = torch.load(str(Path(model_path) / "consolidated.00.pth"))
tokenizer = Tokenizer(str(Path(model_path) / "tokenizer.model"))

with open(Path(model_path) / "params.json", "r") as f:
config = json.loads(f.read())
config.pop("sliding_window", None)
config.pop("model_type", None)
model_args = TransformerArgs(**config)

model = Transformer(model_args)
model.load_state_dict(state)
model.to("mps")

print(prompt, end="", flush=True)
prompt = torch.tensor(
tokenizer.encode(prompt), dtype=torch.long, device="mps"
)

tokens = []
skip = 0
for token, n in zip(
_generate_with_cache(prompt, model, temp),
range(max_tokens),
):
if token == tokenizer.eos_id:
break

tokens.append(token.item())
s = tokenizer.decode(tokens)
if len(s) - skip > 1:
print(s[skip:-1], end="", flush=True)
skip = len(s) - 1

print(tokenizer.decode(tokens)[skip:], flush=True)
print("=" * 10)

if len(tokens) == 0:
print("No tokens generated for this prompt.")
return


def _predict(
prompt: str,
model_path: str,
temp: float = 0,
n_layers: Optional[int] = None
):
"""
Predict text based on the given prompt and model.
Parameters
----------
prompt: torch.Tensor
The input prompt.
model_path: str
Path to the model on the disk.
temp: float
The temperature for sampling. If temp is 0, use max sampling.
n_layers: int
Modifier of the number of Transformer blocks.
"""
state = torch.load(str(Path(model_path) / "consolidated.00.pth"))
tokenizer = Tokenizer(str(Path(model_path) / "tokenizer.model"))

with open(Path(model_path) / "params.json", "r") as f:
config = json.loads(f.read())
config.pop("sliding_window", None)
config.pop("model_type", None)
model_args = TransformerArgs(**config)

model = Transformer(model_args)
model.load_state_dict(state)
model.to("mps")

print(prompt, end="", flush=True)
prompt = torch.tensor(
tokenizer.encode(prompt), dtype=torch.long, device="mps"
)

tokens = _predict_no_cache(
prompt, model, temp, n_layers
).squeeze(dim=0).cpu().numpy().tolist()
print(tokenizer.decode(tokens))


def predict(
prompt: str,
model_path: str,
n_layers: Optional[int] = None
) -> np.ndarray:
"""
Predict text based on the given prompt and model.
Parameters
----------
prompt: torch.Tensor
The input prompt.
model_path: str
Path to the model on the disk.
n_layers: int
Modifier of the number of Transformer blocks.
"""
state = torch.load(str(Path(model_path) / "consolidated.00.pth"))
tokenizer = Tokenizer(str(Path(model_path) / "tokenizer.model"))

with open(Path(model_path) / "params.json", "r") as f:
config = json.loads(f.read())
config.pop("sliding_window", None)
config.pop("model_type", None)
model_args = TransformerArgs(**config)

model = Transformer(model_args)
model.load_state_dict(state)
model.to("mps")

prompt = torch.tensor(
tokenizer.encode(prompt), dtype=torch.long, device="mps"
)
out, _ = model(prompt[None], n_layers=n_layers)
return out.detach().cpu().numpy().flatten()


def encode(
prompt: str,
model_path: str
) -> List[int]:
"""
Encode text.
Parameters
----------
prompt: torch.Tensor
The input prompt.
model_path: str
Path to the model on the disk.
"""
tokenizer = Tokenizer(str(Path(model_path) / "tokenizer.model"))
return tokenizer.encode(prompt)


def decode(
prompt: List[int],
model_path: str
) -> str:
"""
Decode text.
Parameters
----------
prompt: [int]
The input prompt.
model_path: str
Path to the model on the disk.
"""
tokenizer = Tokenizer(str(Path(model_path) / "tokenizer.model"))
return tokenizer.decode(prompt)


if __name__ == "__main__":
model_path = ""
prompt = "How do you do?"

_generate(
prompt="How do you do?",
model_path=model_path
)
prompt = encode(
prompt=prompt,
model_path=model_path
)
prompt = decode(
prompt=prompt,
model_path=model_path
)
_predict(
prompt=prompt,
model_path=model_path,
n_layers=None
)
predict(
prompt=prompt,
model_path=model_path,
n_layers=1
)
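For reference, the generation loop that the removed `_generate` helper implemented above can still be reproduced with the now-public `generate_with_cache`. Below is a condensed sketch under the same assumptions as that helper (Mistral-style checkpoint layout, `TransformerArgs` still importable from `python_lib.nlp.model`, an Apple-silicon `mps` device); the checkpoint path and prompt are placeholders.

```python
import json
from pathlib import Path

import torch

from python_lib.nlp.generate import generate_with_cache
from python_lib.nlp.model import Transformer, TransformerArgs
from python_lib.nlp.tokenizer import Tokenizer

model_path = "/path/to/model"  # placeholder checkpoint directory
max_tokens = 128

# Load weights, tokenizer and config exactly as the removed _generate did.
state = torch.load(str(Path(model_path) / "consolidated.00.pth"))
tokenizer = Tokenizer(str(Path(model_path) / "tokenizer.model"))
with open(Path(model_path) / "params.json", "r") as f:
    config = json.loads(f.read())
config.pop("sliding_window", None)
config.pop("model_type", None)

model = Transformer(TransformerArgs(**config))
model.load_state_dict(state)
model.to("mps")  # device carried over from the removed helper

prompt = torch.tensor(
    tokenizer.encode("How do you do?"), dtype=torch.long, device="mps"
)

# Drive the generator token by token, stopping at EOS or max_tokens.
tokens = []
for token, _ in zip(generate_with_cache(prompt, model, temp=0.0), range(max_tokens)):
    if token == tokenizer.eos_id:
        break
    tokens.append(token.item())

print(tokenizer.decode(tokens))
```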
