🚀 test(examples): 3 LLMs examples (#130)
jean-francoisreboud committed Jul 15, 2024
1 parent c3a8ade commit 723b021
Showing 21 changed files with 1,894 additions and 882 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file.

## [unreleased]

🚀 **examples**: 3 LLMs examples ([#130](https://github.com/owkin/GrAIdient/pull/130))\
📚 **docs:** LLM doc & split tests ([129](https://github.com/owkin/GrAIdient/pull/129))\
**layer_seq:** LLM generate ([128](https://github.com/owkin/GrAIdient/pull/128))\
**layer_seq:** MultiplySeq, SiLU & LLM test ([127](https://github.com/owkin/GrAIdient/pull/127))\
2 changes: 1 addition & 1 deletion Docs/Examples/EXAMPLES.md
@@ -12,4 +12,4 @@ The following examples are currently available:
- [VGG](VGG.md)
- [Vision Transformer](VisionTransformer.md)
- [Auto Encoder](AutoEncoder.md)
- [NLP](NLP.md)
- [LLM](LLM.md)
24 changes: 15 additions & 9 deletions Docs/Examples/NLP.md → Docs/Examples/LLM.md
@@ -1,7 +1,7 @@
# 🚀 NLP Example
# 🚀 LLM Example

This is the documentation for running
[LLMs](../../Tests/GrAIExamples/NLPExample.swift) on the GPU.
[LLMs](../../Tests/GrAIExamples/LLMExample.swift) on the GPU.

## Setup

@@ -17,13 +17,17 @@ pip install -e .

Then:
- download weights from
[MistralAI](https://docs.mistral.ai/getting-started/open_weight_models/).
- Update `_modelPath` in the
[NLPExample](../../Tests/GrAIExamples/NLPExample.swift) file with the
[MistralAI](https://docs.mistral.ai/getting-started/open_weight_models/)
and / or
[Llama](https://llama.meta.com/llama-downloads/)
- Update `_modelPathMistral`, `_modelPathLlama2`, `_modelPathLlama3` in the
[LLMExample](../../Tests/GrAIExamples/LLMExample.swift) file with the
previous downloaded weights.
- Optionally update `_prompt`.
- Rename `_testGenerate` into `testGenerate`.
- Run the test.
- Rename `_testGenerateMistral`, `_testGenerateLlama2` and `_testGenerateLlama3`
into
`testGenerateMistral`, `testGenerateLlama2` and `testGenerateLlama3`.
- Run the tests.

It is finally possible to clean the environment 🌍

@@ -34,12 +38,14 @@ conda env remove --name graiexamples

## Steps

1. Generate text from a prompt.
1. Generate text from a prompt with Mistral 7B Instruct model.
1. Generate text from a prompt with Llama 2 7B Chat model.
1. Generate text from a prompt with Llama 3 8B Instruct model.

## Further tests

Further tests are available at
[NLPExampleTests](../../Tests/GrAIExamples/NLPExampleTests.swift).
[LLMExampleTests](../../Tests/GrAIExamples/LLMExampleTests.swift).
In order to run them, rename
`_testPredict1` and `_testPredict32` into `testPredict1` and `testPredict32`.

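The setup above assumes each downloaded checkpoint directory is complete before the paths in `LLMExample.swift` are updated. Below is a minimal sketch of such a sanity check, assuming the Mistral/Llama-style layout (`consolidated.00.pth`, `tokenizer.model`, `params.json`) that the Python helpers in this commit read; the directory paths are placeholders and may differ for your downloads.

```python
from pathlib import Path

# Files the python_lib helpers expect in a checkpoint directory
# (weights, tokenizer and config). Layout assumed from generate.py.
EXPECTED_FILES = ["consolidated.00.pth", "tokenizer.model", "params.json"]


def check_model_dir(model_path: str) -> list:
    """Return the list of expected files missing from a checkpoint directory."""
    return [
        name for name in EXPECTED_FILES
        if not (Path(model_path) / name).exists()
    ]


if __name__ == "__main__":
    # Placeholder paths: use the same values as _modelPathMistral,
    # _modelPathLlama2 and _modelPathLlama3 in LLMExample.swift.
    for path in ["/path/to/mistral-7B-Instruct", "/path/to/llama-2-7b-chat"]:
        missing = check_model_dir(path)
        print(path, "OK" if not missing else f"missing {missing}")
```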
42 changes: 33 additions & 9 deletions Tests/GrAIExamples/Base/python_lib/__init__.py
@@ -5,29 +5,53 @@
next_data_CIFAR,
)
from python_lib.weight import (
extract_state_key,
load_simple_auto_encoder_weights,
load_llm_weights,
load_mistral_state,
load_llama_state,
)
from python_lib.trainer import (
train_simple_auto_encoder,
step_simple_auto_encoder,
)
from python_lib.nlp.generate import (
predict,
encode,
decode,
from python_lib.nlp.mistral.generate import (
predict_mistral,
load_mistral_tokenizer,
encode_mistral,
decode_mistral,
)
from python_lib.nlp.llama2.generate import (
load_llama2_tokenizer,
encode_llama2,
decode_llama2,
)
from python_lib.nlp.llama3.generate import (
load_llama3_tokenizer,
load_llama3_formatter,
encode_llama3,
decode_llama3
)

__all__ = [
"load_CIFAR_train",
"load_CIFAR_test",
"iter_CIFAR",
"next_data_CIFAR",
"extract_state_key",
"load_simple_auto_encoder_weights",
"load_llm_weights",
"load_mistral_state",
"load_llama_state",
"train_simple_auto_encoder",
"step_simple_auto_encoder",
"predict",
"encode",
"decode",
"predict_mistral",
"load_mistral_tokenizer",
"encode_mistral",
"decode_mistral",
"load_llama2_tokenizer",
"encode_llama2",
"decode_llama2",
"load_llama3_tokenizer",
"load_llama3_formatter",
"encode_llama3",
"decode_llama3",
]
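The generic `predict` / `encode` / `decode` exports are replaced by model-specific variants. Below is a minimal sketch of a tokenizer round trip with the Mistral helpers, assuming they keep the `(prompt, model_path)` signature of the removed generic functions in `generate.py` below; the checkpoint path is a placeholder.

```python
# Assumed signatures: encode_mistral(prompt, model_path) -> list of token ids,
# decode_mistral(prompt, model_path) -> str, mirroring the removed generic
# encode / decode helpers. The path below is a placeholder.
from python_lib import decode_mistral, encode_mistral

model_path = "/path/to/mistral-7B-Instruct"

tokens = encode_mistral(prompt="How do you do?", model_path=model_path)
text = decode_mistral(prompt=tokens, model_path=model_path)
print(tokens, text)
```

The Llama 2 and Llama 3 helpers (`encode_llama2`, `decode_llama2`, `encode_llama3`, `decode_llama3`) presumably follow the same pattern against their respective checkpoints.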
218 changes: 4 additions & 214 deletions Tests/GrAIExamples/Base/python_lib/nlp/generate.py
@@ -1,14 +1,10 @@
import json
import torch
import numpy as np
from pathlib import Path
from typing import Generator, List, Optional
from typing import Generator, Optional

from python_lib.nlp.tokenizer import Tokenizer
from python_lib.nlp.model import Transformer, TransformerArgs
from python_lib.nlp.model import Transformer


def _predict_no_cache(
def predict_no_cache(
prompt: torch.Tensor,
model: Transformer,
temp: float = 0.0,
@@ -47,7 +43,7 @@ def sample(logits: torch.Tensor) -> torch.Tensor:
return sample(logits)


def _generate_with_cache(
def generate_with_cache(
prompt: torch.Tensor, model: Transformer, temp: float = 0.0
) -> Generator[torch.Tensor, None, None]:
"""
@@ -84,209 +80,3 @@ def sample(logits: torch.Tensor) -> torch.Tensor:
logits = logits[:, -1, :]
y = sample(logits)
yield y


def _generate(
prompt: str,
model_path: str,
temp: float = 0,
max_tokens: int = 128
):
"""
Generate text based on the given prompt and model.
Parameters
----------
prompt: torch.Tensor
The input prompt.
model_path: str
Path to the model on the disk.
temp: float
The temperature for sampling. If temp is 0, use max sampling.
max_tokens: int
The maximal number of generated tokens.
"""
state = torch.load(str(Path(model_path) / "consolidated.00.pth"))
tokenizer = Tokenizer(str(Path(model_path) / "tokenizer.model"))

with open(Path(model_path) / "params.json", "r") as f:
config = json.loads(f.read())
config.pop("sliding_window", None)
config.pop("model_type", None)
model_args = TransformerArgs(**config)

model = Transformer(model_args)
model.load_state_dict(state)
model.to("mps")

print(prompt, end="", flush=True)
prompt = torch.tensor(
tokenizer.encode(prompt), dtype=torch.long, device="mps"
)

tokens = []
skip = 0
for token, n in zip(
_generate_with_cache(prompt, model, temp),
range(max_tokens),
):
if token == tokenizer.eos_id:
break

tokens.append(token.item())
s = tokenizer.decode(tokens)
if len(s) - skip > 1:
print(s[skip:-1], end="", flush=True)
skip = len(s) - 1

print(tokenizer.decode(tokens)[skip:], flush=True)
print("=" * 10)

if len(tokens) == 0:
print("No tokens generated for this prompt.")
return


def _predict(
prompt: str,
model_path: str,
temp: float = 0,
n_layers: Optional[int] = None
):
"""
Predict text based on the given prompt and model.
Parameters
----------
prompt: torch.Tensor
The input prompt.
model_path: str
Path to the model on the disk.
temp: float
The temperature for sampling. If temp is 0, use max sampling.
n_layers: int
Modifier of the number of Transformer blocks.
"""
state = torch.load(str(Path(model_path) / "consolidated.00.pth"))
tokenizer = Tokenizer(str(Path(model_path) / "tokenizer.model"))

with open(Path(model_path) / "params.json", "r") as f:
config = json.loads(f.read())
config.pop("sliding_window", None)
config.pop("model_type", None)
model_args = TransformerArgs(**config)

model = Transformer(model_args)
model.load_state_dict(state)
model.to("mps")

print(prompt, end="", flush=True)
prompt = torch.tensor(
tokenizer.encode(prompt), dtype=torch.long, device="mps"
)

tokens = _predict_no_cache(
prompt, model, temp, n_layers
).squeeze(dim=0).cpu().numpy().tolist()
print(tokenizer.decode(tokens))


def predict(
prompt: str,
model_path: str,
n_layers: Optional[int] = None
) -> np.ndarray:
"""
Predict text based on the given prompt and model.
Parameters
----------
prompt: torch.Tensor
The input prompt.
model_path: str
Path to the model on the disk.
n_layers: int
Modifier of the number of Transformer blocks.
"""
state = torch.load(str(Path(model_path) / "consolidated.00.pth"))
tokenizer = Tokenizer(str(Path(model_path) / "tokenizer.model"))

with open(Path(model_path) / "params.json", "r") as f:
config = json.loads(f.read())
config.pop("sliding_window", None)
config.pop("model_type", None)
model_args = TransformerArgs(**config)

model = Transformer(model_args)
model.load_state_dict(state)
model.to("mps")

prompt = torch.tensor(
tokenizer.encode(prompt), dtype=torch.long, device="mps"
)
out, _ = model(prompt[None], n_layers=n_layers)
return out.detach().cpu().numpy().flatten()


def encode(
prompt: str,
model_path: str
) -> List[int]:
"""
Encode text.
Parameters
----------
prompt: torch.Tensor
The input prompt.
model_path: str
Path to the model on the disk.
"""
tokenizer = Tokenizer(str(Path(model_path) / "tokenizer.model"))
return tokenizer.encode(prompt)


def decode(
prompt: List[int],
model_path: str
) -> str:
"""
Decode text.
Parameters
----------
prompt: [int]
The input prompt.
model_path: str
Path to the model on the disk.
"""
tokenizer = Tokenizer(str(Path(model_path) / "tokenizer.model"))
return tokenizer.decode(prompt)


if __name__ == "__main__":
model_path = ""
prompt = "How do you do?"

_generate(
prompt="How do you do?",
model_path=model_path
)
prompt = encode(
prompt=prompt,
model_path=model_path
)
prompt = decode(
prompt=prompt,
model_path=model_path
)
_predict(
prompt=prompt,
model_path=model_path,
n_layers=None
)
predict(
prompt=prompt,
model_path=model_path,
n_layers=1
)
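For reference, the generation loop that the removed `_generate` helper implemented above can still be reproduced with the now-public `generate_with_cache`. Below is a condensed sketch under the same assumptions as that helper (Mistral-style checkpoint layout, `TransformerArgs` still importable from `python_lib.nlp.model`, an Apple-silicon `mps` device); the checkpoint path and prompt are placeholders.

```python
import json
from pathlib import Path

import torch

from python_lib.nlp.generate import generate_with_cache
from python_lib.nlp.model import Transformer, TransformerArgs
from python_lib.nlp.tokenizer import Tokenizer

model_path = "/path/to/model"  # placeholder checkpoint directory
max_tokens = 128

# Load weights, tokenizer and config exactly as the removed _generate did.
state = torch.load(str(Path(model_path) / "consolidated.00.pth"))
tokenizer = Tokenizer(str(Path(model_path) / "tokenizer.model"))
with open(Path(model_path) / "params.json", "r") as f:
    config = json.loads(f.read())
config.pop("sliding_window", None)
config.pop("model_type", None)

model = Transformer(TransformerArgs(**config))
model.load_state_dict(state)
model.to("mps")  # device carried over from the removed helper

prompt = torch.tensor(
    tokenizer.encode("How do you do?"), dtype=torch.long, device="mps"
)

# Drive the generator token by token, stopping at EOS or max_tokens.
tokens = []
for token, _ in zip(generate_with_cache(prompt, model, temp=0.0), range(max_tokens)):
    if token == tokenizer.eos_id:
        break
    tokens.append(token.item())

print(tokenizer.decode(tokens))
```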
