[FP8] rebase main #5963

Merged
merged 132 commits into from
Aug 6, 2024
Changes from 128 commits

132 commits
82aecd6
add SimPO
YeAnbang Jun 24, 2024
4b59d87
Merge branch 'main' of https://github.com/hpcaitech/ColossalAI into main
YeAnbang Jun 24, 2024
0b2d627
fix dataloader
YeAnbang Jun 24, 2024
f3de5a0
remove debug code
YeAnbang Jun 24, 2024
c8d1b4a
add orpo
YeAnbang Jun 27, 2024
8aad064
fix style
YeAnbang Jun 27, 2024
384c640
fix colossalai, transformers version
YeAnbang Jun 27, 2024
afa5306
fix colossalai, transformers version
YeAnbang Jun 27, 2024
b117274
fix colossalai, transformers version
YeAnbang Jun 27, 2024
e752776
Merge branch 'main' of https://github.com/hpcaitech/ColossalAI into r…
YeAnbang Jun 28, 2024
a8af6cc
fix torch colossalai version
YeAnbang Jun 28, 2024
ff53520
update transformers version
YeAnbang Jun 28, 2024
3420921
[shardformer] DeepseekMoE support (#5871)
Hz188 Jul 5, 2024
8ec24b6
[Hoxfix] Fix CUDA_DEVICE_MAX_CONNECTIONS for comm overlap
Edenzzzz Jul 5, 2024
cba2052
[Feat] Diffusion Model(PixArtAlpha/StableDiffusion3) Support (#5838)
LRY89757 Jul 8, 2024
66abf1c
[HotFix] CI,import,requirements-test for #5838 (#5892)
LRY89757 Jul 8, 2024
fbf33ec
[Feature] Enable PP + SP for llama (#5868)
Edenzzzz Jul 9, 2024
16f3451
Merge branch 'main' of https://github.com/hpcaitech/ColossalAI into r…
YeAnbang Jul 10, 2024
669849d
[ShardFormer] Add Ulysses Sequence Parallelism support for Command-R,…
GuangyaoZhang Jul 10, 2024
d888c37
add benchmark for sft, dpo, simpo, orpo. Add benchmarking result. Sup…
YeAnbang Jul 10, 2024
f6ef5c3
fix style
YeAnbang Jul 10, 2024
33f1520
Merge branch 'main' of https://github.com/hpcaitech/ColossalAI into r…
YeAnbang Jul 10, 2024
8a9721b
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 10, 2024
dd9e1cd
Merge pull request #5850 from hpcaitech/rlhf_SimPO
YeAnbang Jul 11, 2024
e7a8634
fix eval
YeAnbang Jul 11, 2024
115c4cc
hotfix citation
YeAnbang Jul 11, 2024
c068ef0
[zero] support all-gather overlap (#5898)
ver217 Jul 11, 2024
b3594d4
fix orpo cross entropy loss
YeAnbang Jul 15, 2024
45c49dd
[Auto Parallel]: Speed up intra-op plan generation by 44% (#5446)
stephankoe Jul 15, 2024
1c961b2
[ShardFormer] fix qwen2 sp (#5903)
GuangyaoZhang Jul 15, 2024
d8bf7e0
Merge pull request #5901 from hpcaitech/colossalchat
YeAnbang Jul 16, 2024
2e28c79
[compatibility] support torch 2.2 (#5875)
GuangyaoZhang Jul 4, 2024
530283d
fix object_to_tensor usage when torch>=2.3.0 (#5820)
kurisusnowdeng Jul 4, 2024
27a72f0
[misc] support torch2.3 (#5893)
ver217 Jul 11, 2024
73494de
[release] update version (#5912)
ver217 Jul 17, 2024
e861279
[plugin] support all-gather overlap for hybrid parallel (#5919)
ver217 Jul 18, 2024
09d5ffc
add kto
YeAnbang Jul 18, 2024
845ea72
Merge branch 'main' of https://github.com/hpcaitech/ColossalAI into kto
YeAnbang Jul 18, 2024
544b7a3
fix style, add kto data sample
YeAnbang Jul 18, 2024
8cc8f64
[Examples] Add lazy init to OPT and GPT examples (#5924)
Edenzzzz Jul 19, 2024
f585d4e
[ColossalChat] Hotfix for ColossalChat (#5910)
TongLi3701 Jul 19, 2024
d08c99b
Merge branch 'main' into kto
TongLi3701 Jul 19, 2024
d49550f
refactor tokenization
YeAnbang Jul 19, 2024
150505c
Merge branch 'kto' of https://github.com/hpcaitech/ColossalAI into kto
YeAnbang Jul 19, 2024
4ec17a7
[FIX BUG] UnboundLocalError: cannot access local variable 'default_co…
zhurunhua Jul 21, 2024
c5f582f
fix test data
YeAnbang Jul 22, 2024
12fe8b5
refactor evaluation
YeAnbang Jul 22, 2024
b0e15d5
remove real data path
YeAnbang Jul 22, 2024
9688e19
remove real data path
YeAnbang Jul 22, 2024
a521ffc
Add n_fused as an input from native_module (#5894)
insujang Jul 23, 2024
5fb958c
[FIX BUG] convert env param to int in (#5934)
flymin Jul 24, 2024
2069472
[Hotfix] Fix ZeRO typo #5936
Edenzzzz Jul 25, 2024
ad35a98
[Feature] Add a switch to control whether the model checkpoint needs …
zhurunhua Jul 26, 2024
8a3ff4f
fix style
YeAnbang Jul 26, 2024
de1bf08
fix style
YeAnbang Jul 26, 2024
6fd9e86
fix style
YeAnbang Jul 29, 2024
c8332b9
Merge pull request #5922 from hpcaitech/kto
YeAnbang Jul 29, 2024
9664b1b
[shardformer] hotfix attn mask (#5945)
ver217 Jul 29, 2024
7b38964
[shardformer] hotfix attn mask (#5947)
ver217 Jul 29, 2024
bcf0181
[Feat] Distrifusion Acceleration Support for Diffusion Inference (#5895)
LRY89757 Jul 30, 2024
0608921
[zero] hotfix update master params (#5951)
ver217 Jul 30, 2024
09c5f72
[release] update version (#5952)
ver217 Jul 31, 2024
30f4e31
[Chat] Fix lora (#5946)
YeAnbang Jul 31, 2024
66fbf2e
Update README.md (#5958)
YeAnbang Jul 31, 2024
1aeb5e8
[hotfix] Remove unused plan section (#5957)
TongLi3701 Jul 31, 2024
f9b6fcf
[test] add mixtral for sequence classification
botbw Jul 2, 2024
0b76b57
[test] add mixtral transformer test
botbw Jul 2, 2024
8ae8525
[moe] fix plugin
botbw Jul 2, 2024
a249e71
[test] mixtra pp shard test
botbw Jul 4, 2024
0fad23c
[chore] handle non member group
botbw Jul 5, 2024
46c069b
[zero] solve hang
botbw Jul 5, 2024
37443cc
[test] pass mixtral shardformer test
botbw Jul 8, 2024
b5bfeb2
[moe] implement transit between non moe tp and ep
botbw Jul 8, 2024
13b48ac
[zero] solve hang
botbw Jul 9, 2024
fe24789
[misc] solve booster hang by rename the variable
Hz188 Jul 9, 2024
5ed5e8c
solve hang when parallel mode = pp + dp
Hz188 Jul 11, 2024
e28e053
[moe] implement submesh initialization
botbw Jul 11, 2024
9b9b76b
[moe] add mixtral dp grad scaling when not all experts are activated
botbw Jul 12, 2024
014faf6
[chore] manually revert unintended commit
botbw Jul 12, 2024
8dbb868
[chore] trivial fix
botbw Jul 12, 2024
102b784
[chore] arg pass & remove drop token
botbw Jul 12, 2024
0b5bbe9
[test] add mixtral modelling test
botbw Jul 15, 2024
dc583aa
[moe] implement tp
botbw Jul 16, 2024
74eccac
[moe] test deepseek
botbw Jul 16, 2024
3e2b613
[moe] clean legacy code
botbw Jul 16, 2024
404b16f
[Feature] MoE Ulysses Support (#5918)
Hz188 Jul 18, 2024
09d6280
[chore] minor fix
botbw Jul 18, 2024
877d94b
[moe] init moe plugin comm setting with sp
botbw Jul 18, 2024
2cddeac
moe sp + ep bug fix
Hz188 Jul 18, 2024
7077d38
[moe] finalize test (no pp)
botbw Jul 18, 2024
803878b
[moe] full test for deepseek and mixtral (pp + sp to fix)
botbw Jul 19, 2024
46037c2
[chore] minor fix after rebase
botbw Jul 19, 2024
52d346f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 19, 2024
70c9924
[chore] solve moe ckpt test failure and some other arg pass failure
botbw Jul 22, 2024
74b03de
[moe] remove ops
botbw Jul 22, 2024
067e18f
[test] fix test: test_zero1_2
botbw Jul 22, 2024
96d0fbc
[bug] fix: somehow logger hangs the program
botbw Jul 23, 2024
b2952a5
[moe] deepseek moe sp support
Hz188 Jul 23, 2024
6c39f0b
[test] add check
botbw Jul 23, 2024
c3dc9b4
[deepseek] replace attn (a workaround for bug in transformers)
botbw Jul 23, 2024
59bcf56
[misc] skip redunant test
Hz188 Jul 24, 2024
034020b
[misc] remove debug/print code
Hz188 Jul 24, 2024
cb01c0d
[moe] refactor mesh assignment
botbw Jul 25, 2024
5b4c123
Revert "[moe] implement submesh initialization"
botbw Jul 25, 2024
606b089
[chore] change moe_pg_mesh to private
botbw Jul 25, 2024
12d043c
[misc] remove incompatible test config
Hz188 Jul 25, 2024
70793ce
[misc] fix ci failure: change default value to false in moe plugin
Hz188 Jul 25, 2024
7e737df
[misc] remove useless condition
Hz188 Jul 25, 2024
f7c5485
[chore] docstring
botbw Jul 25, 2024
7bedd03
[moe] remove force_overlap_comm flag and add warning instead
botbw Jul 25, 2024
65daa87
[doc] add MoeHybridParallelPlugin docstring
botbw Jul 26, 2024
d1d1ab8
[moe] solve dp axis issue
botbw Jul 26, 2024
62cdac6
[chore] remove redundant test case, print string & reduce test tokens
botbw Jul 30, 2024
19d1510
[feat] Dist Loader for Eval (#5950)
TongLi3701 Aug 2, 2024
75c9636
[lora] lora support hybrid parallel plugin (#5956)
wangbluo Aug 2, 2024
82c8475
fp8 operators for compressed communication
BurkeHulk Jul 1, 2024
99a9bf3
fix scaling algorithm in FP8 casting
BurkeHulk Jul 12, 2024
7052579
support fp8 communication in pipeline parallelism
BurkeHulk Jul 12, 2024
c30a24e
add fp8_communication flag in the script
BurkeHulk Jul 12, 2024
3b1c861
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 4, 2024
fb9486c
fix typo
GuangyaoZhang Jul 10, 2024
e60fcdd
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 12, 2024
778513e
shardformer fp8
GuangyaoZhang Jul 8, 2024
259e696
fix rebase
GuangyaoZhang Jul 17, 2024
f7c7273
remove all to all
GuangyaoZhang Jul 17, 2024
afe4200
fix shardformer fp8 communication training degradation
GuangyaoZhang Jul 18, 2024
1e7293f
[fp8] support all-gather flat tensor (#5932)
ver217 Jul 24, 2024
047feb9
Merge branch 'feature/fp8_comm' into feature/fp8_comm
flybird11111 Aug 2, 2024
811d5af
Merge branch 'feature/fp8_comm' into feature/fp8_comm
flybird11111 Aug 5, 2024
0e6e488
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 5, 2024
7e0c777
fix
flybird11111 Aug 5, 2024
6f29436
Update low_level_optim.py
flybird11111 Aug 6, 2024
2 changes: 2 additions & 0 deletions .compatibility
@@ -1 +1,3 @@
2.1.0-12.1.0
2.2.2-12.1.0
2.3.0-12.1.0
32 changes: 9 additions & 23 deletions .github/workflows/compatiblity_test_on_dispatch.yml
@@ -55,41 +55,27 @@ jobs:
steps:
- name: Install dependencies
run: |
pip install -U pip setuptools==68.2.2 wheel --user
- uses: actions/checkout@v2
with:
repository: hpcaitech/TensorNVMe
ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
path: TensorNVMe
- name: Install tensornvme
run: |
cd TensorNVMe
apt update && apt install -y cmake
pip install -r requirements.txt
DISABLE_URING=1 pip install -v .
pip install -U pip setuptools==68.2.2 wheel --user

- uses: actions/checkout@v2
with:
ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
- name: Download cub for CUDA 10.2
run: |
CUDA_VERSION=$(nvcc -V | awk -F ',| ' '/release/{print $6}')

# check if it is CUDA 10.2
# download cub
if [ "$CUDA_VERSION" = "10.2" ]; then
wget https://github.com/NVIDIA/cub/archive/refs/tags/1.8.0.zip
unzip 1.8.0.zip
cp -r cub-1.8.0/cub/ colossalai/kernel/cuda_native/csrc/kernels/include/
fi
- name: Install Colossal-AI
run: |
BUILD_EXT=1 pip install -v .
pip install -r requirements/requirements-test.txt
pip install --no-cache-dir -r requirements/requirements-test.txt

- name: Install tensornvme
run: |
DISABLE_URING=1 pip install -v git+https://github.com/hpcaitech/TensorNVMe.git

- name: Unit Testing
run: |
PYTHONPATH=$PWD pytest --durations=0 tests
env:
DATA: /data/scratch/cifar-10
LD_LIBRARY_PATH: /github/home/.tensornvme/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
LD_LIBRARY_PATH: /github/home/.tensornvme/lib
LLAMA_PATH: /data/scratch/llama-tiny
MOE_TENSOR_PATH: /data/scratch/moe_tensors
33 changes: 9 additions & 24 deletions .github/workflows/compatiblity_test_on_pr.yml
@@ -49,42 +49,27 @@ jobs:
steps:
- name: Install dependencies
run: |
pip install -U pip setuptools==68.2.2 wheel --user
- uses: actions/checkout@v2
with:
repository: hpcaitech/TensorNVMe
ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
path: TensorNVMe
- name: Install tensornvme
run: |
cd TensorNVMe
apt update && apt install -y cmake
pip install -r requirements.txt
DISABLE_URING=1 pip install -v .
pip install -U pip setuptools==68.2.2 wheel --user

- uses: actions/checkout@v2
with:
ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
- name: Download cub for CUDA 10.2
run: |
CUDA_VERSION=$(nvcc -V | awk -F ',| ' '/release/{print $6}')

# check if it is CUDA 10.2
# download cub
if [ "$CUDA_VERSION" = "10.2" ]; then
wget https://github.com/NVIDIA/cub/archive/refs/tags/1.8.0.zip
unzip 1.8.0.zip
cp -r cub-1.8.0/cub/ colossalai/kernel/cuda_native/csrc/kernels/include/
fi

- name: Install Colossal-AI
run: |
BUILD_EXT=1 pip install -v .
pip install -r requirements/requirements-test.txt
pip install --no-cache-dir -r requirements/requirements-test.txt

- name: Install tensornvme
run: |
DISABLE_URING=1 pip install -v git+https://github.com/hpcaitech/TensorNVMe.git

- name: Unit Testing
run: |
PYTHONPATH=$PWD pytest --durations=0 tests
env:
DATA: /data/scratch/cifar-10
LD_LIBRARY_PATH: /github/home/.tensornvme/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
LD_LIBRARY_PATH: /github/home/.tensornvme/lib
LLAMA_PATH: /data/scratch/llama-tiny
MOE_TENSOR_PATH: /data/scratch/moe_tensors
33 changes: 7 additions & 26 deletions .github/workflows/compatiblity_test_on_schedule.yml
@@ -43,47 +43,28 @@ jobs:
steps:
- name: Install dependencies
run: |
apt update && apt install -y cmake
pip install -U pip setuptools==68.2.2 wheel --user

- uses: actions/checkout@v2
with:
repository: hpcaitech/TensorNVMe
ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
path: TensorNVMe

- name: Install tensornvme
run: |
cd TensorNVMe
apt update && apt install -y cmake
pip install -r requirements.txt
DISABLE_URING=1 pip install -v .
- uses: actions/checkout@v2
with:
ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}

- name: Download cub for CUDA 10.2
run: |
CUDA_VERSION=$(nvcc -V | awk -F ',| ' '/release/{print $6}')

# check if it is CUDA 10.2
# download cub
if [ "$CUDA_VERSION" = "10.2" ]; then
wget https://github.com/NVIDIA/cub/archive/refs/tags/1.8.0.zip
unzip 1.8.0.zip
cp -r cub-1.8.0/cub/ colossalai/kernel/cuda_native/csrc/kernels/include/
fi

- name: Install Colossal-AI
run: |
BUILD_EXT=1 pip install -v .
pip install -r requirements/requirements-test.txt
pip install --no-cache-dir -r requirements/requirements-test.txt

- name: Install tensornvme
run: |
DISABLE_URING=1 pip install -v git+https://github.com/hpcaitech/TensorNVMe.git

- name: Unit Testing
run: |
PYTHONPATH=$PWD pytest --durations=0 tests
env:
DATA: /data/scratch/cifar-10
LD_LIBRARY_PATH: /github/home/.tensornvme/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
LD_LIBRARY_PATH: /github/home/.tensornvme/lib
LLAMA_PATH: /data/scratch/llama-tiny
MOE_TENSOR_PATH: /data/scratch/moe_tensors

2 changes: 2 additions & 0 deletions .github/workflows/run_chatgpt_examples.yml
@@ -52,6 +52,7 @@ jobs:
mkdir sft_data
mkdir prompt_data
mkdir preference_data
mkdir kto_data
./tests/test_data_preparation.sh
./tests/test_train.sh
env:
@@ -61,3 +62,4 @@
SFT_DATASET: ./sft_data
PROMPT_DATASET: ./prompt_data
PREFERENCE_DATASET: ./preference_data
KTO_DATASET: ./kto_data
4 changes: 3 additions & 1 deletion applications/Colossal-LLaMA/prepare_sft_dataset.py
@@ -10,7 +10,7 @@
import os
from multiprocessing import cpu_count

from colossal_llama.dataset.conversation import LLaMA2_Conv
from colossal_llama.dataset.conversation import LLaMA2_Conv, LLaMA3_Conv
from colossal_llama.dataset.spliced_and_tokenized_dataset import supervised_tokenize_sft
from datasets import dataset_dict, load_dataset
from transformers import AddedToken, AutoTokenizer
@@ -75,6 +75,8 @@ def main():
# Prepare to the tokenizer.
tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_dir)

default_conversation = LLaMA3_Conv

# Fix </s> split issue: https://github.com/huggingface/transformers/issues/23833
if args.llama_version == 2:
tokenizer.add_tokens(AddedToken("</s>", normalized=False, special=True), special_tokens=True)
18 changes: 15 additions & 3 deletions applications/Colossal-LLaMA/train.py
@@ -128,6 +128,12 @@ def main() -> None:
parser.add_argument("--zero", type=int, default=1)
parser.add_argument("--pad_token", choices=["eos", "unk"], default="eos")
parser.add_argument("--padding_mode", choices=["max_length", "longest"], default="max_length")
parser.add_argument(
"--skip_save_each_epoch",
action="store_true",
default=False,
help="skip saving the model checkpoint after each epoch is completed.",
)
args = parser.parse_args()

with open(args.config_file, "w") as f:
@@ -370,11 +376,17 @@ def main() -> None:
)
total_loss.fill_(0.0)
pbar.update()

# Save modeling.

if (args.save_interval > 0 and (step + 1) % (args.save_interval * args.accumulation_steps) == 0) or (
step + 1
) == len(dataloader):
save_model_condition = (
args.save_interval > 0 and (step + 1) % (args.save_interval * args.accumulation_steps) == 0
)

if not args.skip_save_each_epoch:
save_model_condition = save_model_condition or (step + 1) == len(dataloader)

if save_model_condition:
coordinator.print_on_master("\nStart saving model checkpoint with running states")

if args.use_neft:
3 changes: 3 additions & 0 deletions applications/ColossalChat/.gitignore
@@ -146,6 +146,9 @@ docs/.build
examples/wandb/
examples/logs/
examples/output/
examples/training_scripts/logs
examples/training_scripts/wandb
examples/training_scripts/output

examples/awesome-chatgpt-prompts/
temp/
74 changes: 48 additions & 26 deletions applications/ColossalChat/README.md
@@ -23,6 +23,10 @@
- [Open QA](#open-qa)
- [Limitation for LLaMA-finetuned models](#limitation)
- [Limitation of dataset](#limitation)
- [Alternative Option For RLHF: DPO](#alternative-option-for-rlhf-direct-preference-optimization)
- [Alternative Option For RLHF: SimPO](#alternative-option-for-rlhf-simple-preference-optimization-simpo)
- [Alternative Option For RLHF: ORPO](#alternative-option-for-rlhf-odds-ratio-preference-optimization-orpo)
- [Alternative Option For RLHF: KTO](#alternative-option-for-rlhf-kahneman-tversky-optimization-kto)
- [FAQ](#faq)
- [How to save/load checkpoint](#faq)
- [How to train with limited resources](#faq)
@@ -135,17 +139,15 @@ The first step in Stage 1 is to collect a dataset of human demonstrations of the
{"messages":
[
{
"from": "human",
"from": "user",
"content": "what are some pranks with a pen i can do?"
},
{
"from": "assistant",
"content": "Are you looking for practical joke ideas?"
},
...
]
},
...
]
```

@@ -171,23 +173,20 @@ Below shows the preference dataset format used in training the reward model.
"from": "human",
"content": "Introduce butterflies species in Oregon."
}
]
],
"chosen": [
{
"from": "assistant",
"content": "About 150 species of butterflies live in Oregon, with about 100 species are moths..."
},
...
],
"rejected": [
{
"from": "assistant",
"content": "Are you interested in just the common butterflies? There are a few common ones which will be easy to find..."
},
...
]
},
...
]
```

@@ -216,7 +215,6 @@ PPO uses two kind of training data--- the prompt data and the sft data (optional
"from": "human",
"content": "what are some pranks with a pen i can do?"
}
...
]
},
]
@@ -262,9 +260,8 @@ experience buffer size
= train_batch_size * accumulation_steps * num_tp_group
```
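As a sanity check, the relationship above can be expressed as a tiny helper (a sketch only; the argument names mirror the formula, not variables in the actual training script):

```python
def experience_buffer_size(train_batch_size: int,
                           accumulation_steps: int,
                           num_tp_group: int) -> int:
    # number of experiences collected before each PPO update, per the formula above
    return train_batch_size * accumulation_steps * num_tp_group

print(experience_buffer_size(4, 2, 2))  # 16
```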

## Alternative Option For RLHF: Direct Preference Optimization

For those seeking an alternative to Reinforcement Learning from Human Feedback (RLHF), Direct Preference Optimization (DPO) presents a compelling option. DPO, as detailed in the paper (available at [https://arxiv.org/abs/2305.18290](https://arxiv.org/abs/2305.18290)), DPO offers an low-cost way to perform RLHF and usually request less computation resources compares to PPO.
## Alternative Option For RLHF: Direct Preference Optimization (DPO)
For those seeking an alternative to Reinforcement Learning from Human Feedback (RLHF), Direct Preference Optimization (DPO) presents a compelling option. As detailed in this [paper](https://arxiv.org/abs/2305.18290), DPO offers a low-cost way to perform RLHF and usually requires fewer compute resources than PPO. Read this [README](./examples/README.md) for more information.

### DPO Training Stage1 - Supervised Instructs Tuning

@@ -277,6 +274,15 @@ For DPO training, you only need the preference dataset. Please follow the instru
#### Step 2: Training
You can run [train_dpo.sh](./examples/training_scripts/train_dpo.sh) to start DPO training. More details can be found in the [example guideline](./examples/README.md).
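For intuition, the DPO objective for a single preference pair can be sketched in a few lines of plain Python. This is an illustrative sketch of the published formula, not the ColossalChat implementation, which operates on batched per-token log-probabilities; the default `beta` here is only a common choice:

```python
import math

def dpo_loss(policy_chosen_logp: float, policy_rejected_logp: float,
             ref_chosen_logp: float, ref_rejected_logp: float,
             beta: float = 0.1) -> float:
    """DPO loss for one preference pair (Rafailov et al., arXiv:2305.18290).
    Each argument is the summed log-probability of a full response."""
    # implicit rewards: beta-scaled log-ratio between policy and frozen reference
    chosen_reward = beta * (policy_chosen_logp - ref_chosen_logp)
    rejected_reward = beta * (policy_rejected_logp - ref_rejected_logp)
    # -log sigmoid(margin): shrinks as the chosen response becomes more likely
    margin = chosen_reward - rejected_reward
    return -math.log(1.0 / (1.0 + math.exp(-margin)))
```

When policy and reference agree, the margin is zero and the loss is log 2; raising the policy's probability of the chosen response lowers the loss.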

## Alternative Option For RLHF: Simple Preference Optimization (SimPO)
Simple Preference Optimization (SimPO), from this [paper](https://arxiv.org/pdf/2405.14734), is similar to DPO but abandons the reference model, which makes training more efficient. It adds a reward-shaping term called the target reward margin to enhance training stability, and uses length normalization to better align with the inference process. Read this [README](./examples/README.md) for more information.
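A minimal sketch of the SimPO objective for one pair, following the paper's formula rather than the repository code (the `beta` and `gamma` defaults below are illustrative values, not the script defaults):

```python
import math

def simpo_loss(chosen_logp: float, rejected_logp: float,
               chosen_len: int, rejected_len: int,
               beta: float = 2.0, gamma: float = 0.5) -> float:
    """SimPO loss for one pair (Meng et al., arXiv:2405.14734): implicit
    rewards are length-normalized log-probs, no reference model is used,
    and gamma is the target reward margin."""
    chosen_reward = beta * chosen_logp / chosen_len
    rejected_reward = beta * rejected_logp / rejected_len
    margin = chosen_reward - rejected_reward - gamma
    return -math.log(1.0 / (1.0 + math.exp(-margin)))  # -log sigmoid
```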

## Alternative Option For RLHF: Odds Ratio Preference Optimization (ORPO)
Odds Ratio Preference Optimization (ORPO), from this [paper](https://arxiv.org/pdf/2403.07691), is a reference-model-free alignment method that combines the SFT loss with a reinforcement learning loss computed from an odds-ratio-based implicit reward, making training more efficient and stable. Read this [README](./examples/README.md) for more information.
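The mixture of SFT loss and odds-ratio loss can be sketched as below. This follows the paper's formulation, not the repository code; inputs are average per-token log-probabilities (strictly negative, so probabilities stay below 1), and `lam` is an illustrative weighting:

```python
import math

def orpo_loss(chosen_avg_logp: float, rejected_avg_logp: float,
              lam: float = 0.1) -> float:
    """ORPO loss for one pair (Hong et al., arXiv:2403.07691): SFT NLL on
    the chosen response plus a weighted log-odds-ratio penalty; no
    reference model. Inputs must be < 0."""
    def log_odds(avg_logp: float) -> float:
        p = math.exp(avg_logp)          # average per-token probability
        return math.log(p) - math.log(1.0 - p)
    ratio = log_odds(chosen_avg_logp) - log_odds(rejected_avg_logp)
    l_or = -math.log(1.0 / (1.0 + math.exp(-ratio)))  # -log sigmoid(ratio)
    l_sft = -chosen_avg_logp                          # NLL of chosen response
    return l_sft + lam * l_or
```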

## Alternative Option For RLHF: Kahneman-Tversky Optimization (KTO)
We support Kahneman-Tversky Optimization (KTO), the method introduced in the paper [KTO: Model Alignment as Prospect Theoretic Optimization](https://arxiv.org/pdf/2402.01306). It is an alignment method that directly maximizes the "human utility" of generation results. Read this [README](./examples/README.md) for more information.
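A per-example sketch of the KTO objective, paraphrasing the paper's formula (not the ColossalChat implementation): unpaired examples are labeled desirable or undesirable, and `z_ref` stands in for the paper's detached KL estimate between policy and reference.

```python
import math

def kto_loss(policy_logp: float, ref_logp: float, z_ref: float,
             desirable: bool, beta: float = 0.1,
             lambda_d: float = 1.0, lambda_u: float = 1.0) -> float:
    """KTO loss for one example (Ethayarajh et al., arXiv:2402.01306).
    Desirable examples are pushed above the KL reference point z_ref,
    undesirable ones below it."""
    sigmoid = lambda x: 1.0 / (1.0 + math.exp(-x))
    reward = policy_logp - ref_logp  # implicit reward: policy/reference log-ratio
    if desirable:
        return lambda_d * (1.0 - sigmoid(beta * (reward - z_ref)))
    return lambda_u * (1.0 - sigmoid(beta * (z_ref - reward)))
```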

### Inference Quantization and Serving - After Training

We provide an online inference server and a benchmark. We aim to run inference on single GPU, so quantization is essential when using large models.
@@ -441,20 +447,6 @@ If you only have a single 24G GPU. Generally, using lora and "zero2-cpu" will be
If you have multiple GPUs, each with very limited VRAM (say 8GB), you can try the `3d` plugin option, which supports tensor parallelism; set `--tp` to the number of GPUs that you have.
</details>

## The Plan

- [x] implement PPO fine-tuning
- [x] implement training reward model
- [x] support LoRA
- [x] support inference
- [x] support llama from [facebook](https://github.com/facebookresearch/llama)
- [x] implement PPO-ptx fine-tuning
- [x] support flash-attention
- [x] implement DPO fine-tuning
- [ ] integrate with Ray
- [ ] support more RL paradigms, like Implicit Language Q-Learning (ILQL),
- [ ] support chain-of-thought by [langchain](https://github.com/hwchase17/langchain)

### Real-time progress

You will find our progress on the GitHub [project board](https://github.com/orgs/hpcaitech/projects/17/views/1).
@@ -522,7 +514,7 @@ Coati is developed by ColossalAI Team:
- [Fazzie](https://fazzie-key.cool/about/index.html) Contributing to the algorithm and development for SFT.
- [ofey404](https://github.com/ofey404) Contributing to both front-end and back-end development.
- [Wenhao Chen](https://github.com/CWHer) Contributing to subsequent code enhancements and performance improvements.
- [Anbang Ye](https://github.com/YeAnbang) Contributing to the refactored version with updated acceleration framework, LoRA, DPO and PPO.
- [Anbang Ye](https://github.com/YeAnbang) Contributing to the refactored PPO version with the updated acceleration framework; added support for DPO, SimPO, and ORPO.

The PhD student from [(HPC-AI) Lab](https://ai.comp.nus.edu.sg/) also contributed a lot to this project.
- [Zangwei Zheng](https://github.com/zhengzangw)
@@ -572,6 +564,36 @@ We also appreciate the valuable suggestions provided by [Jian Hu](https://github
journal = {GitHub repository},
howpublished = {\url{https://github.com/XueFuzhao/InstructionWild}},
}

@misc{meng2024simposimplepreferenceoptimization,
title={SimPO: Simple Preference Optimization with a Reference-Free Reward},
author={Yu Meng and Mengzhou Xia and Danqi Chen},
year={2024},
eprint={2405.14734},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2405.14734},
}

@misc{rafailov2023directpreferenceoptimizationlanguage,
title={Direct Preference Optimization: Your Language Model is Secretly a Reward Model},
author={Rafael Rafailov and Archit Sharma and Eric Mitchell and Stefano Ermon and Christopher D. Manning and Chelsea Finn},
year={2023},
eprint={2305.18290},
archivePrefix={arXiv},
primaryClass={cs.LG},
url={https://arxiv.org/abs/2305.18290},
}

@misc{hong2024orpomonolithicpreferenceoptimization,
title={ORPO: Monolithic Preference Optimization without Reference Model},
author={Jiwoo Hong and Noah Lee and James Thorne},
year={2024},
eprint={2403.07691},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2403.07691},
}
```

## Licenses