diff --git a/.cuda_ext.json b/.cuda_ext.json
index eba19cf05e31..b8269f83786c 100644
--- a/.cuda_ext.json
+++ b/.cuda_ext.json
@@ -1,16 +1,16 @@
 {
     "build": [
         {
-            "torch_command": "pip install torch==1.12.1+cu102 torchvision==0.13.1+cu102 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu102",
-            "cuda_image": "hpcaitech/cuda-conda:10.2"
+            "torch_command": "pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu121",
+            "cuda_image": "hpcaitech/cuda-conda:12.1"
         },
         {
-            "torch_command": "pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113",
-            "cuda_image": "hpcaitech/cuda-conda:11.3"
+            "torch_command": "pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu118",
+            "cuda_image": "hpcaitech/cuda-conda:11.8"
         },
         {
-            "torch_command": "pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu116",
-            "cuda_image": "hpcaitech/cuda-conda:11.6"
+            "torch_command": "pip install torch==2.0.0 torchvision==0.15.1 torchaudio==2.0.1",
+            "cuda_image": "hpcaitech/cuda-conda:11.7"
         }
     ]
 }
diff --git a/.github/workflows/compatiblity_test_on_dispatch.yml b/.github/workflows/compatiblity_test_on_dispatch.yml
index 5083212993cc..a6f9582ac901 100644
--- a/.github/workflows/compatiblity_test_on_dispatch.yml
+++ b/.github/workflows/compatiblity_test_on_dispatch.yml
@@ -83,7 +83,7 @@ jobs:
           fi
       - name: Install Colossal-AI
         run: |
-          CUDA_EXT=1 pip install -v .
+          BUILD_EXT=1 pip install -v .
           pip install -r requirements/requirements-test.txt
       - name: Unit Testing
         run: |
diff --git a/.github/workflows/compatiblity_test_on_pr.yml b/.github/workflows/compatiblity_test_on_pr.yml
index cc17c66f9c3a..ede6c380a8ec 100644
--- a/.github/workflows/compatiblity_test_on_pr.yml
+++ b/.github/workflows/compatiblity_test_on_pr.yml
@@ -78,7 +78,7 @@ jobs:

       - name: Install Colossal-AI
         run: |
-          CUDA_EXT=1 pip install -v .
+          BUILD_EXT=1 pip install -v .
           pip install -r requirements/requirements-test.txt
       - name: Unit Testing
         run: |
diff --git a/.github/workflows/compatiblity_test_on_schedule.yml b/.github/workflows/compatiblity_test_on_schedule.yml
index 158fe751bf2e..1cf456ff62c1 100644
--- a/.github/workflows/compatiblity_test_on_schedule.yml
+++ b/.github/workflows/compatiblity_test_on_schedule.yml
@@ -75,7 +75,7 @@ jobs:
       - name: Install Colossal-AI
         run: |
-          CUDA_EXT=1 pip install -v .
+          BUILD_EXT=1 pip install -v .
           pip install -r requirements/requirements-test.txt

       - name: Unit Testing
diff --git a/.github/workflows/cuda_ext_check_before_merge.yml b/.github/workflows/cuda_ext_check_before_merge.yml
index 686f0f395c73..14f53bd69ef9 100644
--- a/.github/workflows/cuda_ext_check_before_merge.yml
+++ b/.github/workflows/cuda_ext_check_before_merge.yml
@@ -51,4 +51,4 @@ jobs:

       - name: Build
         run: |
-          CUDA_EXT=1 pip install -v .
+          BUILD_EXT=1 pip install -v .
diff --git a/.github/workflows/doc_test_on_pr.yml b/.github/workflows/doc_test_on_pr.yml
index 51238905e115..8afc46b87aa2 100644
--- a/.github/workflows/doc_test_on_pr.yml
+++ b/.github/workflows/doc_test_on_pr.yml
@@ -89,7 +89,7 @@ jobs:
       - name: Install ColossalAI
         run: |
           source activate pytorch
-          CUDA_EXT=1 pip install -v .
+          BUILD_EXT=1 pip install -v .

       - name: Test the Doc
         run: |
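The `.cuda_ext.json` matrix above now pairs PyTorch 2.x with CUDA 12.1/11.8/11.7 images, and the workflow hunks rename the AOT switch from `CUDA_EXT=1` to `BUILD_EXT=1`. For orientation, below is a minimal sketch of how such a matrix file could drive the builds; it is an illustrative assumption, not code from this PR, and the real wiring lives in the workflow YAML.

```python
# Illustrative sketch (not from this PR): consume .cuda_ext.json to run each
# pinned-torch install followed by an ahead-of-time extension build.
import json
import os
import subprocess

with open(".cuda_ext.json") as f:
    build_matrix = json.load(f)["build"]

for entry in build_matrix:
    image = entry["cuda_image"]         # e.g. "hpcaitech/cuda-conda:12.1"
    torch_cmd = entry["torch_command"]  # pins torch/torchvision/torchaudio
    # Run the pinned install plus the AOT build inside the matching image.
    subprocess.run(
        ["docker", "run", "--rm",
         "-v", f"{os.getcwd()}:/workspace", "-w", "/workspace",
         image, "bash", "-c", f"{torch_cmd} && BUILD_EXT=1 pip install -v ."],
        check=True,
    )
```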
diff --git a/.github/workflows/doc_test_on_schedule.yml b/.github/workflows/doc_test_on_schedule.yml
index b3536184d78a..e2491e4607f5 100644
--- a/.github/workflows/doc_test_on_schedule.yml
+++ b/.github/workflows/doc_test_on_schedule.yml
@@ -32,7 +32,7 @@ jobs:
      - name: Install ColossalAI
        run: |
-          CUDA_EXT=1 pip install -v .
+          BUILD_EXT=1 pip install -v .

      - name: Install Doc Test Requirements
        run: |
diff --git a/.github/workflows/example_check_on_dispatch.yml b/.github/workflows/example_check_on_dispatch.yml
index bba321fd2d59..24e726b4f16d 100644
--- a/.github/workflows/example_check_on_dispatch.yml
+++ b/.github/workflows/example_check_on_dispatch.yml
@@ -53,7 +53,7 @@
        uses: actions/checkout@v3
      - name: Install Colossal-AI
        run: |
-          CUDA_EXT=1 pip install -v .
+          BUILD_EXT=1 pip install -v .
      - name: Test the example
        run: |
          dir=${{ matrix.directory }}
diff --git a/.github/workflows/example_check_on_pr.yml b/.github/workflows/example_check_on_pr.yml
index fcff8e569ff7..728f059c1bb3 100644
--- a/.github/workflows/example_check_on_pr.yml
+++ b/.github/workflows/example_check_on_pr.yml
@@ -88,7 +88,7 @@

      - name: Install Colossal-AI
        run: |
-          CUDA_EXT=1 pip install -v .
+          BUILD_EXT=1 pip install -v .

      - name: Test the example
        run: |
diff --git a/.github/workflows/example_check_on_schedule.yml b/.github/workflows/example_check_on_schedule.yml
index abb9479492e7..efb131a864cb 100644
--- a/.github/workflows/example_check_on_schedule.yml
+++ b/.github/workflows/example_check_on_schedule.yml
@@ -42,7 +42,7 @@

      - name: Install Colossal-AI
        run: |
-          CUDA_EXT=1 pip install -v .
+          BUILD_EXT=1 pip install -v .

      - name: Traverse all files
        run: |
diff --git a/colossalai/cli/check/check_installation.py b/colossalai/cli/check/check_installation.py
index 772c513ffa06..f5602bbe6155 100644
--- a/colossalai/cli/check/check_installation.py
+++ b/colossalai/cli/check/check_installation.py
@@ -76,7 +76,7 @@ def check_installation():
     click.echo("")
     click.echo(f"Note:")
     click.echo(
-        f"1. AOT (ahead-of-time) compilation of the CUDA kernels occurs during installation when the environment variable CUDA_EXT=1 is set"
+        f"1. AOT (ahead-of-time) compilation of the CUDA kernels occurs during installation when the environment variable BUILD_EXT=1 is set"
     )
     click.echo(f"2. If AOT compilation is not enabled, stay calm as the CUDA kernels can still be built during runtime")

diff --git a/colossalai/legacy/inference/serving/ray_serve/README.md b/colossalai/legacy/inference/serving/ray_serve/README.md
index 1d408238760b..888f04bb50f9 100644
--- a/colossalai/legacy/inference/serving/ray_serve/README.md
+++ b/colossalai/legacy/inference/serving/ray_serve/README.md
@@ -25,7 +25,7 @@ conda install -c conda-forge cupy cudnn cutensor nccl cuda-version=11.6

 # install colossalai with PyTorch extensions
 cd <path_to_ColossalAI_repo>
-CUDA_EXT=1 pip install -e .
+BUILD_EXT=1 pip install -e .

 # install other dependencies
 pip install triton==2.0.0.dev20221202
diff --git a/colossalai/legacy/inference/serving/torch_serve/README.md b/colossalai/legacy/inference/serving/torch_serve/README.md
index 6bd145bc30ae..fcf2e36d23c5 100644
--- a/colossalai/legacy/inference/serving/torch_serve/README.md
+++ b/colossalai/legacy/inference/serving/torch_serve/README.md
@@ -25,7 +25,7 @@ conda install -c "nvidia/label/cuda-11.6.2" cuda-toolkit
 cd <path_to_ColossalAI_repo>
 pip install -r requirements/requirements.txt
 pip install -r requirements/requirements-test.txt
-CUDA_EXT=1 pip install -e .
+BUILD_EXT=1 pip install -e .

 # install torchserve
 cd <path_to_torch_serve_repo>
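`check_installation` (above) now names `BUILD_EXT=1` as the AOT trigger and notes that the kernels can still be built at runtime. A quick way to check which path an environment ended up on is sketched below; the `colossalai._C` module path is an assumption for illustration, not taken from this diff.

```python
# Hedged sketch: probe whether AOT-compiled kernels are importable; if not,
# Colossal-AI falls back to building them at runtime (see the note above).
import importlib


def has_prebuilt_kernels(module: str = "colossalai._C") -> bool:
    try:
        importlib.import_module(module)  # present only after an AOT build
        return True
    except ImportError:
        return False


if __name__ == "__main__":
    print("prebuilt kernels:", has_prebuilt_kernels())
```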
diff --git a/colossalai/legacy/inference/serving/torch_serve/docker/Dockerfile b/colossalai/legacy/inference/serving/torch_serve/docker/Dockerfile
index 6d780a84747f..755812397932 100644
--- a/colossalai/legacy/inference/serving/torch_serve/docker/Dockerfile
+++ b/colossalai/legacy/inference/serving/torch_serve/docker/Dockerfile
@@ -38,7 +38,7 @@ ARG VERSION=main
 RUN git clone -b ${VERSION} https://github.com/hpcaitech/ColossalAI.git && \
     cd ./ColossalAI && \
     git checkout 3e05c07bb8921f2a8f9736b6f6673d4e9f1697d0 && \
-    CUDA_EXT=1 pip install -v --no-cache-dir .
+    BUILD_EXT=1 pip install -v --no-cache-dir .

 # install titans
 RUN pip install --no-cache-dir titans
diff --git a/colossalai/nn/optimizer/cpu_adam.py b/colossalai/nn/optimizer/cpu_adam.py
index 5be629fb2045..68fb582e5d1f 100644
--- a/colossalai/nn/optimizer/cpu_adam.py
+++ b/colossalai/nn/optimizer/cpu_adam.py
@@ -78,7 +78,7 @@ def __init__(
         super(CPUAdam, self).__init__(model_params, default_args, nvme_offload_fraction, nvme_offload_dir)
         self.adamw_mode = adamw_mode
         cpu_adam = CPUAdamLoader().load()
-        # if you find yourself stuck here, make sure that you install colossalai with CUDA_EXT=1 specification
+        # if you find yourself stuck here, make sure that you install colossalai with BUILD_EXT=1 specification
         self.cpu_adam_op = cpu_adam.CPUAdamOptimizer(lr, betas[0], betas[1], eps, weight_decay, adamw_mode)

     def torch_adam_update(
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 26d3fab1b6d7..0e796a9d4a95 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -37,7 +37,7 @@ RUN git clone https://github.com/NVIDIA/apex && \
 ARG VERSION=main
 RUN git clone -b ${VERSION} https://github.com/hpcaitech/ColossalAI.git \
     && cd ./ColossalAI \
-    && CUDA_EXT=1 pip install -v --no-cache-dir .
+    && BUILD_EXT=1 pip install -v --no-cache-dir .

 # install titans
 RUN pip install --no-cache-dir titans
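The `cpu_adam.py` comment above marks the spot where a first run can stall while the kernel compiles, which is why the serving READMEs recommend installing with `BUILD_EXT=1`. Below is a minimal usage sketch of the optimizer touched here, assuming a working install; the constructor arguments follow the hunk, and everything else is a plain PyTorch example.

```python
# Minimal CPUAdam usage sketch; assumes colossalai was installed with
# BUILD_EXT=1 (or that the kernel builds successfully on first use).
import torch
from colossalai.nn.optimizer.cpu_adam import CPUAdam

model = torch.nn.Linear(16, 4)  # parameters stay on CPU for CPUAdam
optimizer = CPUAdam(model.parameters(), lr=1e-3, adamw_mode=True)

loss = model(torch.randn(8, 16)).sum()
loss.backward()
optimizer.step()  # executes the compiled CPUAdamOptimizer from the hunk above
```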
diff --git a/docs/README-zh-Hans.md b/docs/README-zh-Hans.md
index 90ad5540ae83..bc4106d12642 100644
--- a/docs/README-zh-Hans.md
+++ b/docs/README-zh-Hans.md
@@ -146,25 +146,25 @@ Colossal-AI 为您提供了一系列并行组件。我们的目标是让您的

 [[HuggingFace model weights]](https://huggingface.co/hpcai-tech/Colossal-LLaMA-2-13b-base)
 [[Modelscope model weights]](https://www.modelscope.cn/models/colossalai/Colossal-LLaMA-2-13b-base/summary)

-| Model | Backbone | Tokens Consumed | MMLU (5-shot) | CMMLU (5-shot)| AGIEval (5-shot) | GAOKAO (0-shot) | CEval (5-shot) |
-| :----------------------------: | :--------: | :-------------: | :------------------: | :-----------: | :--------------: | :-------------: | :-------------: |
-| Baichuan-7B | - | 1.2T | 42.32 (42.30) | 44.53 (44.02) | 38.72 | 36.74 | 42.80 |
-| Baichuan-13B-Base | - | 1.4T | 50.51 (51.60) | 55.73 (55.30) | 47.20 | 51.41 | 53.60 |
-| Baichuan2-7B-Base | - | 2.6T | 46.97 (54.16) | 57.67 (57.07) | 45.76 | 52.60 | 54.00 |
-| Baichuan2-13B-Base | - | 2.6T | 54.84 (59.17) | 62.62 (61.97) | 52.08 | 58.25 | 58.10 |
-| ChatGLM-6B | - | 1.0T | 39.67 (40.63) | 41.17 (-) | 40.10 | 36.53 | 38.90 |
-| ChatGLM2-6B | - | 1.4T | 44.74 (45.46) | 49.40 (-) | 46.36 | 45.49 | 51.70 |
-| InternLM-7B | - | 1.6T | 46.70 (51.00) | 52.00 (-) | 44.77 | 61.64 | 52.80 |
-| Qwen-7B | - | 2.2T | 54.29 (56.70) | 56.03 (58.80) | 52.47 | 56.42 | 59.60 |
-| Llama-2-7B | - | 2.0T | 44.47 (45.30) | 32.97 (-) | 32.60 | 25.46 | - |
-| Linly-AI/Chinese-LLaMA-2-7B-hf | Llama-2-7B | 1.0T | 37.43 | 29.92 | 32.00 | 27.57 | - |
-| wenge-research/yayi-7b-llama2 | Llama-2-7B | - | 38.56 | 31.52 | 30.99 | 25.95 | - |
-| ziqingyang/chinese-llama-2-7b | Llama-2-7B | - | 33.86 | 34.69 | 34.52 | 25.18 | 34.2 |
-| TigerResearch/tigerbot-7b-base | Llama-2-7B | 0.3T | 43.73 | 42.04 | 37.64 | 30.61 | - |
-| LinkSoul/Chinese-Llama-2-7b | Llama-2-7B | - | 48.41 | 38.31 | 38.45 | 27.72 | - |
-| FlagAlpha/Atom-7B | Llama-2-7B | 0.1T | 49.96 | 41.10 | 39.83 | 33.00 | - |
-| IDEA-CCNL/Ziya-LLaMA-13B-v1.1 | Llama-13B | 0.11T | 50.25 | 40.99 | 40.04 | 30.54 | - |
-| **Colossal-LLaMA-2-7b-base** | Llama-2-7B | **0.0085T** | 53.06 | 49.89 | 51.48 | 58.82 | 50.2 |
+| Model | Backbone | Tokens Consumed | MMLU (5-shot) | CMMLU (5-shot) | AGIEval (5-shot) | GAOKAO (0-shot) | CEval (5-shot) |
+|:------------------------------:|:----------:|:---------------:|:-------------:|:--------------:|:----------------:|:---------------:|:--------------:|
+| Baichuan-7B | - | 1.2T | 42.32 (42.30) | 44.53 (44.02) | 38.72 | 36.74 | 42.80 |
+| Baichuan-13B-Base | - | 1.4T | 50.51 (51.60) | 55.73 (55.30) | 47.20 | 51.41 | 53.60 |
+| Baichuan2-7B-Base | - | 2.6T | 46.97 (54.16) | 57.67 (57.07) | 45.76 | 52.60 | 54.00 |
+| Baichuan2-13B-Base | - | 2.6T | 54.84 (59.17) | 62.62 (61.97) | 52.08 | 58.25 | 58.10 |
+| ChatGLM-6B | - | 1.0T | 39.67 (40.63) | 41.17 (-) | 40.10 | 36.53 | 38.90 |
+| ChatGLM2-6B | - | 1.4T | 44.74 (45.46) | 49.40 (-) | 46.36 | 45.49 | 51.70 |
+| InternLM-7B | - | 1.6T | 46.70 (51.00) | 52.00 (-) | 44.77 | 61.64 | 52.80 |
+| Qwen-7B | - | 2.2T | 54.29 (56.70) | 56.03 (58.80) | 52.47 | 56.42 | 59.60 |
+| Llama-2-7B | - | 2.0T | 44.47 (45.30) | 32.97 (-) | 32.60 | 25.46 | - |
+| Linly-AI/Chinese-LLaMA-2-7B-hf | Llama-2-7B | 1.0T | 37.43 | 29.92 | 32.00 | 27.57 | - |
+| wenge-research/yayi-7b-llama2 | Llama-2-7B | - | 38.56 | 31.52 | 30.99 | 25.95 | - |
+| ziqingyang/chinese-llama-2-7b | Llama-2-7B | - | 33.86 | 34.69 | 34.52 | 25.18 | 34.2 |
+| TigerResearch/tigerbot-7b-base | Llama-2-7B | 0.3T | 43.73 | 42.04 | 37.64 | 30.61 | - |
+| LinkSoul/Chinese-Llama-2-7b | Llama-2-7B | - | 48.41 | 38.31 | 38.45 | 27.72 | - |
+| FlagAlpha/Atom-7B | Llama-2-7B | 0.1T | 49.96 | 41.10 | 39.83 | 33.00 | - |
+| IDEA-CCNL/Ziya-LLaMA-13B-v1.1 | Llama-13B | 0.11T | 50.25 | 40.99 | 40.04 | 30.54 | - |
+| **Colossal-LLaMA-2-7b-base** | Llama-2-7B | **0.0085T** | 53.06 | 49.89 | 51.48 | 58.82 | 50.2 |

 ### ColossalChat

@@ -406,10 +406,10 @@ pip install colossalai

 **Note: Only Linux is supported at the moment.**

-However, if you want to build the PyTorch extensions directly at install time, you can set the environment variable `CUDA_EXT=1`.
+However, if you want to build the PyTorch extensions directly at install time, you can set the environment variable `BUILD_EXT=1`.

 ```bash
-CUDA_EXT=1 pip install colossalai
+BUILD_EXT=1 pip install colossalai
 ```

 **Otherwise, the PyTorch extensions will only be built at runtime, when they are actually needed.**

@@ -438,7 +438,7 @@ pip install .
 By default, the PyTorch extensions are not built during `pip install`; they are compiled on the fly at runtime instead. If you want to build these extensions ahead of time (required when using fused optimizers), you can use the following command.

 ```shell
-CUDA_EXT=1 pip install .
+BUILD_EXT=1 pip install .
 ```

 <p align="right">(<a href="#top">返回顶端</a>)</p>
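The README hunks above describe the default behavior: no extension build at `pip install` time, with compilation deferred to runtime. That fallback can be pictured roughly as in the sketch below; it is illustrative only, not ColossalAI's actual loader, and the `colossalai._C` package layout is an assumption.

```python
# Rough sketch of an AOT-first, runtime-fallback kernel loader (assumed logic).
import importlib

from torch.utils.cpp_extension import load


def load_kernel(name: str, sources: list):
    try:
        # AOT path: the extension was compiled during BUILD_EXT=1 pip install.
        return importlib.import_module(f"colossalai._C.{name}")
    except ImportError:
        # Runtime path: compile now; this is the "built during runtime" case.
        return load(name=name, sources=sources, verbose=True)
```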
diff --git a/docs/source/en/get_started/installation.md b/docs/source/en/get_started/installation.md
index f9c8fe4758c8..50325462d522 100644
--- a/docs/source/en/get_started/installation.md
+++ b/docs/source/en/get_started/installation.md
@@ -42,7 +42,7 @@ pip install -r requirements/requirements.txt
 BUILD_EXT=1 pip install .
 ```

-If you don't want to install and enable CUDA kernel fusion (compulsory installation when using fused optimizer), just don't specify the `CUDA_EXT`:
+If you don't want to install and enable CUDA kernel fusion (compulsory installation when using fused optimizer), just don't specify the `BUILD_EXT`:

 ```shell
 pip install .
diff --git a/examples/images/diffusion/README.md b/examples/images/diffusion/README.md
index d6a1c47d6b87..5434551f4fb4 100644
--- a/examples/images/diffusion/README.md
+++ b/examples/images/diffusion/README.md
@@ -77,7 +77,7 @@ git clone https://github.com/hpcaitech/ColossalAI.git
 cd ColossalAI

 # install colossalai
-CUDA_EXT=1 pip install .
+BUILD_EXT=1 pip install .
 ```

 #### Step 3: Accelerate with flash attention by xformers (Optional)
diff --git a/examples/images/diffusion/test_ci.sh b/examples/images/diffusion/test_ci.sh
index 44cf47046684..652db5d3918a 100755
--- a/examples/images/diffusion/test_ci.sh
+++ b/examples/images/diffusion/test_ci.sh
@@ -8,7 +8,7 @@ conda activate ldm
 conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.3 -c pytorch
 pip install transformers diffusers invisible-watermark

-CUDA_EXT=1 pip install colossalai
+BUILD_EXT=1 pip install colossalai

 wget https://huggingface.co/stabilityai/stable-diffusion-2-base/resolve/main/512-base-ema.ckpt
diff --git a/examples/language/llama2/README.md b/examples/language/llama2/README.md
index 752453b5a7e3..068f15cbb041 100644
--- a/examples/language/llama2/README.md
+++ b/examples/language/llama2/README.md
@@ -53,7 +53,7 @@ We follow the hyperparameter settings from the original LLaMA paper. We use Adam
 Please install the latest ColossalAI from source.

 ```bash
-CUDA_EXT=1 pip install -U git+https://github.com/hpcaitech/ColossalAI
+BUILD_EXT=1 pip install -U git+https://github.com/hpcaitech/ColossalAI
 ```

 Then install other dependencies.
diff --git a/examples/language/openmoe/README.md b/examples/language/openmoe/README.md
index 45657f192024..f62223c9319d 100644
--- a/examples/language/openmoe/README.md
+++ b/examples/language/openmoe/README.md
@@ -17,7 +17,7 @@
 Please install the latest ColossalAI from source.

 ```bash
-CUDA_EXT=1 pip install -U git+https://github.com/hpcaitech/ColossalAI
+BUILD_EXT=1 pip install -U git+https://github.com/hpcaitech/ColossalAI
 ```

 Then install dependencies.
diff --git a/extensions/utils.py b/extensions/utils.py
index 3f75f952d57b..d5d87a77a9c0 100644
--- a/extensions/utils.py
+++ b/extensions/utils.py
@@ -154,7 +154,7 @@ def check_cuda_availability():
 def set_cuda_arch_list(cuda_dir):
     """
     This function sets the PyTorch TORCH_CUDA_ARCH_LIST variable for ahead-of-time extension compilation.
-    Ahead-of-time compilation occurs when CUDA_EXT=1 is set when running 'pip install'.
+    Ahead-of-time compilation occurs when BUILD_EXT=1 is set when running 'pip install'.
     """
     cuda_available = check_cuda_availability()
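`set_cuda_arch_list` (docstring corrected above) pins `TORCH_CUDA_ARCH_LIST` so that a `BUILD_EXT=1` install compiles only the required GPU architectures. Paraphrased, the idea amounts to the sketch below; this is assumed logic, not the body of `extensions/utils.py`, and the no-GPU fallback list is a guess.

```python
# Assumed sketch of pinning TORCH_CUDA_ARCH_LIST before AOT compilation.
import os

import torch


def pin_cuda_arch_list() -> None:
    if not torch.cuda.is_available():
        # No visible GPU at build time: fall back to a broad arch list.
        os.environ["TORCH_CUDA_ARCH_LIST"] = "7.0;7.5;8.0;8.6"
        return
    archs = {
        "{}.{}".format(*torch.cuda.get_device_capability(i))
        for i in range(torch.cuda.device_count())
    }
    os.environ["TORCH_CUDA_ARCH_LIST"] = ";".join(sorted(archs))
```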
diff --git a/setup.py b/setup.py
index e54ec41ea9f8..ef89481e6b1e 100644
--- a/setup.py
+++ b/setup.py
@@ -70,7 +70,7 @@ def get_version() -> str:
 if BUILD_EXT:
     if not TORCH_AVAILABLE:
         raise ModuleNotFoundError(
-            "[extension] PyTorch is not found while CUDA_EXT=1. You need to install PyTorch first in order to build CUDA extensions"
+            "[extension] PyTorch is not found while BUILD_EXT=1. You need to install PyTorch first in order to build CUDA extensions"
         )

     from extensions import ALL_EXTENSIONS
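For reference, the guard in the final hunk reduces to the standalone sketch below. Only the error message is verbatim from the diff; how `BUILD_EXT` and `TORCH_AVAILABLE` are computed elsewhere in `setup.py` is assumed.

```python
# Standalone sketch of the BUILD_EXT guard; the definitions are assumptions.
import os

BUILD_EXT = os.environ.get("BUILD_EXT", "0") == "1"

try:
    import torch  # noqa: F401
    TORCH_AVAILABLE = True
except ImportError:
    TORCH_AVAILABLE = False

if BUILD_EXT and not TORCH_AVAILABLE:
    raise ModuleNotFoundError(
        "[extension] PyTorch is not found while BUILD_EXT=1. "
        "You need to install PyTorch first in order to build CUDA extensions"
    )
```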