Skip to content

Build Wheels

Build Wheels #1

Workflow file for this run

name: Build Wheels
on:
workflow_dispatch:
inputs:
version:
description: 'Version tag of llama-cpp-python to build: v0.1.77'
default: 'v0.1.77'
required: false
type: string
workflow_call:
inputs:
version:
description: 'Version tag of llama-cpp-python to build: v0.1.77'
default: 'v0.1.77'
required: false
type: string
permissions:
contents: write
jobs:
build_wheels:
name: ${{ matrix.os }} ${{ matrix.pyver }} ${{ matrix.cuda }} ${{ matrix.releasetag == 'wheels' && 'AVX2' || matrix.releasetag }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-20.04, windows-latest]
pyver: ["3.7", "3.8", "3.9", "3.10", "3.11"]
cuda: ["11.6.2", "11.7.1", "11.8.0", "12.0.1", "12.1.1", "12.2.0"]
releasetag: ["AVX","wheels","AVX512","basic"]
defaults:
run:
shell: pwsh
env:
CUDAVER: ${{ matrix.cuda }}
AVXVER: ${{ matrix.releasetag }}
PCKGVER: ${{ inputs.version }}
steps:
- uses: actions/checkout@v3
with:
repository: 'abetlen/llama-cpp-python'
ref: ${{ inputs.version }}
submodules: 'recursive'
- uses: actions/setup-python@v3
with:
python-version: ${{ matrix.pyver }}
- name: Setup Mamba
uses: conda-incubator/setup-miniconda@v2.2.0
with:
activate-environment: "build"
python-version: ${{ matrix.pyver }}
miniforge-variant: Mambaforge
miniforge-version: latest
use-mamba: true
add-pip-as-python-dependency: true
auto-activate-base: false
- name: Install Dependencies
run: |
$cudaVersion = $env:CUDAVER
$cudaChannels = ''
$cudaNum = [int]$cudaVersion.substring($cudaVersion.LastIndexOf('.')+1)
while ($cudaNum -ge 0) { $cudaChannels += '-c nvidia/label/cuda-' + $cudaVersion.Remove($cudaVersion.LastIndexOf('.')+1) + $cudaNum + ' '; $cudaNum-- }
mamba install -y 'cuda' $cudaChannels.TrimEnd().Split()
python -m pip install build wheel
- name: Build Wheel
run: |
$packageVersion = [version]$env:PCKGVER.TrimStart('v')
$cudaVersion = $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','')
$env:CUDA_PATH = $env:CONDA_PREFIX
$env:CUDA_HOME = $env:CONDA_PREFIX
if ($IsLinux) {$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH}
$env:VERBOSE = '1'
$env:FORCE_CMAKE = '1'
$env:CMAKE_ARGS = '-DLLAMA_CUBLAS=on -DCMAKE_CUDA_ARCHITECTURES=all'
if ($packageVersion -gt [version]'0.1.68' -and $packageVersion -lt [version]'0.1.70') {$env:CMAKE_ARGS = '-DLLAMA_CUBLAS=on -DCMAKE_CUDA_ARCHITECTURES=35-real;37-real;52;61-real;70-real;72-real;75-real;80-real;86-real;89-real;90'}
if ($packageVersion -gt [version]'0.1.68' -and $packageVersion -lt [version]'0.1.70' -and [version]$env:CUDAVER -ge [version]'12.0') {$env:CMAKE_ARGS = '-DLLAMA_CUBLAS=on -DCMAKE_CUDA_ARCHITECTURES=52;61-real;70-real;72-real;75-real;80-real;86-real;89-real;90'}
if ($packageVersion -gt [version]'0.1.68' -and $packageVersion -lt [version]'0.1.70' -and [version]$env:CUDAVER -lt [version]'11.8') {$env:CMAKE_ARGS = '-DLLAMA_CUBLAS=on -DCMAKE_CUDA_ARCHITECTURES=35-real;37-real;52;61-real;70-real;72-real;75-real;80-real;86'}
if ($packageVersion -lt [version]'0.1.66') {$env:CUDAFLAGS = '-arch=all'}
if ($env:AVXVER -eq 'AVX') {$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX2=off'}
if ($env:AVXVER -eq 'AVX512') {$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX512=on'}
if ($env:AVXVER -eq 'basic') {$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off'}
python -m build --wheel -C--build-option=egg_info "-C--build-option=--tag-build=+cu$cudaVersion"
- name: Upload files to a GitHub release
id: upload-release
uses: svenstaro/upload-release-action@2.6.1
continue-on-error: true
with:
file: ./dist/*.whl
tag: ${{ matrix.releasetag }}
file_glob: true
make_latest: false
overwrite: true
- uses: actions/upload-artifact@v3
if: steps.upload-release.outcome == 'failure'
with:
name: ${{ matrix.releasetag == 'wheels' && 'AVX2' || matrix.releasetag }}
path: ./dist/*.whl
build_cpu:
name: Build CPU-only Wheels
needs: build_wheels
uses: ./.github/workflows/build-wheels-cpu.yml
with:
version: ${{ inputs.version }}