Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scripts for producing PyPI-compatible manylinux wheel files #1028

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ build
build-*
# pymarian wheels
dist/
tmp
tmp-*
tmp.*

# Examples
examples/*/*.gz
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- Fixed compilation with clang 16.0.6
- Added Threads::Threads to `EXT_LIBS`
- Updates to pymarian: building for multiple python versions; disabling tcmalloc; hosting gated COMETs on HuggingFace
- Scripts for building _manylinux_ compatible wheel files (a requirement for publishing wheels on PyPI)

### Added
- Added `--normalize-gradient-by-ratio` to mildly adapt gradient magnitude if effective batch size diverges from running average effective batch size.
Expand Down
6 changes: 2 additions & 4 deletions cmake/PythonModules.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,8 @@ macro(py_exec)
endif()
endmacro()

set(PYBIND11_NOPYTHON On)
# this wont work if pybind11 is git submodule
#find_package(pybind11 REQUIRED)

# NOTE: this property must be set before including pybind11
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment seems confusing given the commented line below. Please check if it's okay and explain in the comment or fix.

# set(PYBIND11_NOPYTHON On)
## =====================
set(PYTHON_SEARCH_VERSIONS 3.7 3.8 3.9 3.10 3.11 3.12 3.13)
set(PYTHON_DISABLE_VERSIONS "" CACHE STRING "")
Expand Down
4 changes: 3 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,9 @@ endif(GENERATE_MARIAN_INSTALL_TARGETS)


if(PYMARIAN)
# python libs which use different version of tcmalloc (e.g. pandas) can cause segfaults, so we disable it
# this property must be set **before** including pybind11;
# otherwise pybind11 will interfere with our own Python version detection
set(PYBIND11_NOPYTHON On)
include_directories(3rd_party/pybind11/include)
add_subdirectory(3rd_party/pybind11)
include(PythonModules)
Expand Down
102 changes: 102 additions & 0 deletions src/python/build-manylinux.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#!/usr/bin/env bash

# Builds pymarian wheels for multiple python versions and then creates
# manylinux wheels from them using auditwheel.
# Python environments are created with mamba.
#
# DO NOT call this script directly (unless you know what you are doing).
# Use the build.sh script instead.

# -e: abort on the first failing command; -u: abort on use of an unset variable.
set -eu
# Directory containing this script, and the repository root two levels above it.
MYDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
MARIAN_ROOT="$( cd "$MYDIR/../.." && pwd )"
# Assume this directory is mounted in the docker container.
# Quoted so that a path containing whitespace is not split into several words.
cd "$MARIAN_ROOT"

# MKL is not part of the docker image, so it is installed from Intel's yum repository.
# (alternative repo definition: https://yum.repos.intel.com/setup/intelproducts.repo)
intel_yum="https://yum.repos.intel.com"
yum-config-manager --add-repo "$intel_yum/mkl/setup/intel-mkl.repo"
rpm --import "$intel_yum/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB"
yum install -y intel-mkl-64bit-2020.4-912

# TODO: build a docker image with MKL and mamba installed

# 1: compile with CUDA support; 0: compile for CPU only.
COMPILE_CUDA=1
# Space-separated list of python versions to build wheels for.
PY_VERSIONS="3.12 3.11 3.10 3.9 3.8"

# For quick testing, compile for only one version and for CPU only:
#COMPILE_CUDA=0
#PY_VERSIONS="3.10"

# Wheels compiled against an old GLIBC stay compatible with newer platforms,
# so an old GLIBC that works (e.g. 2.17) is used for maximum compatibility.
# GLIBC must be 2.17 (or older) for this platform tag.
PY_PLATFORM="manylinux_2_17_x86_64"
# Print the first line of the ldd version banner next to the target platform tag.
printf '%s; platform=%s\n' "$(ldd --version | head -1)" "$PY_PLATFORM"
# Install mamba (via the Miniforge installer) when it is not already on PATH.
# Everything is kept under tmp/ relative to the current directory, so an
# existing installation or a previously downloaded installer is reused.
command -v mamba > /dev/null 2>&1 || {
    name="Miniforge3-$(uname)-$(uname -m).sh"
    mambadir="tmp/mamba-$(uname)-$(uname -m)"
    mkdir -p tmp/
    # Skip the installation when a previous run already installed mamba here.
    [[ -s "$mambadir/bin/activate" ]] || {
        # Skip the download when the installer is already in tmp/.
        [[ -s "tmp/$name" ]] || {
            # Download to a temporary name and rename only after success, so an
            # interrupted download is never mistaken for a complete installer.
            rm -f "tmp/$name.tmp"
            # Run as separate commands (not `wget && mv`) so that a failed
            # download stops the script right here under `set -e`.
            wget -q "https://github.com/conda-forge/miniforge/releases/latest/download/$name" -O "tmp/$name.tmp"
            mv "tmp/$name.tmp" "tmp/$name"
        }
        # Installer flags: -b batch mode, -u update an existing install, -p install prefix.
        bash "tmp/$name" -b -u -p "$mambadir/"
        "$mambadir/bin/mamba" init bash
    }
    # Make the mamba shell function and the base environment available in this shell.
    source "$mambadir/etc/profile.d/mamba.sh"
    source "$mambadir/bin/activate"
}

# Give up when mamba cannot be located; on success `which` prints its path.
if ! which mamba; then
    echo "mamba not found. Exiting."
    exit 1
fi

# Create one environment per python version, named py<version>;
# versions whose environment is already listed are skipped.
for v in $PY_VERSIONS; do
    # Match the environment name exactly: dots are escaped and the name must be
    # followed by whitespace, so that e.g. py3.1 does not match py3.10.
    # Leading whitespace is tolerated in case the listing is indented.
    mamba env list | grep -qE "^[[:space:]]*py${v//./\\.}[[:space:]]" || {
        echo "Creating python $v environment"
        mamba create -q -y -n "py${v}" "python=${v}"
    }
done

# stack all environments (presumably so that every python<version> is on PATH at once)
for v in $PY_VERSIONS; do mamba activate "py${v}" --stack; done
# check if all python versions are available; with `set -e` a missing one aborts the script
for v in $PY_VERSIONS; do which "python$v"; done


# Build as usual, in a dedicated build directory under the repository root.
# All path expansions are quoted so that paths containing whitespace work.
build_dir="$MARIAN_ROOT/build-pymarian"
# 1: start from an empty build directory. An existing one is not deleted but
# moved aside under a timestamped name.
fresh_build=1
if [[ $fresh_build -eq 1 && -d "$build_dir" ]]; then
    backup_dir="$build_dir.$(date +%y%m%d%H%M%S)"
    echo "Moving existing build directory to $backup_dir"
    mv "$build_dir" "$backup_dir"
fi

mkdir -p "$build_dir"
cd "$build_dir"

# CMake options are collected in an array so that each option is passed to
# cmake as exactly one argument, without relying on word splitting.
# Alternative build type: replace Slim with Release in -DCMAKE_BUILD_TYPE.
CMAKE_FLAGS=(-DPYMARIAN=on -DCMAKE_BUILD_TYPE=Slim -DUSE_STATIC_LIBS=on -DUSE_FBGEMM=on)
# for cuda support
if [[ $COMPILE_CUDA -eq 1 ]]; then
    CMAKE_FLAGS+=(-DCOMPILE_CUDA=on -DCOMPILE_PASCAL=ON -DCOMPILE_VOLTA=ON -DCOMPILE_TURING=ON -DCOMPILE_AMPERE=ON -DCOMPILE_AMPERE_RTX=ON)
else
    CMAKE_FLAGS+=(-DCOMPILE_CUDA=off -DCOMPILE_CPU=on)
fi

cmake .. "${CMAKE_FLAGS[@]}"
# A bare `make -j` puts no limit on the number of parallel jobs, which can
# exhaust memory on a large build; cap it at the number of available CPUs.
# The assignment is a separate statement so that a failing nproc aborts the script.
build_jobs="$(nproc)"
make -j "$build_jobs"
# List the wheels produced by the build; aborts under `set -e` if there are none.
ls -lh pymarian*.whl

printf '%s\n' "=== Generating manylinux wheels ==="
# Repair every wheel in the current directory for the target platform tag
# and write the resulting wheels to manylinux/.
auditwheel repair --plat "$PY_PLATFORM" -w manylinux/ *.whl
ls -lh manylinux/

printf '%s\n' "=== Done ==="
20 changes: 20 additions & 0 deletions src/python/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/usr/bin/env bash

# This script is used to build the Python wheels inside a docker container.
# (The interpreter line above must stay the very first line of the file,
# otherwise it is ignored.)
#
# A requirement is that we have to use older GLIBC versions to ensure maximum compatibility.
# Python folks call these "manylinux" wheels and recommend using docker images to build them.
# Official manylinux docs: https://github.com/pypa/manylinux
# But the official manylinux images do not have CUDA support,
# so we use the "pytorch/manylinux-builder" image, which has CUDA support.
# Available tags: https://hub.docker.com/r/pytorch/manylinux-builder/tags

# Abort on the first failing command (-e) and on use of an unset variable (-u).
set -eu

LINUX_IMAGE="pytorch/manylinux-builder:cuda12.1"
# Directory containing this script, and the repository root two levels above it.
MYDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
MARIAN_ROOT="$( cd "$MYDIR/../.." && pwd )"

# Trace the remaining commands, including the full docker invocation.
set -x
# Build script to run in the container, relative to the repository root.
LINUX_BUILDER="src/python/build-manylinux.sh"
# Path inside the container at which the repository root is mounted.
MOUNT="/work"
# Arguments are quoted so that a repository path containing whitespace is passed intact.
docker run --rm -it -v "$MARIAN_ROOT:$MOUNT" "$LINUX_IMAGE" "$MOUNT/$LINUX_BUILDER"
4 changes: 2 additions & 2 deletions src/python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ dependencies = [
"pyyaml",
"tqdm",
"requests",
"huggingface-hub==0.23.1",
"huggingface-hub",
]

[project.scripts]
Expand All @@ -47,7 +47,7 @@ demos = [
"flask",
"sacremoses",
"pyqt5",
"sentence-splitter@git+https://github.com/mediacloud/sentence-splitter",
# "sentence-splitter@git+https://github.com/mediacloud/sentence-splitter",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please explain in the comment why it's commented (because not used yet but will be in pymarian-webapp?) or remove.

]


Expand Down
Loading