Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update ruy & simd forks to internal #1021

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions .github/workflows/ios.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# CI workflow: configures and compiles the CPU-only build for iOS on a macOS
# runner, using the toolchain file in cmake/ios.toolchain.cmake.
name: iOS

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

jobs:
  build-macos:
    name: iOS CPU-only
    runs-on: macos-12

    steps:
    - name: Checkout
      uses: actions/checkout@v2
      with:
        submodules: recursive

    - name: Install dependencies
      run: brew install boost openblas openssl protobuf

    - name: Configure CMake
      run: |
        # Ask Homebrew where OpenBLAS lives instead of hard-coding /usr/local/opt,
        # so the step keeps working if the Homebrew prefix differs on the runner.
        OPENBLAS_PREFIX="$(brew --prefix openblas)"
        export LDFLAGS="-L${OPENBLAS_PREFIX}/lib"
        export CPPFLAGS="-I${OPENBLAS_PREFIX}/include"
        mkdir -p build
        cd build
        # NOTE(review): PLATFORM and DEPLOYMENT_TARGET are presumably read by the
        # iOS toolchain file - confirm against cmake/ios.toolchain.cmake.
        cmake .. \
          -DCOMPILE_CPU=on \
          -DCOMPILE_CUDA=off \
          -DCOMPILE_EXAMPLES=on \
          -DCOMPILE_SERVER=off \
          -DCOMPILE_TESTS=on \
          -DUSE_SENTENCEPIECE=on \
          -DCMAKE_TOOLCHAIN_FILE=../cmake/ios.toolchain.cmake \
          -DPLATFORM=OS64 \
          -DDEPLOYMENT_TARGET=13.0

    - name: Compile
      working-directory: build
      run: cmake --build . --config Release
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ jobs:
# https://software.intel.com/content/www/us/en/develop/articles/installing-intel-free-libs-and-python-apt-repo.html
- name: Install MKL
run: |
wget -qO- "https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB" | sudo apt-key add -
wget -qO- "https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB" | sudo apt-key add -
sudo sh -c "echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list"
sudo apt-get update -o Dir::Etc::sourcelist="/etc/apt/sources.list.d/intel-mkl.list"
sudo apt-get install -y --no-install-recommends intel-mkl-64bit-2020.0-088
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ubuntu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ jobs:
# https://software.intel.com/content/www/us/en/develop/articles/installing-intel-free-libs-and-python-apt-repo.html
- name: Install MKL
run: |
wget -qO- "https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB" | sudo apt-key add -
wget -qO- "https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB" | sudo apt-key add -
sudo sh -c "echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list"
sudo apt-get update -o Dir::Etc::sourcelist="/etc/apt/sources.list.d/intel-mkl.list"
sudo apt-get install -y --no-install-recommends intel-mkl-64bit-2020.0-088
Expand Down
6 changes: 6 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,9 @@
[submodule "src/3rd_party/simple-websocket-server"]
path = src/3rd_party/simple-websocket-server
url = https://github.com/marian-nmt/Simple-WebSocket-Server
[submodule "src/3rd_party/ruy"]
path = src/3rd_party/ruy
url = https://github.com/marian-nmt/ruy.git
[submodule "src/3rd_party/simd_utils"]
path = src/3rd_party/simd_utils
url = https://github.com/marian-nmt/simd_utils.git
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

## [Unreleased]
- Fixed compilation with clang 16.0.6
- Added `Threads::Threads` to `EXT_LIBS`


### Added
- Added `--no-spm-encode` option, allowing the model to use vocabulary IDs directly to train/decode.
Expand Down
82 changes: 67 additions & 15 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
endif ()

project(marian CXX C)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(BUILD_ARCH native CACHE STRING "Compile for this CPU architecture.")
Expand Down Expand Up @@ -80,6 +81,48 @@
set(CMAKE_BUILD_TYPE "Release")
endif()

# iOS builds: switch on the ARM code path and extend the per-target warning list.
if(CMAKE_SYSTEM_NAME STREQUAL "iOS")
  # Xcode complains about implicit 64-bit to 32-bit narrowing; silence that warning.
  list(APPEND ALL_WARNINGS -Wno-shorten-64-to-32)
  set(ARM ON)
endif()

# ARM support: currently ONLY armv8 (armv8 includes NEON by default).
# There is no good automatic architecture detection yet, including for
# cross-compilation, so this whole section is gated on the ARM variable.
# Detection is planned for future PRs.
if(ARM)

  # GEMM backend selection. Apple platforms ship Apple Accelerate, so Ruy is
  # only the default on other platforms.
  if(APPLE)
    set(USE_RUY_SGEMM_DEFAULT OFF)
  else()
    set(USE_RUY_SGEMM_DEFAULT ON)
  endif()
  option(USE_RUY_SGEMM "Compile with Ruy SGEMM" ${USE_RUY_SGEMM_DEFAULT})

  # Act on the option's value rather than on the platform alone, so that an
  # explicit -DUSE_RUY_SGEMM=... on the command line is honoured: ruy is linked
  # exactly when the option is ON. With the defaults this matches the previous
  # behaviour (Accelerate on Apple, Ruy everywhere else).
  if(USE_RUY_SGEMM)
    message(STATUS "Using Ruy SGEMM")
    list(APPEND EXT_LIBS ruy)
  elseif(APPLE)
    message(STATUS "Using Apple Accelerate SGEMM")
  endif()

  # Define that we are using ARM as required by simd_utils. See their README for info
  add_compile_definitions(ARM FMA SSE)

  # Extra flags for ARM builds: disable strict-aliasing optimisations and
  # silence -Wcomment. These are folded into DISABLE_GLOBALLY further down.
  # NOTE(review): the original author was not comfortable with the strict
  # aliasing situation - worth revisiting.
  set(ARM_WARNINGS "-fno-strict-aliasing -Wno-comment")

  # Allow implicit conversions between vector types.
  # NOTE(review): the MSVC spelling is kept as-is; confirm the compiler accepts it.
  if(MSVC)
    add_compile_options(/flax-vector-conversions)
  else()
    add_compile_options(-flax-vector-conversions)
  endif()
endif()

########
# Threading: locate the thread library (pthreads preferred) and add its imported
# target to EXT_LIBS, so it is treated like the other external libraries. This
# is meant to give a more portable binary.
set(THREADS_PREFER_PTHREAD_FLAG TRUE)
set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
find_package(Threads REQUIRED)
list(APPEND EXT_LIBS Threads::Threads)
########

###############################################################################
# Set compilation flags
if(MSVC)
Expand Down Expand Up @@ -139,13 +182,16 @@
set(INTRINSICS "")
list(APPEND INTRINSICS_NVCC)

option(COMPILE_SSE2 "Compile CPU code with SSE2 support" ON)
option(COMPILE_SSE3 "Compile CPU code with SSE3 support" ON)
option(COMPILE_SSE4_1 "Compile CPU code with SSE4.1 support" ON)
option(COMPILE_SSE4_2 "Compile CPU code with SSE4.2 support" ON)
option(COMPILE_AVX "Compile CPU code with AVX support" ON)
option(COMPILE_AVX2 "Compile CPU code with AVX2 support" ON)
option(COMPILE_AVX512 "Compile CPU code with AVX512 support" ON)
# SIMD switches (SSE*/AVX*). They are declared only for non-ARM builds, because
# none of these instruction sets are available on ARM. Every switch defaults to ON.
if(NOT ARM)
  foreach(simd_isa SSE2 SSE3 SSE4_1 SSE4_2 AVX AVX2 AVX512)
    # The help text spells the version with a dot (SSE4_1 -> SSE4.1).
    string(REPLACE "_" "." simd_isa_label "${simd_isa}")
    option(COMPILE_${simd_isa} "Compile CPU code with ${simd_isa_label} support" ON)
  endforeach()
  unset(simd_isa_label)
endif()

if(BUILD_ARCH STREQUAL "native")
message(STATUS "Building with -march=native and intrinsics will be chosen automatically by the compiler to match the current machine.")
Expand Down Expand Up @@ -221,7 +267,7 @@
# Clang-10.0.0 complains when CUDA is newer than 10.1
set(CLANG_IGNORE_UNKNOWN_CUDA "-Wno-unknown-warning-option -Wno-unknown-cuda-version")
endif()
set(DISABLE_GLOBALLY "-Wno-unused-result ${CLANG_IGNORE_UNKNOWN_CUDA}")
set(DISABLE_GLOBALLY "-Wno-unused-result ${CLANG_IGNORE_UNKNOWN_CUDA} ${ARM_WARNINGS}")

# These are used in src/CMakeLists.txt on a per-target basis
list(APPEND ALL_WARNINGS -Wall; -Werror; -Wextra; -Wno-unused-result; -Wno-deprecated;
Expand All @@ -240,24 +286,30 @@
set(CMAKE_RDYNAMIC_FLAG "-rdynamic")
endif(CMAKE_COMPILER_IS_GNUCC)

set(CMAKE_CXX_FLAGS "-std=c++11 -pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}")
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -m64 -funroll-loops -g ${CMAKE_RDYNAMIC_FLAG}")
set(CMAKE_CXX_FLAGS "-std=c++11 -pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} ${INTRINSICS}")
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -funroll-loops -g ${CMAKE_RDYNAMIC_FLAG}")
set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g ${CMAKE_RDYNAMIC_FLAG}")
set(CMAKE_CXX_FLAGS_SLIM "-O3 -m64 -funroll-loops -DNDEBUG")
set(CMAKE_CXX_FLAGS_SLIM "-O3 -funroll-loops -DNDEBUG")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELEASE}")
set(CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_RELEASE} -pg")
set(CMAKE_CXX_FLAGS_PROFGEN "${CMAKE_CXX_FLAGS_RELEASE} -fprofile-generate -fprofile-correction")
set(CMAKE_CXX_FLAGS_PROFUSE "${CMAKE_CXX_FLAGS_RELEASE} -fprofile-use -fprofile-correction")

# these need to be set separately
set(CMAKE_C_FLAGS "-pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}")
set(CMAKE_C_FLAGS_RELEASE "-O3 -m64 -funroll-loops -g ${CMAKE_RDYNAMIC_FLAG}")
set(CMAKE_C_FLAGS "-pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} ${INTRINSICS}")
set(CMAKE_C_FLAGS_RELEASE "-O3 -funroll-loops -g ${CMAKE_RDYNAMIC_FLAG}")
set(CMAKE_C_FLAGS_DEBUG "-O0 -g ${CMAKE_RDYNAMIC_FLAG}")
set(CMAKE_C_FLAGS_SLIM "-O3 -m64 -funroll-loops -DNDEBUG")
set(CMAKE_C_FLAGS_SLIM "-O3 -funroll-loops -DNDEBUG")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELEASE}")
set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg")
set(CMAKE_C_FLAGS_PROFGEN "${CMAKE_C_FLAGS_RELEASE} -fprofile-generate -fprofile-correction")
set(CMAKE_C_FLAGS_PROFUSE "${CMAKE_C_FLAGS_RELEASE} -fprofile-use -fprofile-correction")

# Append -march=${BUILD_ARCH} to both the C++ and the C flags, except when
# cross-compiling for iOS, where no -march flag is passed.
if(NOT CMAKE_SYSTEM_NAME STREQUAL "iOS")
  foreach(flags_var CMAKE_CXX_FLAGS CMAKE_C_FLAGS)
    string(APPEND ${flags_var} " -march=${BUILD_ARCH}")
  endforeach()
endif()
endif(MSVC)

# with gcc 7.0 and above we need to mark fallthrough in switch case statements
Expand Down Expand Up @@ -461,7 +513,7 @@
endif(CUDA_FOUND)

else(COMPILE_CUDA)
message(WARNING "COMPILE_CUDA=off : Building only CPU version")

Check warning on line 516 in CMakeLists.txt

View workflow job for this annotation

GitHub Actions / Windows CPU-only

COMPILE_CUDA=off : Building only CPU version
endif(COMPILE_CUDA)

# TODO: make compatible with older CUDA versions
Expand Down Expand Up @@ -511,7 +563,7 @@
###############################################################################
# Find BLAS library
if(COMPILE_CPU)
if(NOT GENERATE_MARIAN_INSTALL_TARGETS)
if(NOT GENERATE_MARIAN_INSTALL_TARGETS AND NOT ARM)
set(EXT_LIBS ${EXT_LIBS} intgemm) # Enable intgemm when compiling CPU
add_definitions(-DCOMPILE_CPU=1)
endif()
Expand Down Expand Up @@ -580,7 +632,7 @@
endif()

if(DETERMINISTIC)
message(WARNING "Option DETERMINISTIC=ON: Trying to make training as deterministic as possible, may result in slow-down")

Check warning on line 635 in CMakeLists.txt

View workflow job for this annotation

GitHub Actions / Windows CPU-only

Option DETERMINISTIC=ON: Trying to make training as deterministic as

Check warning on line 635 in CMakeLists.txt

View workflow job for this annotation

GitHub Actions / Windows CPU+CUDA

Option DETERMINISTIC=ON: Trying to make training as deterministic as

Check warning on line 635 in CMakeLists.txt

View workflow job for this annotation

GitHub Actions / Windows CPU+CUDA

Option DETERMINISTIC=ON: Trying to make training as deterministic as
add_definitions(-DDETERMINISTIC=1)
list(APPEND CUDA_NVCC_FLAGS -DDETERMINISTIC=1; )
else()
Expand Down
2 changes: 1 addition & 1 deletion azure-regression-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ stages:

# https://software.intel.com/content/www/us/en/develop/articles/installing-intel-free-libs-and-python-apt-repo.html
- bash: |
wget -qO- "https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB" | sudo apt-key add -
wget -qO- "https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB" | sudo apt-key add -
sudo sh -c "echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list"
sudo apt-get update -o Dir::Etc::sourcelist="/etc/apt/sources.list.d/intel-mkl.list"
sudo apt-get install -y --no-install-recommends intel-mkl-64bit-2020.0-088
Expand Down
Loading
Loading