Skip to content

Commit

Permalink
Merge branch 'ggerganov:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
mmnga authored Sep 3, 2023
2 parents 51519b8 + 8f429fa commit 49f6f80
Show file tree
Hide file tree
Showing 17 changed files with 415 additions and 196 deletions.
1 change: 0 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ on:
env:
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
GGML_NLOOP: 3
GGML_NITER: 1
GGML_N_THREADS: 1

jobs:
Expand Down
36 changes: 36 additions & 0 deletions .github/workflows/code-coverage.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
name: Code Coverage
on: [push, pull_request]

env:
GGML_NLOOP: 3
GGML_N_THREADS: 1

jobs:
run:
runs-on: ubuntu-20.04
steps:
- name: Checkout
uses: actions/checkout@v3

- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential gcc-8 lcov
- name: Build
run: CC=gcc-8 make -j LLAMA_CODE_COVERAGE=1 tests

- name: Run tests
run: CC=gcc-8 make test

- name: Generate coverage report
run: |
make coverage
make lcov-report
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
with:
files: lcov-report/coverage.info
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
*.exe
*.dll
*.log
*.gcov
*.gcno
*.gcda
*.dot
.DS_Store
.build/
.cache/
Expand All @@ -17,6 +21,9 @@
.vs/
.vscode/

lcov-report/
gcovr-report/

build*/
out/
tmp/
Expand Down
172 changes: 87 additions & 85 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-tex
# Binaries only useful for tests
TEST_TARGETS = tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama tests/test-tokenizer-0-falcon tests/test-tokenizer-1

# Code coverage output files
COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report

default: $(BUILD_TARGETS)

test:
Expand All @@ -23,6 +26,18 @@ test:

all: $(BUILD_TARGETS) $(TEST_TARGETS)

coverage: ## Run code coverage
gcov -pb tests/*.cpp

lcov-report: coverage ## Generate lcov report
mkdir -p lcov-report
lcov --capture --directory . --output-file lcov-report/coverage.info
genhtml lcov-report/coverage.info --output-directory lcov-report

gcovr-report: coverage ## Generate gcovr report
mkdir -p gcovr-report
gcovr --root . --html --html-details --output gcovr-report/coverage.html

ifndef UNAME_S
UNAME_S := $(shell uname -s)
endif
Expand Down Expand Up @@ -67,21 +82,26 @@ OPT = -Ofast
else
OPT = -O3
endif
CFLAGS = -I. $(OPT) -std=c11 -fPIC
CXXFLAGS = -I. -I./common $(OPT) -std=c++11 -fPIC
LDFLAGS =
MK_CPPFLAGS = -I. -Icommon
MK_CFLAGS = $(CPPFLAGS) $(OPT) -std=c11 -fPIC
MK_CXXFLAGS = $(CPPFLAGS) $(OPT) -std=c++11 -fPIC
MK_LDFLAGS =

ifdef LLAMA_DEBUG
CFLAGS += -O0 -g
CXXFLAGS += -O0 -g
LDFLAGS += -g
MK_CFLAGS += -O0 -g
MK_CXXFLAGS += -O0 -g
MK_LDFLAGS += -g
else
CFLAGS += -DNDEBUG
CXXFLAGS += -DNDEBUG
MK_CPPFLAGS += -DNDEBUG
endif

ifdef LLAMA_SERVER_VERBOSE
CXXFLAGS += -DSERVER_VERBOSE=$(LLAMA_SERVER_VERBOSE)
MK_CPPFLAGS += -DSERVER_VERBOSE=$(LLAMA_SERVER_VERBOSE)
endif


ifdef LLAMA_CODE_COVERAGE
CXXFLAGS += -fprofile-arcs -ftest-coverage -dumpbase ''
endif

ifdef LLAMA_DISABLE_LOGS
Expand All @@ -90,9 +110,9 @@ ifdef LLAMA_DISABLE_LOGS
endif # LLAMA_DISABLE_LOGS

# warnings
CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith \
-Wmissing-prototypes -Werror=implicit-int -Wno-unused-function
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-multichar
MK_CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith \
-Wmissing-prototypes -Werror=implicit-int -Wno-unused-function
MK_CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-multichar

ifeq '' '$(findstring clang++,$(CXX))'
# g++ only
Expand All @@ -101,29 +121,9 @@ endif

# OS specific
# TODO: support Windows
ifeq ($(UNAME_S),Linux)
CFLAGS += -pthread
CXXFLAGS += -pthread
endif
ifeq ($(UNAME_S),Darwin)
CFLAGS += -pthread
CXXFLAGS += -pthread
endif
ifeq ($(UNAME_S),FreeBSD)
CFLAGS += -pthread
CXXFLAGS += -pthread
endif
ifeq ($(UNAME_S),NetBSD)
CFLAGS += -pthread
CXXFLAGS += -pthread
endif
ifeq ($(UNAME_S),OpenBSD)
CFLAGS += -pthread
CXXFLAGS += -pthread
endif
ifeq ($(UNAME_S),Haiku)
CFLAGS += -pthread
CXXFLAGS += -pthread
ifneq '' '$(filter $(UNAME_S),Linux Darwin FreeBSD NetBSD OpenBSD Haiku)'
MK_CFLAGS += -pthread
MK_CXXFLAGS += -pthread
endif

# detect Windows
Expand All @@ -149,12 +149,11 @@ ifeq ($(_WIN32),1)
endif

ifdef LLAMA_GPROF
CFLAGS += -pg
CXXFLAGS += -pg
MK_CFLAGS += -pg
MK_CXXFLAGS += -pg
endif
ifdef LLAMA_PERF
CFLAGS += -DGGML_PERF
CXXFLAGS += -DGGML_PERF
MK_CPPFLAGS += -DGGML_PERF
endif

# Architecture specific
Expand All @@ -165,16 +164,16 @@ ifndef RISCV

ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
# Use all CPU extensions that are available:
CFLAGS += -march=native -mtune=native
CXXFLAGS += -march=native -mtune=native
MK_CFLAGS += -march=native -mtune=native
MK_CXXFLAGS += -march=native -mtune=native

# Usage AVX-only
#CFLAGS += -mfma -mf16c -mavx
#CXXFLAGS += -mfma -mf16c -mavx
#MK_CFLAGS += -mfma -mf16c -mavx
#MK_CXXFLAGS += -mfma -mf16c -mavx

# Usage SSSE3-only (Not is SSE3!)
#CFLAGS += -mssse3
#CXXFLAGS += -mssse3
#MK_CFLAGS += -mssse3
#MK_CXXFLAGS += -mssse3
endif

# The stack is only 16-byte aligned on Windows, so don't let gcc emit aligned moves.
Expand All @@ -188,34 +187,33 @@ endif
ifneq ($(filter aarch64%,$(UNAME_M)),)
# Apple M1, M2, etc.
# Raspberry Pi 3, 4, Zero 2 (64-bit)
CFLAGS += -mcpu=native
CXXFLAGS += -mcpu=native
MK_CFLAGS += -mcpu=native
MK_CXXFLAGS += -mcpu=native
endif

ifneq ($(filter armv6%,$(UNAME_M)),)
# Raspberry Pi 1, Zero
CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
endif

ifneq ($(filter armv7%,$(UNAME_M)),)
# Raspberry Pi 2
CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
endif

ifneq ($(filter armv8%,$(UNAME_M)),)
# Raspberry Pi 3, 4, Zero 2 (32-bit)
CFLAGS += -mfp16-format=ieee -mno-unaligned-access
MK_CFLAGS += -mfp16-format=ieee -mno-unaligned-access
MK_CXXFLAGS += -mfp16-format=ieee -mno-unaligned-access
endif

ifneq ($(filter ppc64%,$(UNAME_M)),)
POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
ifneq (,$(findstring POWER9,$(POWER9_M)))
CFLAGS += -mcpu=power9
CXXFLAGS += -mcpu=power9
endif
# Require c++23's std::byteswap for big-endian support.
ifeq ($(UNAME_M),ppc64)
CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
MK_CFLAGS += -mcpu=power9
MK_CXXFLAGS += -mcpu=power9
endif
endif

Expand All @@ -225,44 +223,43 @@ else
endif

ifndef LLAMA_NO_K_QUANTS
CFLAGS += -DGGML_USE_K_QUANTS
CXXFLAGS += -DGGML_USE_K_QUANTS
MK_CPPFLAGS += -DGGML_USE_K_QUANTS
OBJS += k_quants.o
ifdef LLAMA_QKK_64
CFLAGS += -DGGML_QKK_64
CXXFLAGS += -DGGML_QKK_64
MK_CPPFLAGS += -DGGML_QKK_64
endif
endif

ifndef LLAMA_NO_ACCELERATE
# Mac M1 - include Accelerate framework.
# `-framework Accelerate` works on Mac Intel as well, with negliable performance boost (as of the predict time).
ifeq ($(UNAME_S),Darwin)
CFLAGS += -DGGML_USE_ACCELERATE
LDFLAGS += -framework Accelerate
MK_CPPFLAGS += -DGGML_USE_ACCELERATE
MK_LDFLAGS += -framework Accelerate
endif
endif # LLAMA_NO_ACCELERATE

ifdef LLAMA_MPI
CFLAGS += -DGGML_USE_MPI -Wno-cast-qual
CXXFLAGS += -DGGML_USE_MPI -Wno-cast-qual
MK_CPPFLAGS += -DGGML_USE_MPI
MK_CFLAGS += -Wno-cast-qual
MK_CXXFLAGS += -Wno-cast-qual
OBJS += ggml-mpi.o
endif # LLAMA_MPI

ifdef LLAMA_OPENBLAS
CFLAGS += -DGGML_USE_OPENBLAS $(shell pkg-config --cflags openblas)
LDFLAGS += $(shell pkg-config --libs openblas)
MK_CPPFLAGS += -DGGML_USE_OPENBLAS $(shell pkg-config --cflags-only-I openblas)
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
MK_LDFLAGS += $(shell pkg-config --libs openblas)
endif # LLAMA_OPENBLAS

ifdef LLAMA_BLIS
CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis
LDFLAGS += -lblis -L/usr/local/lib
MK_CPPFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis
MK_LDFLAGS += -lblis -L/usr/local/lib
endif # LLAMA_BLIS

ifdef LLAMA_CUBLAS
CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
CXXFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
MK_CPPFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
MK_LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
OBJS += ggml-cuda.o
NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math
ifdef LLAMA_CUDA_NVCC
Expand Down Expand Up @@ -313,14 +310,15 @@ endif # LLAMA_CUBLAS

ifdef LLAMA_CLBLAST

CFLAGS += -DGGML_USE_CLBLAST $(shell pkg-config --cflags clblast OpenCL)
CXXFLAGS += -DGGML_USE_CLBLAST $(shell pkg-config --cflags clblast OpenCL)
MK_CPPFLAGS += -DGGML_USE_CLBLAST $(shell pkg-config --cflags-only-I clblast OpenCL)
MK_CFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
MK_CXXFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)

# Mac provides OpenCL as a framework
ifeq ($(UNAME_S),Darwin)
LDFLAGS += -lclblast -framework OpenCL
MK_LDFLAGS += -lclblast -framework OpenCL
else
LDFLAGS += $(shell pkg-config --libs clblast OpenCL)
MK_LDFLAGS += $(shell pkg-config --libs clblast OpenCL)
endif
OBJS += ggml-opencl.o

Expand All @@ -335,10 +333,9 @@ ifdef LLAMA_HIPBLAS
LLAMA_CUDA_DMMV_X ?= 32
LLAMA_CUDA_MMV_Y ?= 1
LLAMA_CUDA_KQUANTS_ITER ?= 2
CFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS
CXXFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS
LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
LDFLAGS += -lhipblas -lamdhip64 -lrocblas
MK_CPPFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS
MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas
HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS))
HIPFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
HIPFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
Expand All @@ -353,10 +350,9 @@ ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
endif # LLAMA_HIPBLAS

ifdef LLAMA_METAL
CFLAGS += -DGGML_USE_METAL #-DGGML_METAL_NDEBUG
CXXFLAGS += -DGGML_USE_METAL
LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
OBJS += ggml-metal.o
MK_CPPFLAGS += -DGGML_USE_METAL #-DGGML_METAL_NDEBUG
MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
OBJS += ggml-metal.o
endif # LLAMA_METAL

ifdef LLAMA_METAL
Expand All @@ -369,11 +365,17 @@ ggml-mpi.o: ggml-mpi.c ggml-mpi.h
$(CC) $(CFLAGS) -c $< -o $@
endif # LLAMA_MPI

ifdef LLAMA_NO_K_QUANTS
ifndef LLAMA_NO_K_QUANTS
k_quants.o: k_quants.c k_quants.h
$(CC) $(CFLAGS) -c $< -o $@
endif # LLAMA_NO_K_QUANTS

# combine build flags with cmdline overrides
override CPPFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS)
override CFLAGS := $(MK_CFLAGS) $(CFLAGS)
override CXXFLAGS := $(MK_CXXFLAGS) $(CXXFLAGS)
override LDFLAGS := $(MK_LDFLAGS) $(LDFLAGS)

#
# Print build information
#
Expand Down Expand Up @@ -417,7 +419,7 @@ libllama.so: llama.o ggml.o $(OBJS)
$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)

clean:
rm -vf *.o tests/*.o *.so *.dll benchmark-matmult build-info.h $(BUILD_TARGETS) $(TEST_TARGETS)
rm -vrf *.o tests/*.o *.so *.dll benchmark-matmult build-info.h *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)

#
# Examples
Expand Down
Loading

0 comments on commit 49f6f80

Please sign in to comment.