From 2c260b9e3a2cb50f358fd27b224147eb44f3e79d Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Wed, 25 Sep 2024 16:03:30 -0400 Subject: [PATCH] Only expose libggml --- llama_cpp/_ggml.py | 131 --------------------------------------------- 1 file changed, 131 deletions(-) diff --git a/llama_cpp/_ggml.py b/llama_cpp/_ggml.py index 2a287fd86..5bee8a93b 100644 --- a/llama_cpp/_ggml.py +++ b/llama_cpp/_ggml.py @@ -5,139 +5,8 @@ import os import pathlib -import ctypes - import llama_cpp._ctypes_extensions as ctypes_ext -import numpy as np - - libggml_base_path = pathlib.Path(os.path.abspath(os.path.dirname(__file__))) / "lib" libggml = ctypes_ext.load_shared_library("ggml", libggml_base_path) -ggml_function = ctypes_ext.ctypes_function_for_shared_library(libggml) - - -# define GGML_MAX_DIMS 4 -GGML_MAX_DIMS = 4 - -# define GGML_MAX_OP_PARAMS 64 -GGML_MAX_OP_PARAMS = 64 - -# define GGML_MAX_SRC 10 -GGML_MAX_SRC = 10 - -# define GGML_MAX_NAME 64 -GGML_MAX_NAME = 64 - - -# // n-dimensional tensor -# struct ggml_tensor { -# enum ggml_type type; -# -# GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor"); -# -# struct ggml_backend_buffer * buffer; -# -# int64_t ne[GGML_MAX_DIMS]; // number of elements -# size_t nb[GGML_MAX_DIMS]; // stride in bytes: -# // nb[0] = ggml_type_size(type) -# // nb[1] = nb[0] * (ne[0] / ggml_blck_size(type)) + padding -# // nb[i] = nb[i-1] * ne[i-1] -# -# // compute data -# enum ggml_op op; -# -# // op params - allocated as int32_t for alignment -# int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)]; -# -# int32_t flags; -# -# struct ggml_tensor * grad; -# struct ggml_tensor * src[GGML_MAX_SRC]; -# -# // source tensor and offset for views -# struct ggml_tensor * view_src; -# size_t view_offs; -# -# void * data; -# -# char name[GGML_MAX_NAME]; -# -# void * extra; // extra things e.g. for ggml-cuda.cu -# -# // char padding[4]; -# }; -class ggml_tensor(ctypes.Structure): - __fields__ = [ - ("type", ctypes.c_int), - ("buffer", ctypes.c_void_p), - ("ne", ctypes.c_int64 * 8), - ("nb", ctypes.c_size_t * 8), - ("op", ctypes.c_int), - ("op_params", ctypes.c_int32 * 8), - ("flags", ctypes.c_int32), - ("grad", ctypes.c_void_p), - ("src", ctypes.c_void_p * 8), - ("view_src", ctypes.c_void_p), - ("view_offs", ctypes.c_size_t), - ("data", ctypes.c_void_p), - ("name", ctypes.c_char * 64), - ("extra", ctypes.c_void_p), - ] - - -ggml_tensor_p = ctypes_ext.CtypesPointer[ggml_tensor] -ggml_tensor_p_ctypes = ctypes.POINTER(ggml_tensor) - - -# GGML_API GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size); -@ggml_function( - "ggml_backend_tensor_get", - [ggml_tensor_p_ctypes, ctypes.c_void_p, ctypes.c_size_t, ctypes.c_size_t], - ctypes.c_void_p, -) -def ggml_backend_tensor_get( - tensor: ggml_tensor_p, data: ctypes.c_void_p, offset: int, size: int -) -> None: - ... - - -# GGML_API GGML_CALL size_t ggml_nbytes (const struct ggml_tensor * tensor); -@ggml_function( - "ggml_nbytes", - [ggml_tensor_p_ctypes], - ctypes.c_size_t, -) -def ggml_nbytes(tensor: ggml_tensor_p) -> int: - ... - - -# GGML_API GGML_CALL int64_t ggml_nelements (const struct ggml_tensor * tensor); -@ggml_function( - "ggml_nelements", - [ggml_tensor_p_ctypes], - ctypes.c_int64, -) -def ggml_nelements(tensor: ggml_tensor_p) -> int: - ... - - -# GGML_API int ggml_n_dims (const struct ggml_tensor * tensor); // returns 1 for scalars -@ggml_function( - "ggml_n_dims", - [ggml_tensor_p_ctypes], - ctypes.c_int, -) -def ggml_n_dims(tensor: ggml_tensor_p) -> int: - ... - - -def ggml_tensor_to_numpy(tensor: ggml_tensor_p): - nbytes = ggml_nbytes(tensor) - nelements = ggml_nelements(tensor) - data = np.empty(nelements, dtype=np.float32) - ggml_backend_tensor_get( - tensor, ctypes.cast(data.ctypes.data, ctypes.c_void_p), 0, nbytes - ) - return data.reshape(tensor.contents.ne[: ggml_n_dims(tensor)])