Wrap as_built function (#1994)
kounelisagis committed Jul 5, 2024
1 parent 19bd09e commit f0942d9
Showing 4 changed files with 98 additions and 10 deletions.
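
This commit wraps the C API's tiledb_as_built_dump in a new top-level helper, tiledb.as_built, which reports the build configuration of the linked libtiledb. A minimal usage sketch based on the code below; the exact keys and values depend on how libtiledb was built:

import tiledb

# Parsed build configuration as a nested dictionary (the default)
info = tiledb.as_built()
print(info["as_built"]["parameters"]["storage_backends"]["s3"]["enabled"])

# Raw JSON string, as produced by the underlying C API
raw = tiledb.as_built(return_json_string=True)
print(raw[:80])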
1 change: 1 addition & 0 deletions tiledb/__init__.py
@@ -82,6 +82,7 @@
from .highlevel import (
array_exists,
array_fragments,
as_built,
empty_like,
from_numpy,
open,
27 changes: 18 additions & 9 deletions tiledb/core.cc
@@ -80,14 +80,12 @@ static std::unique_ptr<StatsInfo> g_stats;
py::dtype tiledb_dtype(tiledb_datatype_t type, uint32_t cell_val_num);

struct BufferInfo {

BufferInfo(std::string name, size_t data_nbytes, tiledb_datatype_t data_type,
uint32_t cell_val_num, size_t offsets_num, size_t validity_num,
bool isvar = false, bool isnullable = false)

: name(name), type(data_type), cell_val_num(cell_val_num), isvar(isvar),
isnullable(isnullable) {

try {
dtype = tiledb_dtype(data_type, cell_val_num);
elem_nbytes = tiledb_datatype_size(type);
@@ -282,7 +280,6 @@ uint64_t count_zeros(py::array_t<uint8_t> a) {
}

class PyAgg {

using ByteBuffer = py::array_t<uint8_t>;
using AggToBufferMap = std::map<std::string, ByteBuffer>;
using AttrToAggsMap = std::map<std::string, AggToBufferMap>;
@@ -524,7 +521,6 @@ class PyAgg {
};

class PyQuery {

private:
Context ctx_;
std::shared_ptr<tiledb::Domain> domain_;
@@ -762,7 +758,6 @@ class PyQuery {
bool is_sparse() { return array_->schema().array_type() == TILEDB_SPARSE; }

void import_buffer(std::string name, py::array data, py::array offsets) {

tiledb_datatype_t type;
uint32_t cell_val_num;
std::tie(type, cell_val_num) = buffer_type(name);
@@ -939,7 +934,6 @@ class PyQuery {
auto offset_ptr = buf.offsets.mutable_data();

if (buf.isvar) {

if (offset_elem_num > 0) {
// account for 'sm.var_offsets.extra_element'
offset_elem_num -= (use_arrow_) ? 1 : 0;
@@ -1120,7 +1114,6 @@ class PyQuery {
}

void allocate_buffers() {

// allocate buffers for dims
// - we want to return dims first, if any requested
for (size_t dim_idx = 0; dim_idx < domain_->ndim(); dim_idx++) {
@@ -1260,7 +1253,6 @@ class PyQuery {

py::array unpack_buffer(std::string name, py::array buf,
py::array_t<uint64_t> off) {

auto start = std::chrono::high_resolution_clock::now();

if (off.size() < 1)
@@ -1673,6 +1665,22 @@ py::object python_internal_stats(bool dict = false) {
}
}

py::str as_built_dump() {
tiledb_string_t *s;
int rc = tiledb_as_built_dump(&s);
if (rc != TILEDB_OK) {
TPY_ERROR_LOC("Could not dump as built.");
}
const char *data_ptr;
py::size_t length;

tiledb_string_view(s, &data_ptr, &length);
py::str res(data_ptr, length);
tiledb_string_free(&s);

return res;
}

void init_core(py::module &m) {
init_query_condition(m);

@@ -1724,12 +1732,13 @@ void init_core(py::module &m) {
m.def("array_to_buffer", &convert_np);

m.def("init_stats", &init_stats);
m.def("disable_stats", &init_stats);
m.def("disable_stats", &disable_stats);
m.def("python_internal_stats", &python_internal_stats,
py::arg("dict") = false);
m.def("increment_stat", &increment_stat);
m.def("get_stats", &get_stats);
m.def("use_stats", &use_stats);
m.def("as_built_dump", &as_built_dump);

/*
We need to make sure C++ TileDBError is translated to a correctly-typed py
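
The new binding is exposed to Python as tiledb.main.as_built_dump() and returns the raw JSON text; the high-level wrapper added in tiledb/highlevel.py below simply json-decodes it. A small sketch of that relationship, for illustration only:

import json

import tiledb

raw = tiledb.main.as_built_dump()  # JSON string from tiledb_as_built_dump
info = json.loads(raw)             # same structure that tiledb.as_built() returns
assert info == tiledb.as_built()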
17 changes: 17 additions & 0 deletions tiledb/highlevel.py
@@ -1,3 +1,5 @@
import json

import numpy as np

import tiledb
@@ -254,6 +256,21 @@ def is_ndarray_like(arr):
return schema


def as_built(return_json_string=False):
"""
Dumps the TileDB build configuration to a dictionary or string.
:param bool return_json_string: Return the output as a raw JSON string instead of a dictionary
:return: dict or str
"""
res = tiledb.main.as_built_dump()

if return_json_string:
return res

return json.loads(res)


def _schema_like_numpy(
array,
ctx,
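
Based on the assertions in the new test below, the parsed dictionary nests the interesting flags under as_built → parameters, and the storage-backend flags should agree with what tiledb.VFS reports at runtime. An illustrative cross-check:

import tiledb

info = tiledb.as_built()
backends = info["as_built"]["parameters"]["storage_backends"]

vfs = tiledb.VFS()
for name in ("azure", "gcs", "hdfs", "s3"):
    # Each backend entry is a dict like {"enabled": True} or {"enabled": False}
    assert backends[name]["enabled"] == vfs.supports(name)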
63 changes: 62 additions & 1 deletion tiledb/tests/test_libtiledb.py
@@ -3829,7 +3829,7 @@ def test_offset_can_fit_data_var_size_cannot(
tiledb.stats_disable()


class TestTest(DiskTestCase):
class TestPath(DiskTestCase):
def test_path(self, pytestconfig):
path = self.path("foo")
if pytestconfig.getoption("vfs") == "s3":
@@ -3843,3 +3843,64 @@ def test_path(self, pytestconfig):
)
def test_no_output(self):
print("this test should fail")


class TestAsBuilt(DiskTestCase):
def test_as_built(self):
dump = tiledb.as_built(return_json_string=True)
assert isinstance(dump, str)
# ensure we get a non-empty string
assert len(dump) > 0
dump_dict = tiledb.as_built()
assert isinstance(dump_dict, dict)
# ensure we get a non-empty dict
assert len(dump_dict) > 0

# validate top-level key
assert "as_built" in dump_dict
assert isinstance(dump_dict["as_built"], dict)
assert len(dump_dict["as_built"]) > 0

# validate parameters key
assert "parameters" in dump_dict["as_built"]
assert isinstance(dump_dict["as_built"]["parameters"], dict)
assert len(dump_dict["as_built"]["parameters"]) > 0

# validate storage_backends key
assert "storage_backends" in dump_dict["as_built"]["parameters"]
assert isinstance(dump_dict["as_built"]["parameters"]["storage_backends"], dict)
assert len(dump_dict["as_built"]["parameters"]["storage_backends"]) > 0

x = dump_dict["as_built"]["parameters"]["storage_backends"]

# validate storage_backends attributes
vfs = tiledb.VFS()
if vfs.supports("azure"):
assert x["azure"]["enabled"] == True
else:
assert x["azure"]["enabled"] == False

if vfs.supports("gcs"):
assert x["gcs"]["enabled"] == True
else:
assert x["gcs"]["enabled"] == False

if vfs.supports("hdfs"):
assert x["hdfs"]["enabled"] == True
else:
assert x["hdfs"]["enabled"] == False

if vfs.supports("s3"):
assert x["s3"]["enabled"] == True
else:
assert x["s3"]["enabled"] == False

# validate support key
assert "support" in dump_dict["as_built"]["parameters"]
assert isinstance(dump_dict["as_built"]["parameters"]["support"], dict)
assert len(dump_dict["as_built"]["parameters"]["support"]) > 0

# validate support attributes - check only if boolean
assert dump_dict["as_built"]["parameters"]["support"]["serialization"][
"enabled"
] in [True, False]
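
To run just the new test class locally, a pytest node id like the following should work (invoked here through pytest's Python entry point; the equivalent command-line form uses the same string):

import pytest

# Select only the new TestAsBuilt case from the test module touched above
pytest.main(["tiledb/tests/test_libtiledb.py::TestAsBuilt::test_as_built", "-v"])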
