TileDB-Inc · ihnorton · Jun 10, 2020 · Jun 9, 2020 · Jun 9, 2020 · Jun 9, 2020
diff --git a/HISTORY.md b/HISTORY.md
@@ -1,3 +1,7 @@
+# TileDB-Py 0.6.3 Release Notes
+
+* Fix unnecessary implicit ordering requirement for multi-attribute assignment. [#328](https://github.com/TileDB-Inc/TileDB-Py/pull/328)
+
 # TileDB-Py 0.6.2 Release Notes
 
 ## Bug fixes

diff --git a/tiledb/dataframe_.py b/tiledb/dataframe_.py
@@ -4,6 +4,7 @@
 import os
 import io
 from collections import OrderedDict
+import warnings
 
 if sys.version_info >= (3,3):
     unicode_type = str
@@ -22,7 +23,9 @@
     'tile_order': 'row-major',
     'allows_duplicates': False,
     'sparse': False,
-    'mode': 'ingest'
+    'mode': 'ingest',
+    'attrs_filters': None,
+    'coords_filters': None
 }
 
 def parse_tiledb_kwargs(kwargs):
@@ -38,6 +41,10 @@ def parse_tiledb_kwargs(kwargs):
         args['allows_duplicates'] = kwargs.pop('allows_duplicates')
     if 'mode' in kwargs:
         args['mode'] = kwargs.pop('mode')
+    if 'attrs_filters' in kwargs:
+        args['attrs_filters'] = kwargs.pop('attrs_filters')
+    if 'coords_filters' in kwargs:
+        args['coords_filters'] = kwargs.pop('coords_filters')
 
     return args
 
@@ -96,7 +103,7 @@ def dtype_from_column(col):
     )
 
 # TODO make this a staticmethod on Attr?
-def attrs_from_df(df, index_dims=None, ctx=None):
+def attrs_from_df(df, index_dims=None, filters=None, ctx=None):
     attr_reprs = dict()
 
     if ctx is None:
@@ -108,7 +115,7 @@ def attrs_from_df(df, index_dims=None, ctx=None):
         if index_dims and name in index_dims:
             continue
         attr_info = dtype_from_column(col)
-        attrs.append(tiledb.Attr(name=name, dtype=attr_info.dtype))
+        attrs.append(tiledb.Attr(name=name, dtype=attr_info.dtype, filters=filters))
 
         if attr_info.repr is not None:
             attr_reprs[name] = attr_info.repr
@@ -201,16 +208,22 @@ def create_dims(ctx, dataframe, index_dims):
 
     return dims
 
-
 def from_dataframe(uri, dataframe, **kwargs):
+    # Deprecated in 0.6.3: warn (attributed to the caller), then delegate.
+    warnings.warn("tiledb.from_dataframe is deprecated; please use .from_pandas",
+                  DeprecationWarning, stacklevel=2)
+
+    from_pandas(uri, dataframe, **kwargs)
+
+def from_pandas(uri, dataframe, **kwargs):
     """Create TileDB array at given URI from pandas dataframe
 
     :param uri: URI for new TileDB array
     :param dataframe: pandas DataFrame
     :param mode: Creation mode, one of 'ingest' (default), 'create_schema'
     :param kwargs: optional keyword arguments for Pandas and TileDB.
-                TileDB context and configuration arguments
-                may be passed in a dictionary as `tiledb_args={...}`
+        TileDB arguments: tile_order, cell_order, allows_duplicates, sparse,
+                          mode, attrs_filters, coords_filters
     :return:
     """
     args = parse_tiledb_kwargs(kwargs)
@@ -222,6 +235,8 @@ def from_dataframe(uri, dataframe, **kwargs):
     sparse = args['sparse']
     index_dims = args.get('index_dims', None)
     mode = args.get('mode', 'ingest')
+    attrs_filters = args.get('attrs_filters', None)
+    coords_filters = args.get('coords_filters', None)
 
     write = True
     if mode is not None and mode == 'schema_only':
@@ -230,6 +245,14 @@ def from_dataframe(uri, dataframe, **kwargs):
     if ctx is None:
         ctx = tiledb.default_ctx()
 
+    if attrs_filters is None:  # default attribute compression: Zstd level 1
+        attrs_filters = tiledb.FilterList(
+            [tiledb.ZstdFilter(1, ctx=ctx)])
+
+    if coords_filters is None:
+        coords_filters = tiledb.FilterList(
+            [tiledb.ZstdFilter(1, ctx=ctx)])
+
     nrows = len(dataframe)
     tiling = np.min((nrows % 200, nrows))
 
@@ -247,14 +270,17 @@ def from_dataframe(uri, dataframe, **kwargs):
        *dims,
        ctx = ctx
     )
-    attrs, attr_metadata = attrs_from_df(dataframe, index_dims=index_dims)
+
+    attrs, attr_metadata = attrs_from_df(dataframe, index_dims=index_dims,
+                                         filters=attrs_filters)
 
     # now create the ArraySchema
     schema = tiledb.ArraySchema(
         domain=domain,
         attrs=attrs,
         cell_order=cell_order,
         tile_order=tile_order,
+        coords_filters=coords_filters,
         allows_duplicates=allows_duplicates,
         sparse=sparse
     )
@@ -302,6 +328,9 @@ def open_dataframe(uri):
     >>> tiledb.objec_type("iris.tldb")
     'array'
     """
+    warnings.warn("open_dataframe is deprecated and will be removed in the next release",
+                  DeprecationWarning, stacklevel=2)
+
     import pandas as pd
 
     # TODO support `distributed=True` option?
@@ -383,4 +412,4 @@ def from_csv(uri, csv_file, **kwargs):
     df = pandas.read_csv(csv_file, **kwargs)
 
     kwargs.update(tiledb_args)
-    from_dataframe(uri, df, **kwargs)
+    from_pandas(uri, df, **kwargs)
diff --git a/tiledb/libtiledb.pyx b/tiledb/libtiledb.pyx
@@ -4205,7 +4205,10 @@ cdef class DenseArrayImpl(Array):
         cdef list values = list()
 
         if isinstance(val, dict):
-            for (k, v) in val.items():
+            for attr_idx in range(self.schema.nattr):
+                attr = self.schema.attr(attr_idx)
+                k = attr.name
+                v = val[k]
                 attr = self.schema.attr(k)
                 attributes.append(attr._internal_name)
                 # object arrays are var-len and handled later

diff --git a/tiledb/tests/test_libtiledb.py b/tiledb/tests/test_libtiledb.py
@@ -1125,6 +1125,12 @@ def test_multiple_attributes(self):
         with tiledb.DenseArray(self.path("foo"), mode='w', ctx=ctx) as T:
             T[:] = V
 
+        # check setting attribute in different order from Attr definition
+        #   https://github.com/TileDB-Inc/TileDB-Py/issues/299
+        V2 = {"floats": V_floats, "ints": V_ints}
+        with tiledb.DenseArray(self.path("foo"), mode='w', ctx=ctx) as T:
+            T[:] = V2
+
         with tiledb.DenseArray(self.path("foo"), mode='r', ctx=ctx) as T:
             R = T[:]
             assert_array_equal(V["ints"], R["ints"])

diff --git a/tiledb/tests/test_pandas_dataframe.py b/tiledb/tests/test_pandas_dataframe.py
@@ -316,7 +316,7 @@ def test_csv_col_to_sparse_dims(self):
         tmp_array2b = os.path.join(tmp_dir, "array2b")
 
         # create a duplicate value
-        df.int_vals[0] = df.int_vals[1]
+        df.loc[0, 'int_vals'] = df.int_vals[1]
         df.sort_values('int_vals', inplace=True)
 
         df.to_csv(tmp_csv2, index=False)
@@ -345,11 +345,17 @@ def test_csv_schema_only(self):
         df.sort_values('time', inplace=True)
         df.to_csv(tmp_csv, index=False)
 
+        attrs_filters = tiledb.FilterList([tiledb.ZstdFilter(1)])
+        # from_pandas defaults to ZstdFilter level 1, so use level 7 here to
+        #   check that coords_filters is correctly parsed/passed
+        coords_filters = tiledb.FilterList([tiledb.ZstdFilter(7)])
+
         tmp_array = os.path.join(tmp_dir, "array")
         tiledb.from_csv(tmp_array, tmp_csv,
                         index_col=['time', 'double_range'],
                         parse_dates=['time'],
-                        mode='schema_only')
+                        mode='schema_only',
+                        coords_filters=coords_filters)
 
         t0, t1 = df.time.min(), df.time.max()
 
@@ -360,8 +366,9 @@ def test_csv_schema_only(self):
                           tiledb.Dim(name='double_range', domain=(-1000.0, 1000.0), tile=1.0, dtype='float64'),
                         ]),
                         attrs=[
-                          tiledb.Attr(name='int_vals', dtype='int64'),
+                          tiledb.Attr(name='int_vals', dtype='int64', filters=attrs_filters),
                         ],
+                        coords_filters=coords_filters,
                         cell_order='row-major',
                         tile_order='row-major', sparse=True,
                         allows_duplicates=False)
@@ -371,4 +378,12 @@ def test_csv_schema_only(self):
         self.assertEqual(array_nfiles, 3)
 
         with tiledb.open(tmp_array) as A:
-            self.assertEqual(A.schema, ref_schema)
+            self.assertEqual(A.schema, ref_schema)
+
+            # TODO currently no equality check for filters
+            self.assertEqual(
+                A.schema.coords_filters[0].level, coords_filters[0].level
+            )
+            self.assertEqual(
+                A.schema.attr(0).filters[0].level, attrs_filters[0].level
+            )