Skip to content

Commit

Permalink
Update index_data.py for datatype conversion and alignment (#1813)
Browse files Browse the repository at this point in the history
* Update index_data.py for data conversion and alignment

* Update qlib/utils/index_data.py

* Update qlib/utils/index_data.py

* fix linting

---------

Co-authored-by: taozhiwang <taozhiwa@gmail.com>
Co-authored-by: you-n-g <you-n-g@users.noreply.github.com>
  • Loading branch information
3 people authored Jun 24, 2024
1 parent a339fc1 commit cde8020
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 1 deletion.
13 changes: 12 additions & 1 deletion qlib/utils/index_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,12 @@ def __init__(self, idx_list: Union[List, pd.Index, "Index", int]):
self.index_map = self.idx_list = np.arange(idx_list)
self._is_sorted = True
else:
# Check if all elements in idx_list are of the same type
if not all(isinstance(x, type(idx_list[0])) for x in idx_list):
raise TypeError("All elements in idx_list must be of the same type")
# Check if all elements in idx_list are of the same datetime64 precision
if isinstance(idx_list[0], np.datetime64) and not all(x.dtype == idx_list[0].dtype for x in idx_list):
raise TypeError("All elements in idx_list must be of the same datetime64 precision")
self.idx_list = np.array(idx_list)
# NOTE: only the first appearance is indexed
self.index_map = dict(zip(self.idx_list, range(len(self))))
Expand All @@ -131,7 +137,12 @@ def _convert_type(self, item):
if self.idx_list.dtype.type is np.datetime64:
if isinstance(item, pd.Timestamp):
# This happens often when creating index based on pandas.DatetimeIndex and query with pd.Timestamp
return item.to_numpy()
return item.to_numpy().astype(self.idx_list.dtype)
elif isinstance(item, np.datetime64):
# This happens often when creating index based on np.datetime64 and query with another precision
return item.astype(self.idx_list.dtype)
# NOTE: It is hard to consider every case at first.
# We just try to cover part of cases to make it more user-friendly
return item

def index(self, item) -> int:
Expand Down
18 changes: 18 additions & 0 deletions tests/misc/test_index_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,24 @@ def test_corner_cases(self):
print(sd)
self.assertTrue(sd.iloc[0] == 2)

# test different precisions of time data
timeindex = [
np.datetime64("2024-06-22T00:00:00.000000000"),
np.datetime64("2024-06-21T00:00:00.000000000"),
np.datetime64("2024-06-20T00:00:00.000000000"),
]
sd = idd.SingleData([1, 2, 3], index=timeindex)
self.assertTrue(
sd.index.index(np.datetime64("2024-06-21T00:00:00.000000000"))
== sd.index.index(np.datetime64("2024-06-21T00:00:00"))
)
self.assertTrue(sd.index.index(pd.Timestamp("2024-06-21 00:00")) == 1)

# Bad case: the input is not aligned
timeindex[1] = (np.datetime64("2024-06-21T00:00:00.00"),)
with self.assertRaises(TypeError):
sd = idd.SingleData([1, 2, 3], index=timeindex)

def test_ops(self):
sd1 = idd.SingleData([1, 2, 3, 4], index=["foo", "bar", "f", "g"])
sd2 = idd.SingleData([1, 2, 3, 4], index=["foo", "bar", "f", "g"])
Expand Down

0 comments on commit cde8020

Please sign in to comment.