Skip to content

Commit

Permalink
fix: pivot table timestamp grouping (apache#10774)
Browse files Browse the repository at this point in the history
* fix: pivot table timestamp grouping

* address comments
  • Loading branch information
villebro committed Sep 3, 2020
1 parent 6e75ea6 commit 0e4cbff
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 14 deletions.
46 changes: 33 additions & 13 deletions superset/viz.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
import math
import re
from collections import defaultdict, OrderedDict
from datetime import datetime, timedelta
from datetime import date, datetime, timedelta
from itertools import product
from typing import (
Any,
Expand Down Expand Up @@ -602,11 +602,11 @@ class TableViz(BaseViz):

def process_metrics(self) -> None:
"""Process form data and store parsed column configs.
1. Determine query mode based on form_data params.
- Use `query_mode` if it has a valid value
- Set as RAW mode if `all_columns` is set
- Otherwise defaults to AGG mode
2. Determine output columns based on query mode.
1. Determine query mode based on form_data params.
- Use `query_mode` if it has a valid value
- Set as RAW mode if `all_columns` is set
- Otherwise defaults to AGG mode
2. Determine output columns based on query mode.
"""
# Verify form data first: if not specifying query mode, then cannot have both
# GROUP BY and RAW COLUMNS.
Expand Down Expand Up @@ -813,6 +813,31 @@ def get_aggfunc(
# only min and max work properly for non-numerics
return aggfunc if aggfunc in ("min", "max") else "max"

@staticmethod
def _format_datetime(value: Union[pd.Timestamp, datetime, date, str]) -> str:
    """
    Format a timestamp in such a way that the viz will be able to apply
    the correct formatting in the frontend.

    Temporal values (``pd.Timestamp``, ``datetime``, ``date``) and strings
    are converted with ``pd.Timestamp``; when that conversion succeeds the
    result is rendered as ``__timestamp:<epoch>`` using ``datetime_to_epoch``.
    Anything else, or any value the conversion rejects, is returned
    unchanged.

    :param value: the value of a temporal column
    :return: formatted timestamp if it is a valid timestamp, otherwise
             the original value
    """
    tstamp: Optional[pd.Timestamp] = None
    # `pd.Timestamp` subclasses `datetime`, which in turn subclasses `date`,
    # so a single `date` check covers all three temporal types.
    if isinstance(value, (date, str)):
        try:
            tstamp = pd.Timestamp(value)
        except ValueError:
            # Unparseable strings raise ValueError; values outside the range
            # pandas can represent raise a ValueError subclass as well. In
            # both cases fall through and hand back the original value
            # instead of failing the whole column.
            pass
    if tstamp is not None:
        return f"__timestamp:{datetime_to_epoch(tstamp)}"
    # fallback in case something incompatible is returned
    return cast(str, value)

def get_data(self, df: pd.DataFrame) -> VizData:
if df.empty:
return None
Expand All @@ -828,15 +853,10 @@ def get_data(self, df: pd.DataFrame) -> VizData:
groupby = self.form_data.get("groupby") or []
columns = self.form_data.get("columns") or []

def _format_datetime(value: Any) -> Optional[str]:
if isinstance(value, str):
return f"__timestamp:{datetime_to_epoch(pd.Timestamp(value))}"
return None

for column_name in groupby + columns:
column = self.datasource.get_column(column_name)
if column and column.type in ("DATE", "DATETIME", "TIMESTAMP"):
ts = df[column_name].apply(_format_datetime)
if column and column.is_temporal:
ts = df[column_name].apply(self._format_datetime)
df[column_name] = ts

if self.form_data.get("transpose_pivot"):
Expand Down
34 changes: 33 additions & 1 deletion tests/viz_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# under the License.
# isort:skip_file
import uuid
from datetime import datetime
from datetime import date, datetime, timezone
import logging
from math import nan
from unittest.mock import Mock, patch
Expand Down Expand Up @@ -1345,6 +1345,38 @@ def test_get_aggfunc_non_numeric(self):
== "min"
)

def test_format_datetime_from_pd_timestamp(self):
    """A tz-aware ``pd.Timestamp`` formats to the epoch-millisecond marker."""
    utc_midnight = datetime(2020, 9, 3, tzinfo=timezone.utc)
    formatted = viz.PivotTableViz._format_datetime(pd.Timestamp(utc_midnight))
    assert formatted == "__timestamp:1599091200000.0"

def test_format_datetime_from_datetime(self):
    """A tz-aware stdlib ``datetime`` formats to the epoch-millisecond marker."""
    utc_midnight = datetime(2020, 9, 3, tzinfo=timezone.utc)
    formatted = viz.PivotTableViz._format_datetime(utc_midnight)
    assert formatted == "__timestamp:1599091200000.0"

def test_format_datetime_from_date(self):
    """A plain ``date`` formats to the epoch-millisecond marker."""
    day = date(2020, 9, 3)
    formatted = viz.PivotTableViz._format_datetime(day)
    assert formatted == "__timestamp:1599091200000.0"

def test_format_datetime_from_string(self):
    """An ISO-8601 string is parsed and formatted like a real timestamp."""
    iso_text = "2020-09-03T00:00:00"
    formatted = viz.PivotTableViz._format_datetime(iso_text)
    assert formatted == "__timestamp:1599091200000.0"

def test_format_datetime_from_invalid_string(self):
    """A string that cannot be parsed as a timestamp is returned as-is."""
    garbage = "abracadabra"
    formatted = viz.PivotTableViz._format_datetime(garbage)
    assert formatted == garbage

def test_format_datetime_from_int(self):
    """Numeric inputs (int and float) pass through untouched."""
    for number in (123, 123.0):
        assert viz.PivotTableViz._format_datetime(number) == number


class TestDistributionPieViz(SupersetTestCase):
base_df = pd.DataFrame(
Expand Down

0 comments on commit 0e4cbff

Please sign in to comment.