From 0e4cbffc2a1ae56d53154bec221c6b69a1589a98 Mon Sep 17 00:00:00 2001 From: Ville Brofeldt <33317356+villebro@users.noreply.github.com> Date: Thu, 3 Sep 2020 19:49:54 +0300 Subject: [PATCH] fix: pivot table timestamp grouping (#10774) * fix: pivot table timestamp grouping * address comments --- superset/viz.py | 46 +++++++++++++++++++++++++++++++++------------- tests/viz_tests.py | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 66 insertions(+), 14 deletions(-) diff --git a/superset/viz.py b/superset/viz.py index bdf4c5351e49f..df00d4314ae59 100644 --- a/superset/viz.py +++ b/superset/viz.py @@ -27,7 +27,7 @@ import math import re from collections import defaultdict, OrderedDict -from datetime import datetime, timedelta +from datetime import date, datetime, timedelta from itertools import product from typing import ( Any, @@ -602,11 +602,11 @@ class TableViz(BaseViz): def process_metrics(self) -> None: """Process form data and store parsed column configs. - 1. Determine query mode based on form_data params. - - Use `query_mode` if it has a valid value - - Set as RAW mode if `all_columns` is set - - Otherwise defaults to AGG mode - 2. Determine output columns based on query mode. + 1. Determine query mode based on form_data params. + - Use `query_mode` if it has a valid value + - Set as RAW mode if `all_columns` is set + - Otherwise defaults to AGG mode + 2. Determine output columns based on query mode. """ # Verify form data first: if not specifying query mode, then cannot have both # GROUP BY and RAW COLUMNS. @@ -813,6 +813,31 @@ def get_aggfunc( # only min and max work properly for non-numerics return aggfunc if aggfunc in ("min", "max") else "max" + @staticmethod + def _format_datetime(value: Union[pd.Timestamp, datetime, date, str]) -> str: + """ + Format a timestamp in such a way that the viz will be able to apply + the correct formatting in the frontend. + + :param value: the value of a temporal column + :return: formatted timestamp if it is a valid timestamp, otherwise + the original value + """ + tstamp: Optional[pd.Timestamp] = None + if isinstance(value, pd.Timestamp): + tstamp = value + if isinstance(value, datetime) or isinstance(value, date): + tstamp = pd.Timestamp(value) + if isinstance(value, str): + try: + tstamp = pd.Timestamp(value) + except ValueError: + pass + if tstamp: + return f"__timestamp:{datetime_to_epoch(tstamp)}" + # fallback in case something incompatible is returned + return cast(str, value) + def get_data(self, df: pd.DataFrame) -> VizData: if df.empty: return None @@ -828,15 +853,10 @@ def get_data(self, df: pd.DataFrame) -> VizData: groupby = self.form_data.get("groupby") or [] columns = self.form_data.get("columns") or [] - def _format_datetime(value: Any) -> Optional[str]: - if isinstance(value, str): - return f"__timestamp:{datetime_to_epoch(pd.Timestamp(value))}" - return None - for column_name in groupby + columns: column = self.datasource.get_column(column_name) - if column and column.type in ("DATE", "DATETIME", "TIMESTAMP"): - ts = df[column_name].apply(_format_datetime) + if column and column.is_temporal: + ts = df[column_name].apply(self._format_datetime) df[column_name] = ts if self.form_data.get("transpose_pivot"): diff --git a/tests/viz_tests.py b/tests/viz_tests.py index b52ed7fc150c0..6490a4fe10371 100644 --- a/tests/viz_tests.py +++ b/tests/viz_tests.py @@ -16,7 +16,7 @@ # under the License. # isort:skip_file import uuid -from datetime import datetime +from datetime import date, datetime, timezone import logging from math import nan from unittest.mock import Mock, patch @@ -1345,6 +1345,38 @@ def test_get_aggfunc_non_numeric(self): == "min" ) + def test_format_datetime_from_pd_timestamp(self): + tstamp = pd.Timestamp(datetime(2020, 9, 3, tzinfo=timezone.utc)) + assert ( + viz.PivotTableViz._format_datetime(tstamp) == "__timestamp:1599091200000.0" + ) + + def test_format_datetime_from_datetime(self): + tstamp = datetime(2020, 9, 3, tzinfo=timezone.utc) + assert ( + viz.PivotTableViz._format_datetime(tstamp) == "__timestamp:1599091200000.0" + ) + + def test_format_datetime_from_date(self): + tstamp = date(2020, 9, 3) + assert ( + viz.PivotTableViz._format_datetime(tstamp) == "__timestamp:1599091200000.0" + ) + + def test_format_datetime_from_string(self): + tstamp = "2020-09-03T00:00:00" + assert ( + viz.PivotTableViz._format_datetime(tstamp) == "__timestamp:1599091200000.0" + ) + + def test_format_datetime_from_invalid_string(self): + tstamp = "abracadabra" + assert viz.PivotTableViz._format_datetime(tstamp) == tstamp + + def test_format_datetime_from_int(self): + assert viz.PivotTableViz._format_datetime(123) == 123 + assert viz.PivotTableViz._format_datetime(123.0) == 123.0 + class TestDistributionPieViz(SupersetTestCase): base_df = pd.DataFrame(