Skip to content

Commit

Permalink
feat: linear imputation in Resample (#19393)
Browse files Browse the repository at this point in the history
(cherry picked from commit a39dd44)
  • Loading branch information
zhaoyongjie authored and villebro committed Apr 3, 2022
1 parent 56e78b9 commit 5f2ffb3
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ export const advancedAnalyticsControls: ControlPanelSectionConfig = {
choices: [
['asfreq', 'Null imputation'],
['zerofill', 'Zero imputation'],
['linear', 'Linear interpolation'],
['ffill', 'Forward values'],
['bfill', 'Backward values'],
['median', 'Median values'],
Expand Down
7 changes: 7 additions & 0 deletions superset/utils/pandas_postprocessing/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from flask_babel import gettext as _

from superset.exceptions import InvalidPostProcessingError
from superset.utils.pandas_postprocessing.utils import RESAMPLE_METHOD


def resample(
Expand All @@ -40,9 +41,15 @@ def resample(
"""
if not isinstance(df.index, pd.DatetimeIndex):
raise InvalidPostProcessingError(_("Resample operation requires DatetimeIndex"))
if method not in RESAMPLE_METHOD:
raise InvalidPostProcessingError(
_("Resample method should in ") + ", ".join(RESAMPLE_METHOD) + "."
)

if method == "asfreq" and fill_value is not None:
_df = df.resample(rule).asfreq(fill_value=fill_value)
elif method == "linear":
_df = df.resample(rule).interpolate()
else:
_df = getattr(df.resample(rule), method)()
return _df
2 changes: 2 additions & 0 deletions superset/utils/pandas_postprocessing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@
"P1W/1970-01-04T00:00:00Z": "W",
}

# Method names accepted by the resample post-processing operation.
RESAMPLE_METHOD = (
    "asfreq",
    "bfill",
    "ffill",
    "linear",
    "median",
    "mean",
    "sum",
)

FLAT_COLUMN_SEPARATOR = ", "


Expand Down
46 changes: 46 additions & 0 deletions tests/unit_tests/pandas_postprocessing/test_resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import numpy as np
import pandas as pd
import pytest
from pandas import to_datetime

from superset.exceptions import InvalidPostProcessingError
from superset.utils import pandas_postprocessing as pp
Expand Down Expand Up @@ -151,3 +153,47 @@ def test_resample_should_raise_ex():
pp.resample(
df=categories_df, rule="1D", method="asfreq",
)

with pytest.raises(InvalidPostProcessingError):
pp.resample(
df=timeseries_df, rule="1D", method="foobar",
)


def test_resample_linear():
    """Verify that ``method="linear"`` interpolates numeric gaps when upsampling.

    A frame with three irregularly spaced daily timestamps is resampled to a
    ``1D`` rule. The numeric ``y`` column is expected to be filled by linear
    interpolation, while the string ``label`` column is expected to stay
    missing on the newly created rows.
    """
    df = pd.DataFrame(
        index=to_datetime(["2019-01-01", "2019-01-05", "2019-01-08"]),
        data={"label": ["a", "e", "j"], "y": [1.0, 5.0, 8.0]},
    )
    post_df = pp.resample(df=df, rule="1D", method="linear")

    # Expected result:
    #
    #            label    y
    # 2019-01-01     a  1.0
    # 2019-01-02   NaN  2.0
    # 2019-01-03   NaN  3.0
    # 2019-01-04   NaN  4.0
    # 2019-01-05     e  5.0
    # 2019-01-06   NaN  6.0
    # 2019-01-07   NaN  7.0
    # 2019-01-08     j  8.0
    expected_df = pd.DataFrame(
        index=pd.to_datetime(
            [
                "2019-01-01",
                "2019-01-02",
                "2019-01-03",
                "2019-01-04",
                "2019-01-05",
                "2019-01-06",
                "2019-01-07",
                "2019-01-08",
            ]
        ),
        data={
            # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias was
            # removed in NumPy 2.0 and raises AttributeError there.
            "label": ["a", np.nan, np.nan, np.nan, "e", np.nan, np.nan, "j"],
            "y": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
        },
    )
    assert post_df.equals(expected_df)

0 comments on commit 5f2ffb3

Please sign in to comment.