Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix PX timezone treatment #2749

Merged
merged 1 commit into from
Sep 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,15 @@ This project adheres to [Semantic Versioning](http://semver.org/).
`binary_backend`, `binary_format` and `binary_compression_level` control
how to generate the b64 string ([#2691](https://github.com/plotly/plotly.py/pull/2691))
- `px.imshow` has a new `contrast_rescaling` argument in order to choose how
to set data values corresponding to the bounds of the color range
to set data values corresponding to the bounds of the color range
([#2691](https://github.com/plotly/plotly.py/pull/2691))

### Fixed

- Plotly Express no longer converts datetime columns of input dataframes to UTC ([#2749](https://github.com/plotly/plotly.py/pull/2749))
- Plotly Express has more complete support for datetimes as additional `hover_data` ([#2749](https://github.com/plotly/plotly.py/pull/2749))


## [4.9.0] - 2020-07-16

### Added
Expand Down
63 changes: 29 additions & 34 deletions packages/python/plotly/plotly/express/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,6 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref):
trace_patch = trace_spec.trace_patch.copy() or {}
fit_results = None
hover_header = ""
custom_data_len = 0
for attr_name in trace_spec.attrs:
attr_value = args[attr_name]
attr_label = get_decorated_label(args, attr_value, attr_name)
Expand All @@ -243,7 +242,7 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref):
)
]
trace_patch["dimensions"] = [
dict(label=get_label(args, name), values=column.values)
dict(label=get_label(args, name), values=column)
for (name, column) in dims
]
if trace_spec.constructor == go.Splom:
Expand Down Expand Up @@ -287,10 +286,8 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref):
y = sorted_trace_data[args["y"]].values
x = sorted_trace_data[args["x"]].values

x_is_date = False
if x.dtype.type == np.datetime64:
x = x.astype(int) / 10 ** 9 # convert to unix epoch seconds
x_is_date = True
elif x.dtype.type == np.object_:
try:
x = x.astype(np.float64)
Expand All @@ -308,21 +305,22 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref):
"Could not convert value of 'y' into a numeric type."
)

# preserve original values of "x" in case they're dates
trace_patch["x"] = sorted_trace_data[args["x"]][
np.logical_not(np.logical_or(np.isnan(y), np.isnan(x)))
]

if attr_value == "lowess":
# missing ='drop' is the default value for lowess but not for OLS (None)
# we force it here in case statsmodels change their defaults
trendline = sm.nonparametric.lowess(y, x, missing="drop")
trace_patch["x"] = trendline[:, 0]
trace_patch["y"] = trendline[:, 1]
hover_header = "<b>LOWESS trendline</b><br><br>"
elif attr_value == "ols":
fit_results = sm.OLS(
y, sm.add_constant(x), missing="drop"
).fit()
trace_patch["y"] = fit_results.predict()
trace_patch["x"] = x[
np.logical_not(np.logical_or(np.isnan(y), np.isnan(x)))
]
hover_header = "<b>OLS trendline</b><br>"
if len(fit_results.params) == 2:
hover_header += "%s = %g * %s + %g<br>" % (
Expand All @@ -339,8 +337,6 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref):
hover_header += (
"R<sup>2</sup>=%f<br><br>" % fit_results.rsquared
)
if x_is_date:
trace_patch["x"] = pd.to_datetime(trace_patch["x"] * 10 ** 9)
mapping_labels[get_label(args, args["x"])] = "%{x}"
mapping_labels[get_label(args, args["y"])] = "%{y} <b>(trend)</b>"
elif attr_name.startswith("error"):
Expand All @@ -350,8 +346,9 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref):
trace_patch[error_xy] = {}
trace_patch[error_xy][arr] = trace_data[attr_value]
elif attr_name == "custom_data":
trace_patch["customdata"] = trace_data[attr_value].values
custom_data_len = len(attr_value) # number of custom data columns
# here we store a data frame in customdata, and it's serialized
# as a list of row lists, which is what we want
trace_patch["customdata"] = trace_data[attr_value]
elif attr_name == "hover_name":
if trace_spec.constructor not in [
go.Histogram,
Expand All @@ -368,29 +365,23 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref):
go.Histogram2dContour,
]:
hover_is_dict = isinstance(attr_value, dict)
customdata_cols = args.get("custom_data") or []
for col in attr_value:
if hover_is_dict and not attr_value[col]:
continue
try:
position = args["custom_data"].index(col)
except (ValueError, AttributeError, KeyError):
position = custom_data_len
custom_data_len += 1
if "customdata" in trace_patch:
trace_patch["customdata"] = np.hstack(
(
trace_patch["customdata"],
trace_data[col].values[:, None],
)
)
else:
trace_patch["customdata"] = trace_data[col].values[
:, None
]
position = len(customdata_cols)
customdata_cols.append(col)
attr_label_col = get_decorated_label(args, col, None)
mapping_labels[attr_label_col] = "%%{customdata[%d]}" % (
position
)

# here we store a data frame in customdata, and it's serialized
# as a list of row lists, which is what we want
trace_patch["customdata"] = trace_data[customdata_cols]
elif attr_name == "color":
if trace_spec.constructor in [go.Choropleth, go.Choroplethmapbox]:
trace_patch["z"] = trace_data[attr_value]
Expand Down Expand Up @@ -1029,6 +1020,16 @@ def _escape_col_name(df_input, col_name, extra):
return col_name


def to_unindexed_series(x):
    """
    Coerce a list-like (or an existing pd.Series) into a new pd.Series whose
    index is a plain 0..n-1 range.

    Going through numpy (e.g. ``.values``) is deliberately avoided here because
    it mangles tz-aware datetime columns; instead we build a Series directly
    and drop any pre-existing index so that columns line up correctly in the
    new DataFrame being assembled. Note this copies the data (reset_index),
    which is accepted here in favor of correctness.
    """
    as_series = pd.Series(x)
    return as_series.reset_index(drop=True)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this makes a copy of the series because of the reset_index step, so it's suboptimal for non datetime columns. Could a solution be to return x.values in other cases?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmmm yeah we weren't copying before but we are now. After spending like 8 hours trying various permutations here I think I'm OK with the copy on the principle of "make it correct, then make it fast". We can optimize it later if this really causes problems.



def process_args_into_dataframe(args, wide_mode, var_name, value_name):
"""
After this function runs, the `all_attrables` keys of `args` all contain only
Expand Down Expand Up @@ -1140,10 +1141,7 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
length,
)
)
if hasattr(real_argument, "values"):
df_output[col_name] = real_argument.values
else:
df_output[col_name] = np.array(real_argument)
df_output[col_name] = to_unindexed_series(real_argument)
elif not df_provided:
raise ValueError(
"String or int arguments are only possible when a "
Expand Down Expand Up @@ -1178,7 +1176,7 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
)
else:
col_name = str(argument)
df_output[col_name] = df_input[argument].values
df_output[col_name] = to_unindexed_series(df_input[argument])
# ----------------- argument is likely a column / array / list.... -------
else:
if df_provided and hasattr(argument, "name"):
Expand Down Expand Up @@ -1207,10 +1205,7 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
"length of previously-processed arguments %s is %d"
% (field, len(argument), str(list(df_output.columns)), length)
)
if hasattr(argument, "values"):
df_output[str(col_name)] = argument.values
else:
df_output[str(col_name)] = np.array(argument)
df_output[str(col_name)] = to_unindexed_series(argument)

# Finally, update argument with column name now that column exists
assert col_name is not None, (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,10 @@ def test_sunburst_hoverdict_color():
hover_data={"pop": ":,"},
)
assert "color" in fig.data[0].hovertemplate


def test_date_in_hover():
    """Regression test (#2749): a tz-aware datetime passed via ``hover_data``
    must survive the round trip into ``customdata`` without being shifted to
    UTC or otherwise mangled."""
    frame = pd.DataFrame({"date": ["2015-04-04 19:31:30+1:00"], "value": [3]})
    frame["date"] = pd.to_datetime(frame["date"])
    fig = px.scatter(frame, x="value", y="value", hover_data=["date"])
    # compare string renderings so the timezone offset is part of the check
    assert str(fig.data[0].customdata[0][0]) == str(frame["date"][0])
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,14 @@ def test_build_df_with_index():
assert_frame_equal(tips.reset_index()[out["data_frame"].columns], out["data_frame"])


def test_timezones():
    """Regression test (#2749): ``build_dataframe`` must preserve tz-aware
    datetime columns of the input frame instead of converting them to UTC."""
    frame = pd.DataFrame({"date": ["2015-04-04 19:31:30+1:00"], "value": [3]})
    frame["date"] = pd.to_datetime(frame["date"])
    built = build_dataframe(dict(data_frame=frame, x="date", y="value"), go.Scatter)
    # string comparison keeps the timezone offset in the assertion
    assert str(built["data_frame"]["date"][0]) == str(frame["date"][0])


def test_non_matching_index():
df = pd.DataFrame(dict(y=[1, 2, 3]), index=["a", "b", "c"])

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,11 @@ def test_trendline_on_timeseries(mode):
)

df["date"] = pd.to_datetime(df["date"])
df["date"] = df["date"].dt.tz_localize("CET") # force a timezone
fig = px.scatter(df, x="date", y="GOOG", trendline=mode)
assert len(fig.data) == 2
assert len(fig.data[0].x) == len(fig.data[1].x)
assert type(fig.data[0].x[0]) == datetime
assert type(fig.data[1].x[0]) == datetime
assert np.all(fig.data[0].x == fig.data[1].x)
assert str(fig.data[0].x[0]) == str(fig.data[1].x[0])
30 changes: 15 additions & 15 deletions packages/python/plotly/tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@
; PASSING ADDITIONAL ARGUMENTS TO TEST COMMANDS
; The {posargs} is tox-specific and passes in any command line args after `--`.
; For example, given the testing command in *this* file:
; pytest {posargs} -x plotly/tests/test_core
; pytest {posargs} plotly/tests/test_core
;
; The following command:
; tox -- -k 'not nodev'
;
; Tells tox to call:
; pytest -k 'not nodev' -x plotly/tests/test_core
; pytest -k 'not nodev' plotly/tests/test_core
;

[tox]
Expand Down Expand Up @@ -81,25 +81,25 @@ deps=
basepython={env:PLOTLY_TOX_PYTHON_27:}
commands=
python --version
pytest {posargs} -x plotly/tests/test_core
pytest {posargs} plotly/tests/test_core

[testenv:py35-core]
basepython={env:PLOTLY_TOX_PYTHON_35:}
commands=
python --version
pytest {posargs} -x plotly/tests/test_core
pytest {posargs} plotly/tests/test_core

[testenv:py36-core]
basepython={env:PLOTLY_TOX_PYTHON_36:}
commands=
python --version
pytest {posargs} -x plotly/tests/test_core
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why did you remove the -x ? Exiting on first failing test is good for the environment :-).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I needed to be able to see if I fixed a test even if an "earlier" one failed. Looking at the timings, it seems to me that the container setup etc takes more time than the test suites proper, so while we have the container spun up we may as well extract as much information as possible about the run, is my feeling :)

pytest {posargs} plotly/tests/test_core

[testenv:py37-core]
basepython={env:PLOTLY_TOX_PYTHON_37:}
commands=
python --version
pytest {posargs} -x plotly/tests/test_core
pytest {posargs} plotly/tests/test_core
pytest {posargs} -x test_init/test_dependencies_not_imported.py
pytest {posargs} -x test_init/test_lazy_imports.py

Expand All @@ -111,41 +111,41 @@ commands=
;; Do some coverage reporting. No need to do this for all environments.
; mkdir -p {envbindir}/../../coverage-reports/{envname}
; coverage erase
; coverage run --include="*/plotly/*" --omit="*/tests*" {envbindir}/nosetests {posargs} -x plotly/tests
; coverage run --include="*/plotly/*" --omit="*/tests*" {envbindir}/nosetests {posargs} plotly/tests
; coverage html -d "{envbindir}/../../coverage-reports/{envname}" --title={envname}

[testenv:py27-optional]
basepython={env:PLOTLY_TOX_PYTHON_27:}
commands=
python --version
pytest {posargs} -x plotly/tests/test_core
pytest {posargs} -x plotly/tests/test_optional
pytest {posargs} plotly/tests/test_core
pytest {posargs} plotly/tests/test_optional
pytest _plotly_utils/tests/
pytest plotly/tests/test_io

[testenv:py35-optional]
basepython={env:PLOTLY_TOX_PYTHON_35:}
commands=
python --version
pytest {posargs} -x plotly/tests/test_core
pytest {posargs} -x plotly/tests/test_optional
pytest {posargs} plotly/tests/test_core
pytest {posargs} plotly/tests/test_optional
pytest _plotly_utils/tests/
pytest plotly/tests/test_io

[testenv:py36-optional]
basepython={env:PLOTLY_TOX_PYTHON_36:}
commands=
python --version
pytest {posargs} -x plotly/tests/test_core
pytest {posargs} -x plotly/tests/test_optional
pytest {posargs} plotly/tests/test_core
pytest {posargs} plotly/tests/test_optional
pytest _plotly_utils/tests/
pytest plotly/tests/test_io

[testenv:py37-optional]
basepython={env:PLOTLY_TOX_PYTHON_37:}
commands=
python --version
pytest {posargs} -x plotly/tests/test_core
pytest {posargs} -x plotly/tests/test_optional
pytest {posargs} plotly/tests/test_core
pytest {posargs} plotly/tests/test_optional
pytest _plotly_utils/tests/
pytest plotly/tests/test_io