fix: Remove double pickling for cached payloads #10222

Merged 1 commit on Jul 2, 2020.
2 changes: 2 additions & 0 deletions UPDATING.md
@@ -23,6 +23,8 @@ assists people when migrating to a new version.

 ## Next

+* [10222](https://github.com/apache/incubator-superset/pull/10222): a change to how payloads are cached. Previously cached objects cannot be decoded and thus will be reloaded from source.
+
 * [10130](https://github.com/apache/incubator-superset/pull/10130): a change which deprecates the `dbs.perm` column in favor of SQLAlchemy [hybrid attributes](https://docs.sqlalchemy.org/en/13/orm/extensions/hybrid.html).

 * [10034](https://github.com/apache/incubator-superset/pull/10034): a change which deprecates the public security manager methods `assert_datasource_permission`, `assert_query_context_permission`, `assert_viz_permission`, and `rejected_tables` in favor of the `raise_for_access` method, which also handles assertion logic for SQL tables.
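Why previously cached objects become unreadable: Flask-Caching backends such as Redis already pickle values on cache.set() and unpickle them on cache.get(), so the pkl.dumps/pkl.loads pair removed below amounted to double pickling. The following is a minimal sketch of the mismatch after upgrading, not Superset code; the payload values are placeholders.

    import pickle

    payload = {"df": "<DataFrame>", "query": "SELECT 1", "dttm": "2020-07-02"}

    # Old write path: Superset pickled the payload itself, and the cache
    # backend then pickled the resulting bytes a second time on set().
    stored = pickle.dumps(pickle.dumps(payload, protocol=pickle.HIGHEST_PROTOCOL))

    # New read path: the backend unpickles exactly once, so an old entry
    # comes back as raw bytes rather than a dict ...
    cache_value = pickle.loads(stored)
    try:
        df = cache_value["df"]
    except TypeError:
        # ... the lookup fails, the entry is treated as a cache miss, and
        # the payload is reloaded from the source database.
        print("stale cache entry; reloading from source")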
10 changes: 1 addition & 9 deletions superset/common/query_context.py
@@ -17,7 +17,6 @@
 import copy
 import logging
 import math
-import pickle as pkl
 from datetime import datetime, timedelta
 from typing import Any, ClassVar, Dict, List, Optional, Union

@@ -225,7 +224,6 @@ def get_df_payload(  # pylint: disable=too-many-locals,too-many-statements
         if cache_value:
             stats_logger.incr("loading_from_cache")
             try:
-                cache_value = pkl.loads(cache_value)
                 df = cache_value["df"]
                 query = cache_value["query"]
                 status = utils.QueryStatus.SUCCESS
@@ -260,14 +258,8 @@ def get_df_payload(  # pylint: disable=too-many-locals,too-many-statements
         if is_loaded and cache_key and cache and status != utils.QueryStatus.FAILED:
             try:
                 cache_value = dict(dttm=cached_dttm, df=df, query=query)
-                cache_binary = pkl.dumps(cache_value, protocol=pkl.HIGHEST_PROTOCOL)
-
-                logger.info(
-                    "Caching %d chars at key %s", len(cache_binary), cache_key
-                )
-

[Member Author] I removed this log line because we no longer know the size of the payload a priori, and it seemed like a somewhat unnecessary message.

                 stats_logger.incr("set_cache_key")
-                cache.set(cache_key, cache_binary, timeout=self.cache_timeout)
+                cache.set(cache_key, cache_value, timeout=self.cache_timeout)
[Member] Random thought (out of scope for this PR): we should time this operation eventually.

             except Exception as ex:  # pylint: disable=broad-except
                 # cache.set call can fail if the backend is down or if
                 # the key is too large or whatever other reasons
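On the reviewer's timing suggestion above, a hedged sketch of what that instrumentation might look like. The timed_cache_set helper and the stats_logger.timing hook are illustrative assumptions, not part of this PR; adapt the hook to whatever the stats backend provides.

    import time
    from typing import Any

    def timed_cache_set(
        cache: Any, key: str, value: Any, timeout: int, stats_logger: Any
    ) -> None:
        """Write a payload to the cache and report how long set() took."""
        start = time.perf_counter()
        cache.set(key, value, timeout=timeout)
        elapsed_ms = (time.perf_counter() - start) * 1000
        # Assumes the stats logger exposes a timing hook; adapt as needed.
        stats_logger.timing("set_cache_key_ms", elapsed_ms)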
8 changes: 0 additions & 8 deletions superset/viz.py
@@ -26,7 +26,6 @@
 import inspect
 import logging
 import math
-import pickle as pkl
 import re
 import uuid
 from collections import defaultdict, OrderedDict

@@ -443,7 +442,6 @@ def get_df_payload(
         if cache_value:
             stats_logger.incr("loading_from_cache")
             try:
-                cache_value = pkl.loads(cache_value)
                 df = cache_value["df"]
                 self.query = cache_value["query"]
                 self._any_cached_dttm = cache_value["dttm"]
@@ -488,12 +486,6 @@ def get_df_payload(
         ):
             try:
                 cache_value = dict(dttm=cached_dttm, df=df, query=self.query)
-                cache_value = pkl.dumps(cache_value, protocol=pkl.HIGHEST_PROTOCOL)
-
-                logger.info(
-                    "Caching {} chars at key {}".format(len(cache_value), cache_key)
-                )
-
                 stats_logger.incr("set_cache_key")
                 cache.set(cache_key, cache_value, timeout=self.cache_timeout)
             except Exception as ex:
8 changes: 0 additions & 8 deletions superset/viz_sip38.py
@@ -27,7 +27,6 @@
 import inspect
 import logging
 import math
-import pickle as pkl
 import re
 import uuid
 from collections import defaultdict, OrderedDict

@@ -481,7 +480,6 @@ def get_df_payload(
         if cache_value:
             stats_logger.incr("loading_from_cache")
             try:
-                cache_value = pkl.loads(cache_value)
                 df = cache_value["df"]
                 self.query = cache_value["query"]
                 self._any_cached_dttm = cache_value["dttm"]
@@ -525,12 +523,6 @@ def get_df_payload(
         ):
             try:
                 cache_value = dict(dttm=cached_dttm, df=df, query=self.query)
-                cache_value = pkl.dumps(cache_value, protocol=pkl.HIGHEST_PROTOCOL)
-
-                logger.info(
-                    "Caching {} chars at key {}".format(len(cache_value), cache_key)
-                )
-
                 stats_logger.incr("set_cache_key")
                 cache.set(cache_key, cache_value, timeout=self.cache_timeout)
             except Exception as ex:
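Since serialization now lives entirely in the cache backend, behavior after this change depends on the backend configured through Flask-Caching. An illustrative superset_config.py entry follows; the values are assumptions for this sketch, not taken from the PR.

    # superset_config.py
    CACHE_CONFIG = {
        "CACHE_TYPE": "redis",                   # Flask-Caching backend
        "CACHE_DEFAULT_TIMEOUT": 60 * 60 * 24,   # fallback TTL: one day
        "CACHE_KEY_PREFIX": "superset_results",
        "CACHE_REDIS_URL": "redis://localhost:6379/0",
    }

After upgrading, entries written by the old double-pickling code simply behave as cache misses and are rebuilt from source; flushing the cache is not required, though it reclaims the dead space sooner.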