Skip to content

Commit

Permalink
fix: Remove double pickling for cached payloads (apache#10222)
Browse files Browse the repository at this point in the history
Co-authored-by: John Bodley <john.bodley@airbnb.com>
  • Loading branch information
2 people authored and auxten committed Nov 20, 2020
1 parent 4d5dc7e commit b61f10d
Show file tree
Hide file tree
Showing 4 changed files with 3 additions and 25 deletions.
2 changes: 2 additions & 0 deletions UPDATING.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ assists people when migrating to a new version.

## Next

* [10222](https://github.com/apache/incubator-superset/pull/10222): a change to how payloads are cached. Previously cached objects cannot be decoded and thus will be reloaded from source.

* [10130](https://github.com/apache/incubator-superset/pull/10130): a change which deprecates the `dbs.perm` column in favor of SQLAlchemy [hybrid attributes](https://docs.sqlalchemy.org/en/13/orm/extensions/hybrid.html).

* [10034](https://github.com/apache/incubator-superset/pull/10034): a change which deprecates the public security manager `assert_datasource_permission`, `assert_query_context_permission`, `assert_viz_permission`, and `rejected_tables` methods in favor of the `raise_for_access` method, which also handles assertion logic for SQL tables.
Expand Down
10 changes: 1 addition & 9 deletions superset/common/query_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import copy
import logging
import math
import pickle as pkl
from datetime import datetime, timedelta
from typing import Any, ClassVar, Dict, List, Optional, Union

Expand Down Expand Up @@ -225,7 +224,6 @@ def get_df_payload( # pylint: disable=too-many-locals,too-many-statements
if cache_value:
stats_logger.incr("loading_from_cache")
try:
cache_value = pkl.loads(cache_value)
df = cache_value["df"]
query = cache_value["query"]
status = utils.QueryStatus.SUCCESS
Expand Down Expand Up @@ -260,14 +258,8 @@ def get_df_payload( # pylint: disable=too-many-locals,too-many-statements
if is_loaded and cache_key and cache and status != utils.QueryStatus.FAILED:
try:
cache_value = dict(dttm=cached_dttm, df=df, query=query)
cache_binary = pkl.dumps(cache_value, protocol=pkl.HIGHEST_PROTOCOL)

logger.info(
"Caching %d chars at key %s", len(cache_binary), cache_key
)

stats_logger.incr("set_cache_key")
cache.set(cache_key, cache_binary, timeout=self.cache_timeout)
cache.set(cache_key, cache_value, timeout=self.cache_timeout)
except Exception as ex: # pylint: disable=broad-except
# cache.set call can fail if the backend is down or if
# the key is too large or whatever other reasons
Expand Down
8 changes: 0 additions & 8 deletions superset/viz.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import inspect
import logging
import math
import pickle as pkl
import re
import uuid
from collections import defaultdict, OrderedDict
Expand Down Expand Up @@ -443,7 +442,6 @@ def get_df_payload(
if cache_value:
stats_logger.incr("loading_from_cache")
try:
cache_value = pkl.loads(cache_value)
df = cache_value["df"]
self.query = cache_value["query"]
self._any_cached_dttm = cache_value["dttm"]
Expand Down Expand Up @@ -488,12 +486,6 @@ def get_df_payload(
):
try:
cache_value = dict(dttm=cached_dttm, df=df, query=self.query)
cache_value = pkl.dumps(cache_value, protocol=pkl.HIGHEST_PROTOCOL)

logger.info(
"Caching {} chars at key {}".format(len(cache_value), cache_key)
)

stats_logger.incr("set_cache_key")
cache.set(cache_key, cache_value, timeout=self.cache_timeout)
except Exception as ex:
Expand Down
8 changes: 0 additions & 8 deletions superset/viz_sip38.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
import inspect
import logging
import math
import pickle as pkl
import re
import uuid
from collections import defaultdict, OrderedDict
Expand Down Expand Up @@ -481,7 +480,6 @@ def get_df_payload(
if cache_value:
stats_logger.incr("loading_from_cache")
try:
cache_value = pkl.loads(cache_value)
df = cache_value["df"]
self.query = cache_value["query"]
self._any_cached_dttm = cache_value["dttm"]
Expand Down Expand Up @@ -525,12 +523,6 @@ def get_df_payload(
):
try:
cache_value = dict(dttm=cached_dttm, df=df, query=self.query)
cache_value = pkl.dumps(cache_value, protocol=pkl.HIGHEST_PROTOCOL)

logger.info(
"Caching {} chars at key {}".format(len(cache_value), cache_key)
)

stats_logger.incr("set_cache_key")
cache.set(cache_key, cache_value, timeout=self.cache_timeout)
except Exception as ex:
Expand Down

0 comments on commit b61f10d

Please sign in to comment.