From cf60f664a4fb230dd239dfec2b393dbc318a5cbe Mon Sep 17 00:00:00 2001
From: John Bodley <4567245+john-bodley@users.noreply.github.com>
Date: Thu, 2 Jul 2020 16:46:36 -0700
Subject: [PATCH] fix: Remove double pickling for cached payloads (#10222)

Co-authored-by: John Bodley
---
 UPDATING.md                      |  2 ++
 superset/common/query_context.py | 10 +---------
 superset/viz.py                  |  8 --------
 superset/viz_sip38.py            |  8 --------
 4 files changed, 3 insertions(+), 25 deletions(-)

diff --git a/UPDATING.md b/UPDATING.md
index 3810cda0e9b60..0c04796b87431 100644
--- a/UPDATING.md
+++ b/UPDATING.md
@@ -23,6 +23,8 @@ assists people when migrating to a new version.
 
 ## Next
 
+* [10222](https://github.com/apache/incubator-superset/pull/10222): a change which alters how payloads are cached. Previously cached objects cannot be decoded and thus will be reloaded from the source.
+
 * [10130](https://github.com/apache/incubator-superset/pull/10130): a change which deprecates the `dbs.perm` column in favor of SQLAlchemy [hybrid attributes](https://docs.sqlalchemy.org/en/13/orm/extensions/hybrid.html).
 
 * [10034](https://github.com/apache/incubator-superset/pull/10034): a change which deprecates the public security manager `assert_datasource_permission`, `assert_query_context_permission`, `assert_viz_permission`, and `rejected_tables` methods with the `raise_for_access` method which also handles assertion logic for SQL tables.
diff --git a/superset/common/query_context.py b/superset/common/query_context.py
index fb70f0ae8f570..bf0a3e28cccda 100644
--- a/superset/common/query_context.py
+++ b/superset/common/query_context.py
@@ -17,7 +17,6 @@
 import copy
 import logging
 import math
-import pickle as pkl
 from datetime import datetime, timedelta
 from typing import Any, ClassVar, Dict, List, Optional, Union
 
@@ -225,7 +224,6 @@ def get_df_payload(  # pylint: disable=too-many-locals,too-many-statements
         if cache_value:
             stats_logger.incr("loading_from_cache")
             try:
-                cache_value = pkl.loads(cache_value)
                 df = cache_value["df"]
                 query = cache_value["query"]
                 status = utils.QueryStatus.SUCCESS
@@ -260,14 +258,8 @@ def get_df_payload(  # pylint: disable=too-many-locals,too-many-statements
         if is_loaded and cache_key and cache and status != utils.QueryStatus.FAILED:
             try:
                 cache_value = dict(dttm=cached_dttm, df=df, query=query)
-                cache_binary = pkl.dumps(cache_value, protocol=pkl.HIGHEST_PROTOCOL)
-
-                logger.info(
-                    "Caching %d chars at key %s", len(cache_binary), cache_key
-                )
-
                 stats_logger.incr("set_cache_key")
-                cache.set(cache_key, cache_binary, timeout=self.cache_timeout)
+                cache.set(cache_key, cache_value, timeout=self.cache_timeout)
             except Exception as ex:  # pylint: disable=broad-except
                 # cache.set call can fail if the backend is down or if
                 # the key is too large or whatever other reasons
diff --git a/superset/viz.py b/superset/viz.py
index 9a5675625c314..d265e198ca0be 100644
--- a/superset/viz.py
+++ b/superset/viz.py
@@ -26,7 +26,6 @@
 import inspect
 import logging
 import math
-import pickle as pkl
 import re
 import uuid
 from collections import defaultdict, OrderedDict
@@ -443,7 +442,6 @@ def get_df_payload(
             if cache_value:
                 stats_logger.incr("loading_from_cache")
                 try:
-                    cache_value = pkl.loads(cache_value)
                     df = cache_value["df"]
                     self.query = cache_value["query"]
                     self._any_cached_dttm = cache_value["dttm"]
@@ -488,12 +486,6 @@ def get_df_payload(
             ):
                 try:
                     cache_value = dict(dttm=cached_dttm, df=df, query=self.query)
-                    cache_value = pkl.dumps(cache_value, protocol=pkl.HIGHEST_PROTOCOL)
-
-                    logger.info(
-                        "Caching {} chars at key {}".format(len(cache_value), cache_key)
-                    )
-
                     stats_logger.incr("set_cache_key")
                     cache.set(cache_key, cache_value, timeout=self.cache_timeout)
                 except Exception as ex:
diff --git a/superset/viz_sip38.py b/superset/viz_sip38.py
index 9ef4841f0aab6..bed34b719762d 100644
--- a/superset/viz_sip38.py
+++ b/superset/viz_sip38.py
@@ -27,7 +27,6 @@
 import inspect
 import logging
 import math
-import pickle as pkl
 import re
 import uuid
 from collections import defaultdict, OrderedDict
@@ -481,7 +480,6 @@ def get_df_payload(
             if cache_value:
                 stats_logger.incr("loading_from_cache")
                 try:
-                    cache_value = pkl.loads(cache_value)
                     df = cache_value["df"]
                     self.query = cache_value["query"]
                     self._any_cached_dttm = cache_value["dttm"]
@@ -525,12 +523,6 @@ def get_df_payload(
             ):
                 try:
                     cache_value = dict(dttm=cached_dttm, df=df, query=self.query)
-                    cache_value = pkl.dumps(cache_value, protocol=pkl.HIGHEST_PROTOCOL)
-
-                    logger.info(
-                        "Caching {} chars at key {}".format(len(cache_value), cache_key)
-                    )
-
                     stats_logger.incr("set_cache_key")
                     cache.set(cache_key, cache_value, timeout=self.cache_timeout)
                 except Exception as ex: