Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(thumbnails): add support for user specific thumbs #22328

Merged
merged 21 commits into from
Dec 14, 2022
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/docs/installation/alerts-reports.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ REDIS_PORT = "6379"

class CeleryConfig:
broker_url = 'redis://%s:%s/0' % (REDIS_HOST, REDIS_PORT)
imports = ('superset.sql_lab', "superset.tasks", "superset.tasks.thumbnails", )
imports = ('superset.sql_lab', "superset.tasks", "superset.thumbnails.tasks", )
result_backend = 'redis://%s:%s/0' % (REDIS_HOST, REDIS_PORT)
worker_prefetch_multiplier = 10
task_acks_late = True
Expand Down
9 changes: 8 additions & 1 deletion docs/docs/installation/cache.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,13 @@ FEATURE_FLAGS = {
}
```

By default thumbnails are rendered using the `THUMBNAIL_SELENIUM_USER` user account. To render thumbnails as the
logged in user (e.g. in environments that are using user impersonation), use the following configuration:

```python
THUMBNAIL_EXECUTE_AS = [ExecutorType.CURRENT_USER]
```

For this feature you will need a cache system and celery workers. All thumbnails are stored on cache
and are processed asynchronously by the workers.

Expand All @@ -66,7 +73,7 @@ from s3cache.s3cache import S3Cache

class CeleryConfig(object):
broker_url = "redis://localhost:6379/0"
imports = ("superset.sql_lab", "superset.tasks", "superset.tasks.thumbnails")
imports = ("superset.sql_lab", "superset.tasks", "superset.thumbnails.tasks")
result_backend = "redis://localhost:6379/0"
worker_prefetch_multiplier = 10
task_acks_late = True
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/installation/running-on-kubernetes.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ configOverrides:

class CeleryConfig(object):
broker_url = f"redis://{env('REDIS_HOST')}:{env('REDIS_PORT')}/0"
imports = ('superset.sql_lab', "superset.tasks", "superset.tasks.thumbnails", )
imports = ('superset.sql_lab', "superset.tasks", "superset.thumbnails.tasks", )
result_backend = f"redis://{env('REDIS_HOST')}:{env('REDIS_PORT')}/0"
task_annotations = {
'sql_lab.get_sql_results': {
Expand Down
37 changes: 23 additions & 14 deletions superset/charts/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import logging
from datetime import datetime
from io import BytesIO
from typing import Any, Optional
from typing import Any, cast, Optional
from zipfile import is_zipfile, ZipFile

from flask import redirect, request, Response, send_file, url_for
Expand Down Expand Up @@ -74,7 +74,8 @@
from superset.constants import MODEL_API_RW_METHOD_PERMISSION_MAP, RouteMethod
from superset.extensions import event_logger
from superset.models.slice import Slice
from superset.tasks.thumbnails import cache_chart_thumbnail
from superset.tasks.utils import get_current_user
from superset.thumbnails.tasks import cache_chart_thumbnail
from superset.utils.screenshots import ChartScreenshot
from superset.utils.urls import get_url_path
from superset.views.base_api import (
Expand Down Expand Up @@ -557,7 +558,7 @@ def cache_screenshot(self, pk: int, **kwargs: Any) -> WerkzeugResponse:
# Don't shrink the image if thumb_size is not specified
thumb_size = rison_dict.get("thumb_size") or window_size

chart = self.datamodel.get(pk, self._base_filters)
chart = cast(Slice, self.datamodel.get(pk, self._base_filters))
if not chart:
return self.response_404()

Expand All @@ -570,14 +571,13 @@ def cache_screenshot(self, pk: int, **kwargs: Any) -> WerkzeugResponse:

def trigger_celery() -> WerkzeugResponse:
logger.info("Triggering screenshot ASYNC")
kwargs = {
"url": chart_url,
"digest": chart.digest,
"force": True,
"window_size": window_size,
"thumb_size": thumb_size,
}
cache_chart_thumbnail.delay(**kwargs)
cache_chart_thumbnail.delay(
current_user=get_current_user(),
chart_id=chart.id,
force=True,
window_size=window_size,
thumb_size=thumb_size,
)
return self.response(
202, cache_key=cache_key, chart_url=chart_url, image_url=image_url
)
Expand Down Expand Up @@ -680,16 +680,21 @@ def thumbnail(self, pk: int, digest: str, **kwargs: Any) -> WerkzeugResponse:
500:
$ref: '#/components/responses/500'
"""
chart = self.datamodel.get(pk, self._base_filters)
chart = cast(Slice, self.datamodel.get(pk, self._base_filters))
if not chart:
return self.response_404()

current_user = get_current_user()
url = get_url_path("Superset.slice", slice_id=chart.id, standalone="true")
if kwargs["rison"].get("force", False):
logger.info(
"Triggering thumbnail compute (chart id: %s) ASYNC", str(chart.id)
)
cache_chart_thumbnail.delay(url, chart.digest, force=True)
cache_chart_thumbnail.delay(
current_user=current_user,
chart_id=chart.id,
force=True,
)
return self.response(202, message="OK Async")
# fetch the chart screenshot using the current user and cache if set
screenshot = ChartScreenshot(url, chart.digest).get_from_cache(
Expand All @@ -701,7 +706,11 @@ def thumbnail(self, pk: int, digest: str, **kwargs: Any) -> WerkzeugResponse:
logger.info(
"Triggering thumbnail compute (chart id: %s) ASYNC", str(chart.id)
)
cache_chart_thumbnail.delay(url, chart.digest, force=True)
cache_chart_thumbnail.delay(
current_user=current_user,
chart_id=chart.id,
force=True,
)
return self.response(202, message="OK Async")
# If digests
if chart.digest != digest:
Expand Down
2 changes: 1 addition & 1 deletion superset/cli/thumbnails.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def compute_thumbnails(
# pylint: disable=import-outside-toplevel
from superset.models.dashboard import Dashboard
from superset.models.slice import Slice
from superset.tasks.thumbnails import (
from superset.thumbnails.tasks import (
cache_chart_thumbnail,
cache_dashboard_thumbnail,
)
Expand Down
58 changes: 42 additions & 16 deletions superset/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
at the end of this file.
"""
# pylint: disable=too-many-lines
from __future__ import annotations
villebro marked this conversation as resolved.
Show resolved Hide resolved

import imp # pylint: disable=deprecated-module
import importlib.util
import json
Expand Down Expand Up @@ -57,9 +59,9 @@
from superset.advanced_data_type.types import AdvancedDataType
from superset.constants import CHANGE_ME_SECRET_KEY
from superset.jinja_context import BaseTemplateProcessor
from superset.reports.types import ReportScheduleExecutor
from superset.stats_logger import DummyStatsLogger
from superset.superset_typing import CacheConfig
from superset.tasks.types import ExecutorType
from superset.utils.core import is_test, NO_TIME_RANGE, parse_boolean_string
from superset.utils.encrypt import SQLAlchemyUtilsAdapter
from superset.utils.log import DBEventLogger
Expand All @@ -72,6 +74,8 @@

from superset.connectors.sqla.models import SqlaTable
from superset.models.core import Database
from superset.models.dashboard import Dashboard
from superset.models.slice import Slice

# Realtime stats logger, a StatsD implementation exists
STATS_LOGGER = DummyStatsLogger()
Expand Down Expand Up @@ -575,9 +579,33 @@ def _try_json_readsha(filepath: str, length: int) -> Optional[str]:

# ---------------------------------------------------
# Thumbnail config (behind feature flag)
# Also used by Alerts & Reports
# ---------------------------------------------------
THUMBNAIL_SELENIUM_USER = "admin"
# When executing Alerts & Reports or Thumbnails as the Selenium user, this defines
# the username of the account used to render the queries and dashboards/charts
THUMBNAIL_SELENIUM_USER: Optional[str] = "admin"

# To be able to have different thumbnails for different users, use these configs to
# define which user to execute the thumbnails and potentially custom functions for
# calculating thumbnail digests. To have unique thumbnails for all users, use the
# following config:
# THUMBNAIL_EXECUTE_AS = [ExecutorType.CURRENT_USER]
THUMBNAIL_EXECUTE_AS = [ExecutorType.SELENIUM]

# By default, thumbnail digests are calculated based on various parameters in the
# chart/dashboard metadata, and in the case of user-specific thumbnails, the
# username. To specify a custom digest function, use the following config parameters
# to define callbacks that receive
# 1. the model (dashboard or chart)
# 2. the executor type (e.g. ExecutorType.SELENIUM)
# 3. the executor's username (note, this is the executor as defined by
# `THUMBNAIL_EXECUTE_AS`; the executor is only equal to the currently logged in
# user if the executor type is equal to `ExecutorType.CURRENT_USER`)
# and return the final digest string:
THUMBNAIL_DASHBOARD_DIGEST_FUNC: Optional[
Callable[[Dashboard, ExecutorType, str], str]
] = None
THUMBNAIL_CHART_DIGEST_FUNC: Optional[Callable[[Slice, ExecutorType, str], str]] = None

THUMBNAIL_CACHE_CONFIG: CacheConfig = {
"CACHE_TYPE": "NullCache",
"CACHE_NO_NULL_WARNING": True,
Expand Down Expand Up @@ -930,7 +958,7 @@ class CeleryConfig: # pylint: disable=too-few-public-methods
# return f'tmp_{schema}'
# Function accepts database object, user object, schema name and sql that will be run.
SQLLAB_CTAS_SCHEMA_NAME_FUNC: Optional[
Callable[["Database", "models.User", str, str], str]
Callable[[Database, models.User, str, str], str]
] = None

# If enabled, it can be used to store the results of long-running queries
Expand All @@ -955,8 +983,8 @@ class CeleryConfig: # pylint: disable=too-few-public-methods
# Function that creates upload directory dynamically based on the
# database used, user and schema provided.
def CSV_TO_HIVE_UPLOAD_DIRECTORY_FUNC( # pylint: disable=invalid-name
database: "Database",
user: "models.User", # pylint: disable=unused-argument
database: Database,
user: models.User, # pylint: disable=unused-argument
schema: Optional[str],
) -> str:
# Note the final empty path enforces a trailing slash.
Expand All @@ -974,7 +1002,7 @@ def CSV_TO_HIVE_UPLOAD_DIRECTORY_FUNC( # pylint: disable=invalid-name
# db configuration and a result of this function.

# mypy doesn't catch that if case ensures list content being always str
ALLOWED_USER_CSV_SCHEMA_FUNC: Callable[["Database", "models.User"], List[str]] = (
ALLOWED_USER_CSV_SCHEMA_FUNC: Callable[[Database, models.User], List[str]] = (
lambda database, user: [UPLOADED_CSV_HIVE_NAMESPACE]
if UPLOADED_CSV_HIVE_NAMESPACE
else []
Expand Down Expand Up @@ -1174,16 +1202,14 @@ def EMAIL_HEADER_MUTATOR( # pylint: disable=invalid-name,unused-argument
# creator if either is contained within the list of owners, otherwise the first owner
# will be used) and finally `THUMBNAIL_SELENIUM_USER`, set as follows:
# ALERT_REPORTS_EXECUTE_AS = [
# ReportScheduleExecutor.CREATOR_OWNER,
# ReportScheduleExecutor.CREATOR,
# ReportScheduleExecutor.MODIFIER_OWNER,
# ReportScheduleExecutor.MODIFIER,
# ReportScheduleExecutor.OWNER,
# ReportScheduleExecutor.SELENIUM,
# ScheduledTaskExecutor.CREATOR_OWNER,
# ScheduledTaskExecutor.CREATOR,
# ScheduledTaskExecutor.MODIFIER_OWNER,
# ScheduledTaskExecutor.MODIFIER,
# ScheduledTaskExecutor.OWNER,
# ScheduledTaskExecutor.SELENIUM,
# ]
ALERT_REPORTS_EXECUTE_AS: List[ReportScheduleExecutor] = [
ReportScheduleExecutor.SELENIUM
]
ALERT_REPORTS_EXECUTE_AS: List[ExecutorType] = [ExecutorType.SELENIUM]
# if ALERT_REPORTS_WORKING_TIME_OUT_KILL is True, set a celery hard timeout
# Equal to working timeout + ALERT_REPORTS_WORKING_TIME_OUT_LAG
ALERT_REPORTS_WORKING_TIME_OUT_LAG = int(timedelta(seconds=10).total_seconds())
Expand Down
20 changes: 15 additions & 5 deletions superset/dashboards/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import logging
from datetime import datetime
from io import BytesIO
from typing import Any, Callable, Optional
from typing import Any, Callable, cast, Optional
from zipfile import is_zipfile, ZipFile

from flask import make_response, redirect, request, Response, send_file, url_for
Expand Down Expand Up @@ -82,7 +82,8 @@
from superset.extensions import event_logger
from superset.models.dashboard import Dashboard
from superset.models.embedded_dashboard import EmbeddedDashboard
from superset.tasks.thumbnails import cache_dashboard_thumbnail
from superset.tasks.utils import get_current_user
from superset.thumbnails.tasks import cache_dashboard_thumbnail
from superset.utils.cache import etag_cache
from superset.utils.screenshots import DashboardScreenshot
from superset.utils.urls import get_url_path
Expand Down Expand Up @@ -879,16 +880,21 @@ def thumbnail(self, pk: int, digest: str, **kwargs: Any) -> WerkzeugResponse:
500:
$ref: '#/components/responses/500'
"""
dashboard = self.datamodel.get(pk, self._base_filters)
dashboard = cast(Dashboard, self.datamodel.get(pk, self._base_filters))
villebro marked this conversation as resolved.
Show resolved Hide resolved
if not dashboard:
return self.response_404()

dashboard_url = get_url_path(
"Superset.dashboard", dashboard_id_or_slug=dashboard.id
)
# If force, request a screenshot from the workers
current_user = get_current_user()
if kwargs["rison"].get("force", False):
cache_dashboard_thumbnail.delay(dashboard_url, dashboard.digest, force=True)
cache_dashboard_thumbnail.delay(
current_user=current_user,
dashboard_id=dashboard.id,
force=True,
)
return self.response(202, message="OK Async")
# fetch the dashboard screenshot using the current user and cache if set
screenshot = DashboardScreenshot(
Expand All @@ -897,7 +903,11 @@ def thumbnail(self, pk: int, digest: str, **kwargs: Any) -> WerkzeugResponse:
# If the screenshot does not exist, request one from the workers
if not screenshot:
self.incr_stats("async", self.thumbnail.__name__)
cache_dashboard_thumbnail.delay(dashboard_url, dashboard.digest, force=True)
cache_dashboard_thumbnail.delay(
current_user=current_user,
dashboard_id=dashboard.id,
force=True,
)
return self.response(202, message="OK Async")
# If digests
if dashboard.digest != digest:
Expand Down
22 changes: 10 additions & 12 deletions superset/models/dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,12 @@
from superset.models.helpers import AuditMixinNullable, ImportExportMixin
from superset.models.slice import Slice
from superset.models.user_attributes import UserAttribute
from superset.tasks.thumbnails import cache_dashboard_thumbnail
from superset.tasks.utils import get_current_user
from superset.thumbnails.digest import get_dashboard_digest
from superset.thumbnails.tasks import cache_dashboard_thumbnail
from superset.utils import core as utils
from superset.utils.core import get_user_id
from superset.utils.decorators import debounce
from superset.utils.hashing import md5_sha_from_str
from superset.utils.urls import get_url_path

metadata = Model.metadata # pylint: disable=no-member
config = app.config
Expand Down Expand Up @@ -241,11 +241,7 @@ def dashboard_link(self) -> Markup:

@property
def digest(self) -> str:
"""
Returns a MD5 HEX digest that makes this dashboard unique
"""
unique_string = f"{self.position_json}.{self.css}.{self.json_metadata}"
return md5_sha_from_str(unique_string)
return get_dashboard_digest(self)

@property
def thumbnail_url(self) -> str:
Expand Down Expand Up @@ -329,8 +325,11 @@ def position(self) -> Dict[str, Any]:
return {}

def update_thumbnail(self) -> None:
url = get_url_path("Superset.dashboard", dashboard_id_or_slug=self.id)
cache_dashboard_thumbnail.delay(url, self.digest, force=True)
cache_dashboard_thumbnail.delay(
current_user=get_current_user(),
dashboard_id=self.id,
force=True,
)

@debounce(0.1)
def clear_cache(self) -> None:
Expand Down Expand Up @@ -439,8 +438,7 @@ def export_dashboards( # pylint: disable=too-many-locals

@classmethod
def get(cls, id_or_slug: Union[str, int]) -> Dashboard:
session = db.session()
qry = session.query(Dashboard).filter(id_or_slug_filter(id_or_slug))
qry = db.session.query(Dashboard).filter(id_or_slug_filter(id_or_slug))
villebro marked this conversation as resolved.
Show resolved Hide resolved
return qry.one_or_none()


Expand Down
Loading