From 0822efa5d6921f63c25dc679a7860541d604eb24 Mon Sep 17 00:00:00 2001 From: Junda Yang Date: Thu, 20 Sep 2018 11:21:11 -0700 Subject: [PATCH] Add schema level access control on csv upload (#5787) * Add schema level access control on csv upload * add db migrate merge point * fix flake 8 * fix test * remove unnecessary db migration * fix flake * nit * fix test for test_schemas_access_for_csv_upload_endpoint * fix test_csv_import test * use security_manager to check whether schema is allowed to be accessed * bring security manager to the party * flake8 & repush to retrigger test * address comments * remove trailing comma (cherry picked from commit b6d7d57c40a7ae2563dcfaefe721dbdfcb452a94) --- superset/forms.py | 63 ++++++++++++--- superset/models/core.py | 10 ++- superset/security.py | 6 +- .../form_view/csv_to_database_view/edit.html | 46 +++++++++++ .../superset/models/database/add.html | 1 + .../superset/models/database/edit.html | 1 + .../superset/models/database/macros.html | 6 ++ superset/utils.py | 1 + superset/views/core.py | 79 +++++++++++++++++-- tests/base_tests.py | 5 +- tests/core_tests.py | 30 +++++++ 11 files changed, 224 insertions(+), 24 deletions(-) create mode 100644 superset/templates/superset/form_view/csv_to_database_view/edit.html diff --git a/superset/forms.py b/superset/forms.py index 5983989787532..6108162f84a3b 100644 --- a/superset/forms.py +++ b/superset/forms.py @@ -15,7 +15,7 @@ from wtforms.ext.sqlalchemy.fields import QuerySelectField from wtforms.validators import DataRequired, NumberRange, Optional -from superset import app, db +from superset import app, db, security_manager from superset.models import core as models config = app.config @@ -49,10 +49,51 @@ def filter_not_empty_values(value): class CsvToDatabaseForm(DynamicForm): # pylint: disable=E0211 - def csv_enabled_dbs(): - return db.session.query( + def csv_allowed_dbs(): + csv_allowed_dbs = [] + csv_enabled_dbs = db.session.query( models.Database).filter_by( - allow_csv_upload=True).all() + allow_csv_upload=True).all() + for csv_enabled_db in csv_enabled_dbs: + if CsvToDatabaseForm.at_least_one_schema_is_allowed(csv_enabled_db): + csv_allowed_dbs.append(csv_enabled_db) + return csv_allowed_dbs + + @staticmethod + def at_least_one_schema_is_allowed(database): + """ + If the user has access to the database or all datasource + 1. if schemas_allowed_for_csv_upload is empty + a) if database does not support schema + user is able to upload csv without specifying schema name + b) if database supports schema + user is able to upload csv to any schema + 2. if schemas_allowed_for_csv_upload is not empty + a) if database does not support schema + This situation is impossible and upload will fail + b) if database supports schema + user is able to upload to schema in schemas_allowed_for_csv_upload + elif the user does not access to the database or all datasource + 1. if schemas_allowed_for_csv_upload is empty + a) if database does not support schema + user is unable to upload csv + b) if database supports schema + user is unable to upload csv + 2. if schemas_allowed_for_csv_upload is not empty + a) if database does not support schema + This situation is impossible and user is unable to upload csv + b) if database supports schema + user is able to upload to schema in schemas_allowed_for_csv_upload + """ + if (security_manager.database_access(database) or + security_manager.all_datasource_access()): + return True + schemas = database.get_schema_access_for_csv_upload() + if (schemas and + security_manager.schemas_accessible_by_user( + database, schemas, False)): + return True + return False name = StringField( _('Table Name'), @@ -66,8 +107,14 @@ def csv_enabled_dbs(): FileRequired(), FileAllowed(['csv'], _('CSV Files Only!'))]) con = QuerySelectField( _('Database'), - query_factory=csv_enabled_dbs, + query_factory=csv_allowed_dbs, get_pk=lambda a: a.id, get_label=lambda a: a.database_name) + schema = StringField( + _('Schema'), + description=_('Specify a schema (if database flavor supports this).'), + validators=[Optional()], + widget=BS3TextFieldWidget(), + filters=[lambda x: x or None]) sep = StringField( _('Delimiter'), description=_('Delimiter used by CSV file (for whitespace use \s+).'), @@ -83,12 +130,6 @@ def csv_enabled_dbs(): ('fail', _('Fail')), ('replace', _('Replace')), ('append', _('Append'))], validators=[DataRequired()]) - schema = StringField( - _('Schema'), - description=_('Specify a schema (if database flavour supports this).'), - validators=[Optional()], - widget=BS3TextFieldWidget(), - filters=[lambda x: x or None]) header = IntegerField( _('Header Row'), description=_( diff --git a/superset/models/core.py b/superset/models/core.py index 11916bb8cf4ee..23482f64f1a4c 100644 --- a/superset/models/core.py +++ b/superset/models/core.py @@ -646,7 +646,7 @@ class Database(Model, AuditMixinNullable, ImportMixin): expose_in_sqllab = Column(Boolean, default=False) allow_run_sync = Column(Boolean, default=True) allow_run_async = Column(Boolean, default=False) - allow_csv_upload = Column(Boolean, default=True) + allow_csv_upload = Column(Boolean, default=False) allow_ctas = Column(Boolean, default=False) allow_dml = Column(Boolean, default=False) force_ctas_schema = Column(String(250)) @@ -654,11 +654,11 @@ class Database(Model, AuditMixinNullable, ImportMixin): extra = Column(Text, default=textwrap.dedent("""\ { "metadata_params": {}, - "engine_params": {} + "engine_params": {}, + "schemas_allowed_for_csv_upload": [] } """)) perm = Column(String(1000)) - impersonate_user = Column(Boolean, default=False) export_fields = ('database_name', 'sqlalchemy_uri', 'cache_timeout', 'expose_in_sqllab', 'allow_run_sync', 'allow_run_async', @@ -916,6 +916,7 @@ def get_extra(self): extra = json.loads(self.extra) except Exception as e: logging.error(e) + raise e return extra def get_table(self, table_name, schema=None): @@ -939,6 +940,9 @@ def get_pk_constraint(self, table_name, schema=None): def get_foreign_keys(self, table_name, schema=None): return self.inspector.get_foreign_keys(table_name, schema) + def get_schema_access_for_csv_upload(self): + return self.get_extra().get('schemas_allowed_for_csv_upload', []) + @property def sqlalchemy_uri_decrypted(self): conn = sqla.engine.url.make_url(self.sqlalchemy_uri) diff --git a/superset/security.py b/superset/security.py index 8ea8c04d09f06..1f7b3e86bb1be 100644 --- a/superset/security.py +++ b/superset/security.py @@ -183,10 +183,12 @@ def user_datasource_perms(self): datasource_perms.add(perm.view_menu.name) return datasource_perms - def schemas_accessible_by_user(self, database, schemas): + def schemas_accessible_by_user(self, database, schemas, hierarchical=True): from superset import db from superset.connectors.sqla.models import SqlaTable - if self.database_access(database) or self.all_datasource_access(): + if (hierarchical and + (self.database_access(database) or + self.all_datasource_access())): return schemas subset = set() diff --git a/superset/templates/superset/form_view/csv_to_database_view/edit.html b/superset/templates/superset/form_view/csv_to_database_view/edit.html new file mode 100644 index 0000000000000..0f0e5db296035 --- /dev/null +++ b/superset/templates/superset/form_view/csv_to_database_view/edit.html @@ -0,0 +1,46 @@ +{% extends 'appbuilder/general/model/edit.html' %} + +{% block tail_js %} + {{ super() }} + +{% endblock %} \ No newline at end of file diff --git a/superset/templates/superset/models/database/add.html b/superset/templates/superset/models/database/add.html index 0ce38e9ef7752..4f4a9ff526c55 100644 --- a/superset/templates/superset/models/database/add.html +++ b/superset/templates/superset/models/database/add.html @@ -4,4 +4,5 @@ {% block tail_js %} {{ super() }} {{ macros.testconn() }} + {{ macros.expand_extra_textarea() }} {% endblock %} diff --git a/superset/templates/superset/models/database/edit.html b/superset/templates/superset/models/database/edit.html index 5effaeb506d56..9d13b7c5038aa 100644 --- a/superset/templates/superset/models/database/edit.html +++ b/superset/templates/superset/models/database/edit.html @@ -4,4 +4,5 @@ {% block tail_js %} {{ super() }} {{ macros.testconn() }} + {{ macros.expand_extra_textarea() }} {% endblock %} diff --git a/superset/templates/superset/models/database/macros.html b/superset/templates/superset/models/database/macros.html index da895b353772a..12ee5f7a365a3 100644 --- a/superset/templates/superset/models/database/macros.html +++ b/superset/templates/superset/models/database/macros.html @@ -57,3 +57,9 @@ }); {% endmacro %} + +{% macro expand_extra_textarea() %} + +{% endmacro %} diff --git a/superset/utils.py b/superset/utils.py index ad1e61b270eb4..b5bb545d92789 100644 --- a/superset/utils.py +++ b/superset/utils.py @@ -843,6 +843,7 @@ def get_or_create_main_db(): dbobj.set_sqlalchemy_uri(conf.get('SQLALCHEMY_DATABASE_URI')) dbobj.expose_in_sqllab = True dbobj.allow_run_sync = True + dbobj.allow_csv_upload = True db.session.add(dbobj) db.session.commit() return dbobj diff --git a/superset/views/core.py b/superset/views/core.py index 9324a8d393edf..9862404c9039b 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -156,10 +156,10 @@ class DatabaseView(SupersetModelView, DeleteMixin, YamlExportMixin): # noqa 'modified', 'allow_csv_upload', ] add_columns = [ - 'database_name', 'sqlalchemy_uri', 'cache_timeout', 'extra', - 'expose_in_sqllab', 'allow_run_sync', 'allow_run_async', 'allow_csv_upload', + 'database_name', 'sqlalchemy_uri', 'cache_timeout', 'expose_in_sqllab', + 'allow_run_sync', 'allow_run_async', 'allow_csv_upload', 'allow_ctas', 'allow_dml', 'force_ctas_schema', 'impersonate_user', - 'allow_multi_schema_metadata_fetch', + 'allow_multi_schema_metadata_fetch', 'extra', ] search_exclude_columns = ( 'password', 'tables', 'created_by', 'changed_by', 'queries', @@ -205,14 +205,19 @@ class DatabaseView(SupersetModelView, DeleteMixin, YamlExportMixin): # noqa 'When allowing CREATE TABLE AS option in SQL Lab, ' 'this option forces the table to be created in this schema'), 'extra': utils.markdown( - 'JSON string containing extra configuration elements. ' - 'The ``engine_params`` object gets unpacked into the ' + 'JSON string containing extra configuration elements.
' + '1. The ``engine_params`` object gets unpacked into the ' '[sqlalchemy.create_engine]' '(http://docs.sqlalchemy.org/en/latest/core/engines.html#' 'sqlalchemy.create_engine) call, while the ``metadata_params`` ' 'gets unpacked into the [sqlalchemy.MetaData]' '(http://docs.sqlalchemy.org/en/rel_1_0/core/metadata.html' - '#sqlalchemy.schema.MetaData) call. ', True), + '#sqlalchemy.schema.MetaData) call.
' + '2. The ``schemas_allowed_for_csv_upload`` is a comma separated list ' + 'of schemas that CSVs are allowed to upload to. ' + 'Specify it as **"schemas_allowed": ["public", "csv_upload"]**. ' + 'If database flavor does not support schema or any schema is allowed ' + 'to be accessed, just leave the list empty', True), 'impersonate_user': _( 'If Presto, all the queries in SQL Lab are going to be executed as the ' 'currently logged on user who must have permission to run them.
' @@ -227,6 +232,8 @@ class DatabaseView(SupersetModelView, DeleteMixin, YamlExportMixin): # noqa 'Duration (in seconds) of the caching timeout for this database. ' 'A timeout of 0 indicates that the cache never expires. ' 'Note this defaults to the global timeout if undefined.'), + 'allow_csv_upload': _( + 'If selected, please set the schemas allowed for csv upload in Extra.'), } label_columns = { 'expose_in_sqllab': _('Expose in SQL Lab'), @@ -304,6 +311,7 @@ class DatabaseAsync(DatabaseView): class CsvToDatabaseView(SimpleFormView): form = CsvToDatabaseForm + form_template = 'superset/form_view/csv_to_database_view/edit.html' form_title = _('CSV to Database configuration') add_columns = ['database', 'schema', 'table_name'] @@ -315,9 +323,19 @@ def form_get(self, form): form.skip_blank_lines.data = True form.infer_datetime_format.data = True form.decimal.data = '.' - form.if_exists.data = 'append' + form.if_exists.data = 'fail' def form_post(self, form): + database = form.con.data + schema_name = form.schema.data or '' + + if not self.is_schema_allowed(database, schema_name): + message = _('Database "{0}" Schema "{1}" is not allowed for csv uploads. ' + 'Please contact Superset Admin'.format(database.database_name, + schema_name)) + flash(message, 'danger') + return redirect('/csvtodatabaseview/form') + csv_file = form.csv_file.data form.csv_file.data.filename = secure_filename(form.csv_file.data.filename) csv_filename = form.csv_file.data.filename @@ -351,6 +369,15 @@ def form_post(self, form): flash(message, 'info') return redirect('/tablemodelview/list/') + def is_schema_allowed(self, database, schema): + if not database.allow_csv_upload: + return False + schemas = database.get_schema_access_for_csv_upload() + if schemas: + return schema in schemas + return (security_manager.database_access(database) or + security_manager.all_datasource_access()) + appbuilder.add_view_no_menu(CsvToDatabaseView) @@ -2762,6 +2789,44 @@ def slice_query(self, slice_id): link=security_manager.get_datasource_access_link(viz_obj.datasource)) return self.get_query_string_response(viz_obj) + @api + @has_access_api + @expose('/schema_access_for_csv_upload') + def schemas_access_for_csv_upload(self): + """ + This method exposes an API endpoint to + get the schema access control settings for csv upload in this database + """ + if not request.args.get('db_id'): + return json_error_response( + 'No database is allowed for your csv upload') + + db_id = int(request.args.get('db_id')) + database = ( + db.session + .query(models.Database) + .filter_by(id=db_id) + .one() + ) + try: + schemas_allowed = database.get_schema_access_for_csv_upload() + if (security_manager.database_access(database) or + security_manager.all_datasource_access()): + return self.json_response(schemas_allowed) + # the list schemas_allowed should not be empty here + # and the list schemas_allowed_processed returned from security_manager + # should not be empty either, + # otherwise the database should have been filtered out + # in CsvToDatabaseForm + schemas_allowed_processed = security_manager.schemas_accessible_by_user( + database, schemas_allowed, False) + return self.json_response(schemas_allowed_processed) + except Exception: + return json_error_response(( + 'Failed to fetch schemas allowed for csv upload in this database! ' + 'Please contact Superset Admin!\n\n' + 'The error message returned was:\n{}').format(traceback.format_exc())) + appbuilder.add_view_no_menu(Superset) diff --git a/tests/base_tests.py b/tests/base_tests.py index 269a7cb80509a..61e35784f3dd9 100644 --- a/tests/base_tests.py +++ b/tests/base_tests.py @@ -121,10 +121,13 @@ def get_table(self, table_id): .one() ) - def get_or_create(self, cls, criteria, session): + def get_or_create(self, cls, criteria, session, **kwargs): obj = session.query(cls).filter_by(**criteria).first() if not obj: obj = cls(**criteria) + obj.__dict__.update(**kwargs) + session.add(obj) + session.commit() return obj def login(self, username='admin', password='general'): diff --git a/tests/core_tests.py b/tests/core_tests.py index f03c51f2b392c..e3305496e3d28 100644 --- a/tests/core_tests.py +++ b/tests/core_tests.py @@ -17,6 +17,7 @@ import string import unittest +import mock import pandas as pd import psycopg2 from six import text_type @@ -696,6 +697,35 @@ def test_slice_payload_viz_markdown(self): self.assertEqual(data['status'], None) self.assertEqual(data['error'], None) + @mock.patch('superset.security.SupersetSecurityManager.schemas_accessible_by_user') + @mock.patch('superset.security.SupersetSecurityManager.database_access') + @mock.patch('superset.security.SupersetSecurityManager.all_datasource_access') + def test_schemas_access_for_csv_upload_endpoint(self, + mock_all_datasource_access, + mock_database_access, + mock_schemas_accessible): + mock_all_datasource_access.return_value = False + mock_database_access.return_value = False + mock_schemas_accessible.return_value = ['this_schema_is_allowed_too'] + database_name = 'fake_db_100' + db_id = 100 + extra = """{ + "schemas_allowed_for_csv_upload": + ["this_schema_is_allowed", "this_schema_is_allowed_too"] + }""" + + self.login(username='admin') + dbobj = self.get_or_create( + cls=models.Database, + criteria={'database_name': database_name}, + session=db.session, + id=db_id, + extra=extra) + data = self.get_json_resp( + url='/superset/schema_access_for_csv_upload?db_id={db_id}' + .format(db_id=dbobj.id)) + assert data == ['this_schema_is_allowed_too'] + if __name__ == '__main__': unittest.main()