Added verification to ensure objects are not referenced multiple time…

…s within a spec
Squarespace · Apr 12, 2018 · 958d08b · 958d08b
1 parent 725f3c8
commit 958d08b
Show file tree

Hide file tree

Showing 2 changed files with 210 additions and 23 deletions.
diff --git a/pgbedrock/core_configure.py b/pgbedrock/core_configure.py
@@ -1,3 +1,4 @@
+from collections import defaultdict
 import getpass
 import logging
 import os
@@ -23,6 +24,11 @@
 FILE_OPEN_ERROR_MSG = "Unable to open file '{}':\n{}"
 MISSING_ENVVAR_MSG = "Required environment variable not found:\n{}"
 VALIDATION_ERR_MSG = 'Spec error: role "{}", field "{}": {}'
+MULTIPLE_SCHEMA_OWNER_ERR_MSG = 'Spec error: schema "{}" owned by more than one role: {}'
+DUPLICATE_ROLE_DEFINITIONS_ERR_MSG = 'Spec error: role(s) defined more than once: {}'
+OBJECT_REF_READ_WRITE_ERR = ('Spec error: objects have been unnecessarily given both read and write privileges.'
+                             'pgbedrock automatically grants read access when write access is requested.\n\t{}'
+                             )
 SUCCESS_MSG = "\nNo changes needed. Congratulations! :)"
 
 SPEC_SCHEMA_YAML = """
@@ -89,19 +95,20 @@ def has_changes(statements):
     return False
 
 
-def load_spec(path):
+def render_template(path):
     """ Load a spec. There may be templated password variables, which we render using Jinja. """
     try:
         dir_path, filename = os.path.split(path)
         environment = jinja2.Environment(loader=jinja2.FileSystemLoader(dir_path),
                                          undefined=jinja2.StrictUndefined)
         loaded = environment.get_template(filename)
         rendered = loaded.render(env=os.environ)
-        return yaml.load(rendered)
     except jinja2.exceptions.TemplateNotFound as err:
         common.fail(FILE_OPEN_ERROR_MSG.format(path, err))
     except jinja2.exceptions.UndefinedError as err:
         common.fail(MISSING_ENVVAR_MSG.format(err))
+    else:
+        return rendered
 
 
 def run_module_sql(module_sql, cursor, verbose):
@@ -131,22 +138,114 @@ def run_password_sql(cursor, all_password_sql_to_run):
         common.fail(msg=common.FAILED_QUERY_MSG.format(query, e))
 
 
-def verify_spec(spec):
-    """ Ensure keywords make sense, particularly because it's easy to use singulars
-    instead of plurals (e.g. 'table' instead of 'tables') """
-    assert isinstance(spec, dict)
+def verify_schema(spec):
+    """Checks spec for schema errors."""
+    error_messages = []
 
     schema = yaml.load(SPEC_SCHEMA_YAML)
     v = cerberus.Validator(schema)
-    error_messages = []
     for rolename, config in spec.items():
         if not config:
             continue
-
         v.validate(config)
         for field, err_msg in v.errors.items():
             error_messages.append(VALIDATION_ERR_MSG.format(rolename, field, err_msg[0]))
 
+    return error_messages
+
+
+def check_for_multi_schema_owners(spec):
+    """Checks spec for schema with multiple owners."""
+    error_messages = []
+
+    globally_owned_schemas = defaultdict(list)
+    for role, config in spec.items():
+        if not config:
+            continue
+        if config.get('has_personal_schema'):
+            # indicates a role has a personal schema with its same name
+            globally_owned_schemas[role].append(role)
+        if config.get('owns') and config['owns'].get('schemas'):
+            role_owned_schemas = config['owns']['schemas']
+            for schema in role_owned_schemas:
+                globally_owned_schemas[schema].append(role)
+    for schema, owners in globally_owned_schemas.items():
+        if len(owners) > 1:
+            owners_formatted = ", ".join(owners)
+            error_messages.append(MULTIPLE_SCHEMA_OWNER_ERR_MSG.format(schema, owners_formatted))
+
+    return error_messages
+
+
+def check_read_write_obj_references(spec):
+    """Verifies objects aren't defined in both read and write privileges sections."""
+    error_messages = []
+
+    multi_refs = defaultdict(dict)
+    for role, config in spec.items():
+        if config and config.get('privileges'):
+            for obj in config['privileges']:
+                try:
+                    reads = set(config['privileges'][obj]['read'])
+                    writes = set(config['privileges'][obj]['write'])
+                    duplicates = reads.intersection(writes)
+                    if duplicates:
+                        multi_refs[role][obj] = list(duplicates)
+                except KeyError:
+                    continue
+    if multi_refs:
+        multi_ref_strings = ["%s: %s" % (k, v) for k, v in multi_refs.items()]
+        multi_ref_err_string = "\n\t".join(multi_ref_strings)
+        error_messages.append(OBJECT_REF_READ_WRITE_ERR.format(multi_ref_err_string))
+
+    return error_messages
+
+
+def detect_multiple_role_definitions(rendered_spec_template):
+    """Checks spec for roles declared multiple times.
+
+    In a spec template, if a role is declared more than once there exists a risk that the re-declaration will
+    override the desired configuration. pgbedrock considers a config containing this risk to be invalid and
+    will throw an error.
+
+    To accomplish this, the yaml.loader.Loader object is used to convert spec template
+    into a document tree.  Then, the root object's child nodes (which are the roles) are checked for duplicates.
+
+    Outputs:
+        []string    The decision to return a list of strings was deliberate, despite
+                    the fact that the length of the list can at most be one. The reason for this
+                    is that the other spec verification functions
+                    (check_for_multi_schema_owners and check_read_write_obj_references, verify_schema)
+                    also return a list of strings.  This return signature consistency makes the code
+                    in the verify_spec function cleaner.
+    """
+    error_messages = []
+    loader = yaml.loader.Loader(rendered_spec_template)
+    document_tree = loader.get_single_node()
+    if document_tree is None:
+        return None
+
+    role_definitions = defaultdict(int)
+    for node in document_tree.value:
+        role_definitions[node[0].value] += 1
+    multi_defined_roles = [k for k, v in role_definitions.items() if v > 1]
+    if multi_defined_roles:
+        error_message = " ,".join(multi_defined_roles)
+        error_messages = [DUPLICATE_ROLE_DEFINITIONS_ERR_MSG.format(error_message)]
+
+    return error_messages
+
+
+def verify_spec(rendered_template, spec):
+    assert isinstance(spec, dict)
+
+    error_messages = []
+    error_messages += detect_multiple_role_definitions(rendered_template)
+    verification_functions = (verify_schema,
+                              check_for_multi_schema_owners,
+                              check_read_write_obj_references)
+    for fn in verification_functions:
+        error_messages += fn(spec)
     if error_messages:
         common.fail('\n'.join(error_messages))
 
@@ -203,11 +302,14 @@ def configure(spec, host, port, user, password, dbname, prompt, attributes, memb
     db_connection = common.get_db_connection(host, port, dbname, user, password)
     cursor = db_connection.cursor(cursor_factory=psycopg2.extras.DictCursor)
 
-    spec = load_spec(spec)
+
+    rendered_template = render_template(spec)
+    spec = yaml.load(rendered_template)
+    verify_spec(rendered_template, spec)
+
     sql_to_run = []
     password_changed = False  # Initialize this in case the attributes module isn't run
 
-    verify_spec(spec)
 
     if attributes:
         sql_to_run.append(create_divider('attributes'))

diff --git a/tests/test_core_configure.py b/tests/test_core_configure.py
@@ -22,7 +22,7 @@ def set_envvar(request):
     del os.environ[k]
 
 
-def test_load_spec(tmpdir):
+def test_render_template(tmpdir):
     spec_path = tmpdir.join('spec.yml')
     spec_path.write("""
         fred:
@@ -44,7 +44,8 @@ def test_load_spec(tmpdir):
           schemas:
               - service1_schema
     """)
-    spec = core_configure.load_spec(spec_path.strpath)
+    spec = core_configure.render_template(spec_path.strpath)
+    spec = yaml.load(spec)
 
     assert len(spec) == 4
     assert set(spec.keys()) == {'admin', 'my_group', 'service1', 'fred'}
@@ -59,7 +60,8 @@ def test_load_spec_with_templated_variables(tmpdir, set_envvar):
           options:
             - PASSWORD: "{{ env['FRED_PASSWORD'] }}"
     """)
-    spec = core_configure.load_spec(spec_path.strpath)
+    spec = core_configure.render_template(spec_path.strpath)
+    spec = yaml.load(spec)
 
     password_option = spec['fred']['options'][0]
     assert password_option['PASSWORD'] == 'a_password'
@@ -79,7 +81,7 @@ def test_load_spec_fails_missing_templated_envvars(capsys, tmpdir):
     spec_path.write(spec)
 
     with pytest.raises(SystemExit):
-        core_configure.load_spec(spec_path.strpath)
+        core_configure.render_template(spec_path.strpath)
 
     out, err = capsys.readouterr()
     expected = core_configure.MISSING_ENVVAR_MSG.format('')
@@ -93,7 +95,7 @@ def test_load_spec_fails_file_not_found(capsys):
     path = os.path.join(dirname, filename)
 
     with pytest.raises(SystemExit):
-        core_configure.load_spec(path)
+        core_configure.render_template(path)
 
     out, _ = capsys.readouterr()
     assert core_configure.FILE_OPEN_ERROR_MSG.format(path, '') in out
@@ -108,13 +110,9 @@ def test_verify_spec_fails(capsys):
                 - flub
         """
     spec = yaml.load(spec_yaml)
-
-    with pytest.raises(SystemExit):
-        core_configure.verify_spec(spec)
-
-    out, _ = capsys.readouterr()
-    expected = core_configure.VALIDATION_ERR_MSG.format('fred', 'attribute', 'unknown field\n')
-    assert expected in out
+    errors = core_configure.verify_schema(spec)
+    expected = core_configure.VALIDATION_ERR_MSG.format('fred', 'attribute', 'unknown field')
+    assert expected == errors[0]
 
 
 def test_verify_spec_succeeds(capsys):
@@ -126,7 +124,94 @@ def test_verify_spec_succeeds(capsys):
         mark:
         """
     spec = yaml.load(spec_yaml)
-    core_configure.verify_spec(spec)
+    errors = core_configure.verify_schema(spec)
+    assert len(errors) == 0
+
+
+def test_verify_spec_fails_multiple_roles_own_schema(capsys):
+    spec_yaml = """
+    jfinance:
+        owns:
+            schemas:
+                - finance_documents
+    jfauxnance:
+        owns:
+            schemas:
+                - finance_documents
+    """
+    spec = yaml.load(spec_yaml)
+    errors = core_configure.check_for_multi_schema_owners(spec)
+    expected = core_configure.MULTIPLE_SCHEMA_OWNER_ERR_MSG.format('finance_documents', 'jfinance, jfauxnance')
+    assert [expected] == errors
+
+
+def test_verify_spec_fails_multiple_roles_own_schema_personal_schema(capsys):
+    spec_yaml = """
+    jfinance:
+        has_personal_schema: yes
+        owns:
+            schemas:
+                - finance_documents
+    jfauxnance:
+        owns:
+            schemas:
+                - jfinance
+    """
+    spec = yaml.load(spec_yaml)
+    errors = core_configure.check_for_multi_schema_owners(spec)
+    expected = core_configure.MULTIPLE_SCHEMA_OWNER_ERR_MSG.format('jfinance', 'jfinance, jfauxnance')
+    assert [expected] == errors
+
+
+def test_verify_spec_fails_role_defined_multiple_times(tmpdir, capsys):
+    spec_path = tmpdir.join('spec.yml')
+    spec_path.write("""
+    jfinance:
+        owns:
+            schemas:
+                - finance_documents
+    jfinance:
+        owns:
+            schemas:
+                - even_more_finance_documents
+    patty:
+        owns:
+            schemas:
+                - tupperwear
+    """)
+    rendered_template = core_configure.render_template(spec_path.strpath)
+    errors = core_configure.detect_multiple_role_definitions(rendered_template)
+    expected = core_configure.DUPLICATE_ROLE_DEFINITIONS_ERR_MSG.format('jfinance')
+    assert [expected] == errors
+
+
+def test_verify_spec_fails_object_referenced_read_write(capsys):
+    spec_yaml = """
+    margerie:
+        can_login: true
+        privileges:
+            {}:
+                read:
+                    - big_bad
+                write:
+                    - big_bad
+    danil:
+        can_login: true
+        privileges:
+            sequences:
+                read:
+                    - hoop
+                write:
+                    - grok
+    """
+
+    privilege_types = ('schemas', 'sequences', 'tables')
+    for t in privilege_types:
+        spec = yaml.load(spec_yaml.format(t))
+        errors = core_configure.check_read_write_obj_references(spec)
+        err_string = "margerie: {'%s': ['big_bad']}" % t
+        expected = core_configure.OBJECT_REF_READ_WRITE_ERR.format(err_string)
+        assert [expected] == errors
 
 
 @pytest.mark.parametrize('statements, expected', [