From 1a2733bbe667eac3cb58429e6318eead52903a95 Mon Sep 17 00:00:00 2001 From: friendtocephalopods <52580251+friendtocephalopods@users.noreply.github.com> Date: Thu, 17 Sep 2020 14:17:08 -0700 Subject: [PATCH] feat: tweaks for gremlin support (#60) * common: gremlin support tweaks add optional key parameter to column, table. rename Statistics -> Stat. add fixtures, streaming, utils to common. Update mypy to 761 to fix false errors. Fix flake8 complaints about extra line in __init__.py files Signed-off-by: Joshua Hoskins * Feedback 1 Remove streams Move fixtures to amundsen_common/tests Stat as additional class, deprecate Statistics Signed-off-by: Joshua Hoskins * feedback 2 remove utils, make mypy ignore missing imports module-specific Signed-off-by: Joshua Hoskins --- amundsen_common/__init__.py | 1 - amundsen_common/log/__init__.py | 1 - amundsen_common/models/__init__.py | 1 - amundsen_common/models/dashboard.py | 1 - amundsen_common/models/table.py | 25 ++- amundsen_common/models/user.py | 10 +- amundsen_common/tests/__init__.py | 2 + amundsen_common/tests/fixtures.py | 227 ++++++++++++++++++++++++++++ requirements.txt | 2 +- setup.cfg | 9 ++ setup.py | 4 +- tests/__init__.py | 1 - tests/tests/__init__.py | 2 + tests/tests/test_fixtures.py | 106 +++++++++++++ tests/unit/__init__.py | 1 - tests/unit/log/__init__.py | 1 - 16 files changed, 378 insertions(+), 16 deletions(-) create mode 100644 amundsen_common/tests/__init__.py create mode 100644 amundsen_common/tests/fixtures.py create mode 100644 tests/tests/__init__.py create mode 100644 tests/tests/test_fixtures.py diff --git a/amundsen_common/__init__.py b/amundsen_common/__init__.py index d66c0ef..f3145d7 100644 --- a/amundsen_common/__init__.py +++ b/amundsen_common/__init__.py @@ -1,3 +1,2 @@ # Copyright Contributors to the Amundsen project. 
# SPDX-License-Identifier: Apache-2.0 - diff --git a/amundsen_common/log/__init__.py b/amundsen_common/log/__init__.py index d66c0ef..f3145d7 100644 --- a/amundsen_common/log/__init__.py +++ b/amundsen_common/log/__init__.py @@ -1,3 +1,2 @@ # Copyright Contributors to the Amundsen project. # SPDX-License-Identifier: Apache-2.0 - diff --git a/amundsen_common/models/__init__.py b/amundsen_common/models/__init__.py index d66c0ef..f3145d7 100644 --- a/amundsen_common/models/__init__.py +++ b/amundsen_common/models/__init__.py @@ -1,3 +1,2 @@ # Copyright Contributors to the Amundsen project. # SPDX-License-Identifier: Apache-2.0 - diff --git a/amundsen_common/models/dashboard.py b/amundsen_common/models/dashboard.py index c0d7681..a3c94c7 100644 --- a/amundsen_common/models/dashboard.py +++ b/amundsen_common/models/dashboard.py @@ -24,4 +24,3 @@ class DashboardSummarySchema(AttrsSchema): class Meta: target = DashboardSummary register_as_scheme = True - diff --git a/amundsen_common/models/table.py b/amundsen_common/models/table.py index 1493413..bd535b0 100644 --- a/amundsen_common/models/table.py +++ b/amundsen_common/models/table.py @@ -62,6 +62,9 @@ class Meta: @attr.s(auto_attribs=True, kw_only=True) class Statistics: + """ + DEPRECATED. Use Stat + """ stat_type: str stat_val: Optional[str] = None start_epoch: Optional[int] = None @@ -69,18 +72,36 @@ class Statistics: class StatisticsSchema(AttrsSchema): + """ + DEPRECATED. 
Use StatSchema + """ class Meta: target = Statistics register_as_scheme = True +@attr.s(auto_attribs=True, kw_only=True) +class Stat: + stat_type: str + stat_val: Optional[str] = None + start_epoch: Optional[int] = None + end_epoch: Optional[int] = None + + +class StatSchema(AttrsSchema): + class Meta: + target = Stat + register_as_scheme = True + + @attr.s(auto_attribs=True, kw_only=True) class Column: name: str + key: Optional[str] = None description: Optional[str] = None col_type: str sort_order: int - stats: List[Statistics] = [] + stats: List[Stat] = [] class ColumnSchema(AttrsSchema): @@ -115,6 +136,7 @@ class Meta: target = Source register_as_scheme = True + @attr.s(auto_attribs=True, kw_only=True) class ResourceReport: name: str @@ -151,6 +173,7 @@ class Table: cluster: str schema: str name: str + key: Optional[str] = None tags: List[Tag] = [] badges: List[Badge] = [] table_readers: List[Reader] = [] diff --git a/amundsen_common/models/user.py b/amundsen_common/models/user.py index 55ff2a7..09b924d 100644 --- a/amundsen_common/models/user.py +++ b/amundsen_common/models/user.py @@ -1,7 +1,7 @@ # Copyright Contributors to the Amundsen project. 
# SPDX-License-Identifier: Apache-2.0 -from typing import Optional, Dict +from typing import Any, Optional, Dict import attr from marshmallow import ValidationError, validates_schema, pre_load @@ -38,7 +38,7 @@ class User: manager_id: Optional[str] = None role_name: Optional[str] = None profile_url: Optional[str] = None - other_key_values: Optional[Dict[str, str]] = attr.ib(factory=dict) + other_key_values: Optional[Dict[str, str]] = attr.ib(factory=dict) # type: ignore # TODO: Add frequent_used, bookmarked, & owned resources @@ -57,14 +57,14 @@ def _str_no_value(self, s: Optional[str]) -> bool: return False @pre_load - def preprocess_data(self, data: Dict) -> Dict: + def preprocess_data(self, data: Dict[str, Any]) -> Dict[str, Any]: if self._str_no_value(data.get('user_id')): data['user_id'] = data.get('email') if self._str_no_value(data.get('profile_url')): data['profile_url'] = '' if data.get('GET_PROFILE_URL'): - data['profile_url'] = data.get('GET_PROFILE_URL')(data['user_id']) + data['profile_url'] = data.get('GET_PROFILE_URL')(data['user_id']) # type: ignore first_name = data.get('first_name') last_name = data.get('last_name') @@ -81,7 +81,7 @@ def preprocess_data(self, data: Dict) -> Dict: return data @validates_schema - def validate_user(self, data: Dict) -> None: + def validate_user(self, data: Dict[str, Any]) -> None: if self._str_no_value(data.get('display_name')): raise ValidationError('"display_name", "full_name", or "email" must be provided') diff --git a/amundsen_common/tests/__init__.py b/amundsen_common/tests/__init__.py new file mode 100644 index 0000000..f3145d7 --- /dev/null +++ b/amundsen_common/tests/__init__.py @@ -0,0 +1,2 @@ +# Copyright Contributors to the Amundsen project. 
+# SPDX-License-Identifier: Apache-2.0 diff --git a/amundsen_common/tests/fixtures.py b/amundsen_common/tests/fixtures.py new file mode 100644 index 0000000..4fbd040 --- /dev/null +++ b/amundsen_common/tests/fixtures.py @@ -0,0 +1,227 @@ +# Copyright Contributors to the Amundsen project. +# SPDX-License-Identifier: Apache-2.0 + +import string +from typing import Any, List, Optional + +from amundsen_common.models.table import (Application, Column, + ProgrammaticDescription, Stat, Table, + Tag) +from amundsen_common.models.user import User + + +class Fixtures: + """ + These fixtures are useful for creating test objects. For an example usage, check out tests/tests/test_fixtures.py + """ + counter = 1000 + + @staticmethod + def next_int() -> int: + i = Fixtures.counter + Fixtures.counter += 1 + return i + + @staticmethod + def next_string(*, prefix: str = '', length: int = 10) -> str: + astr: str = prefix + \ + ''.join(Fixtures.next_item(items=list(string.ascii_lowercase)) for _ in range(length)) + \ + ('%06d' % Fixtures.next_int()) + return astr + + @staticmethod + def next_range() -> range: + return range(0, Fixtures.next_int() % 5) + + @staticmethod + def next_item(*, items: List[Any]) -> Any: + return items[Fixtures.next_int() % len(items)] + + @staticmethod + def next_database() -> str: + return Fixtures.next_item(items=list(["database1", "database2"])) + + @staticmethod + def next_application(*, application_id: Optional[str] = None) -> Application: + if not application_id: + application_id = Fixtures.next_string(prefix='ap', length=8) + application = Application(application_url=f'https://{application_id}.example.com', + description=f'{application_id} description', + name=application_id.capitalize(), + id=application_id) + return application + + @staticmethod + def next_tag(*, tag_name: Optional[str] = None) -> Tag: + if not tag_name: + tag_name = Fixtures.next_string(prefix='ta', length=8) + return Tag(tag_name=tag_name, tag_type='default') + + @staticmethod + 
def next_tags() -> List[Tag]: + return sorted([Fixtures.next_tag() for _ in Fixtures.next_range()]) + + @staticmethod + def next_description_source() -> str: + return Fixtures.next_string(prefix='de', length=8) + + @staticmethod + def next_description(*, text: Optional[str] = None, source: Optional[str] = None) -> ProgrammaticDescription: + if not text: + text = Fixtures.next_string(length=20) + if not source: + source = Fixtures.next_description_source() + return ProgrammaticDescription(text=text, source=source) + + @staticmethod + def next_col_type() -> str: + return Fixtures.next_item(items=['varchar', 'int', 'blob', 'timestamp', 'datetime']) + + @staticmethod + def next_column(*, + table_key: str, + sort_order: int, + name: Optional[str] = None) -> Column: + if not name: + name = Fixtures.next_string(prefix='co', length=8) + + return Column(name=name, + description=f'{name} description', + col_type=Fixtures.next_col_type(), + key=f'{table_key}/{name}', + sort_order=sort_order, + stats=[Stat(stat_type='num_rows', + stat_val=f'{Fixtures.next_int() * 100}', + start_epoch=None, + end_epoch=None)]) + + @staticmethod + def next_columns(*, + table_key: str, + randomize_pii: bool = False, + randomize_data_subject: bool = False) -> List[Column]: + return [Fixtures.next_column(table_key=table_key, + sort_order=i + ) for i in Fixtures.next_range()] + + @staticmethod + def next_descriptions() -> List[ProgrammaticDescription]: + return sorted([Fixtures.next_description() for _ in Fixtures.next_range()]) + + @staticmethod + def next_table(table: Optional[str] = None, + cluster: Optional[str] = None, + schema: Optional[str] = None, + database: Optional[str] = None, + tags: Optional[List[Tag]] = None, + application: Optional[Application] = None) -> Table: + """ + Returns a table for testing in the test_database + """ + if not database: + database = Fixtures.next_database() + + if not table: + table = Fixtures.next_string(prefix='tb', length=8) + + if not cluster: + cluster = 
Fixtures.next_string(prefix='cl', length=8) + + if not schema: + schema = Fixtures.next_string(prefix='sc', length=8) + + if not tags: + tags = Fixtures.next_tags() + + table_key: str = f'{database}://{cluster}.{schema}/{table}' + # TODO: add owners, watermarks, last_updated_timestamp, source + return Table(database=database, + cluster=cluster, + schema=schema, + name=table, + key=table_key, + tags=tags, + table_writer=application, + table_readers=[], + description=f'{table} description', + programmatic_descriptions=Fixtures.next_descriptions(), + columns=Fixtures.next_columns(table_key=table_key), + is_view=False + ) + + @staticmethod + def next_user(*, user_id: Optional[str] = None, is_active: bool = True) -> User: + last_name = ''.join(Fixtures.next_item(items=list(string.ascii_lowercase)) for _ in range(6)).capitalize() + first_name = Fixtures.next_item(items=['alice', 'bob', 'carol', 'dan']).capitalize() + if not user_id: + user_id = Fixtures.next_string(prefix='us', length=8) + return User(user_id=user_id, + email=f'{user_id}@example.com', + is_active=is_active, + first_name=first_name, + last_name=last_name, + full_name=f'{first_name} {last_name}') + + +def next_application(**kwargs: Any) -> Application: + return Fixtures.next_application(**kwargs) + + +def next_int() -> int: + return Fixtures.next_int() + + +def next_string(**kwargs: Any) -> str: + return Fixtures.next_string(**kwargs) + + +def next_range() -> range: + return Fixtures.next_range() + + +def next_item(**kwargs: Any) -> Any: + return Fixtures.next_item(**kwargs) + + +def next_database() -> str: + return Fixtures.next_database() + + +def next_tag(**kwargs: Any) -> Tag: + return Fixtures.next_tag(**kwargs) + + +def next_tags() -> List[Tag]: + return Fixtures.next_tags() + + +def next_description_source() -> str: + return Fixtures.next_description_source() + + +def next_description(**kwargs: Any) -> ProgrammaticDescription: + return Fixtures.next_description(**kwargs) + + +def next_col_type() -> 
str: + return Fixtures.next_col_type() + + +def next_column(**kwargs: Any) -> Column: + return Fixtures.next_column(**kwargs) + + +def next_columns(**kwargs: Any) -> List[Column]: + return Fixtures.next_columns(**kwargs) + + +def next_descriptions() -> List[ProgrammaticDescription]: + return Fixtures.next_descriptions() + + +def next_table(**kwargs: Any) -> Table: + return Fixtures.next_table(**kwargs) + + +def next_user(**kwargs: Any) -> User: + return Fixtures.next_user(**kwargs) diff --git a/requirements.txt b/requirements.txt index e480d4f..5312baf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ flake8==3.7.8 Flask==1.1.1 marshmallow==2.15.3 marshmallow-annotations==2.4.0 -mypy==0.720 +mypy==0.761 pytest>=4.6 pytest-cov pytest-mock diff --git a/setup.cfg b/setup.cfg index f4db814..e47822b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -37,5 +37,14 @@ disallow_incomplete_defs = true disallow_untyped_defs = true no_implicit_optional = true +[mypy-marshmallow.*] +ignore_missing_imports = true + +[mypy-marshmallow_annotations.*] +ignore_missing_imports = true + +[mypy-setuptools.*] +ignore_missing_imports = true + [mypy-tests.*] disallow_untyped_defs = false diff --git a/setup.py b/setup.py index 6402f36..fcf17de 100644 --- a/setup.py +++ b/setup.py @@ -1,11 +1,11 @@ # Copyright Contributors to the Amundsen project. # SPDX-License-Identifier: Apache-2.0 -from setuptools import setup, find_packages +from setuptools import find_packages, setup setup( name='amundsen-common', - version='0.5.0', + version='0.5.1', description='Common code library for Amundsen', long_description=open('README.md').read(), long_description_content_type='text/markdown', diff --git a/tests/__init__.py b/tests/__init__.py index d66c0ef..f3145d7 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,3 +1,2 @@ # Copyright Contributors to the Amundsen project. 
# SPDX-License-Identifier: Apache-2.0 - diff --git a/tests/tests/__init__.py b/tests/tests/__init__.py new file mode 100644 index 0000000..f3145d7 --- /dev/null +++ b/tests/tests/__init__.py @@ -0,0 +1,2 @@ +# Copyright Contributors to the Amundsen project. +# SPDX-License-Identifier: Apache-2.0 diff --git a/tests/tests/test_fixtures.py b/tests/tests/test_fixtures.py new file mode 100644 index 0000000..a35c6c6 --- /dev/null +++ b/tests/tests/test_fixtures.py @@ -0,0 +1,106 @@ +# Copyright Contributors to the Amundsen project. +# SPDX-License-Identifier: Apache-2.0 + +import unittest + +from amundsen_common.tests.fixtures import (next_application, next_col_type, + next_columns, next_database, + next_description, + next_description_source, + next_descriptions, next_int, next_item, + next_range, next_string, next_table, + next_tag, next_tags, next_user) +from amundsen_common.models.table import Column, ProgrammaticDescription, Stat + + +class TestFixtures(unittest.TestCase): + # tests are numbered to ensure they execute in order + def test_00_next_int(self) -> None: + self.assertEqual(1000, next_int()) + + def test_01_next_string(self) -> None: + self.assertEqual('nopqrstuvw001011', next_string()) + + def test_02_next_string(self) -> None: + self.assertEqual('foo_yzabcdefgh001022', next_string(prefix='foo_')) + + def test_03_next_string(self) -> None: + self.assertEqual('jklm001027', next_string(length=4)) + + def test_04_next_string(self) -> None: + self.assertEqual('bar_opqr001032', next_string(prefix='bar_', length=4)) + + def test_05_next_range(self) -> None: + self.assertEqual(3, len(next_range())) + + def test_06_next_item(self) -> None: + self.assertEqual('c', next_item(items=['a', 'b', 'c'])) + + def test_07_next_database(self) -> None: + self.assertEqual('database2', next_database()) + + def test_08_next_application(self) -> None: + app = next_application() + self.assertEqual('Apwxyzabcd001044', app.name) + self.assertEqual('apwxyzabcd001044', app.id) + 
self.assertEqual('https://apwxyzabcd001044.example.com', app.application_url) + + def test_09_next_application(self) -> None: + app = next_application(application_id='foo') + self.assertEqual('Foo', app.name) + self.assertEqual('foo', app.id) + self.assertEqual('https://foo.example.com', app.application_url) + + def test_10_next_tag(self) -> None: + tag = next_tag() + self.assertEqual('tafghijklm001053', tag.tag_name) + self.assertEqual('default', tag.tag_type) + + def test_11_next_tags(self) -> None: + tags = next_tags() + self.assertEqual(4, len(tags)) + self.assertEqual(['tahijklmno001081', + 'tapqrstuvw001063', + 'taqrstuvwx001090', + 'tayzabcdef001072'], [tag.tag_name for tag in tags]) + + def test_12_next_description_source(self) -> None: + self.assertEqual('dezabcdefg001099', next_description_source()) + + def test_13_next_description(self) -> None: + self.assertEqual(ProgrammaticDescription(text='ijklmnopqrstuvwxyzab001120', source='dedefghijk001129'), + next_description()) + + def test_14_next_col_type(self) -> None: + self.assertEqual('varchar', next_col_type()) + + def test_15_just_execute_next_columns(self) -> None: + columns = next_columns(table_key='not_important') + self.assertEqual(1, len(columns)) + self.assertEqual([Column(name='coopqrstuv001140', key='not_important/coopqrstuv001140', + description='coopqrstuv001140 description', col_type='int', + sort_order=0, stats=[Stat(stat_type='num_rows', stat_val='114200', + start_epoch=None, end_epoch=None)]) + ], columns) + + def test_16_just_execute_next_descriptions(self) -> None: + descs = next_descriptions() + self.assertEqual(3, len(descs)) + self.assertEqual([ + ProgrammaticDescription(source='dedefghijk001233', text='ijklmnopqrstuvwxyzab001224'), + ProgrammaticDescription(source='devwxyzabc001173', text='abcdefghijklmnopqrst001164'), + ProgrammaticDescription(source='dezabcdefg001203', text='efghijklmnopqrstuvwx001194')], descs) + + def test_17_just_execute_next_table(self) -> None: + table = 
next_table() + self.assertEqual(2, len(table.columns)) + self.assertEqual('tbnopqrstu001243', table.name) + self.assertEqual('database1://clwxyzabcd001252.scfghijklm001261/tbnopqrstu001243', table.key) + + def test_18_next_user(self) -> None: + user = next_user() + self.assertEqual('Jklmno', user.last_name) + self.assertEqual('Bob', user.first_name) + self.assertEqual('usqrstuvwx001350', user.user_id) + self.assertEqual('usqrstuvwx001350@example.com', user.email) + self.assertEqual(True, user.is_active) diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py index d66c0ef..f3145d7 100644 --- a/tests/unit/__init__.py +++ b/tests/unit/__init__.py @@ -1,3 +1,2 @@ # Copyright Contributors to the Amundsen project. # SPDX-License-Identifier: Apache-2.0 - diff --git a/tests/unit/log/__init__.py b/tests/unit/log/__init__.py index d66c0ef..f3145d7 100644 --- a/tests/unit/log/__init__.py +++ b/tests/unit/log/__init__.py @@ -1,3 +1,2 @@ # Copyright Contributors to the Amundsen project. # SPDX-License-Identifier: Apache-2.0 -