Skip to content

Commit

Permalink
common: gremlin support tweaks
Browse files Browse the repository at this point in the history
add optional key parameter to column, table. rename Statistics -> Stat.
add fixtures, streaming, utils to common.
Update mypy to 761 to fix false errors.
Fix flake8 complaints about extra line in __init__.py files

Signed-off-by: Joshua Hoskins <hoskins@squareup.com>
  • Loading branch information
friendtocephalopods committed Sep 15, 2020
1 parent abaf553 commit 86a0704
Show file tree
Hide file tree
Showing 20 changed files with 964 additions and 19 deletions.
1 change: 0 additions & 1 deletion amundsen_common/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

224 changes: 224 additions & 0 deletions amundsen_common/fixtures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

import string
from typing import Any, List, Optional

from amundsen_common.models.table import (Application, Column,
ProgrammaticDescription, Stat, Table,
Tag)
from amundsen_common.models.user import User


class Fixtures:
counter = 1000

@staticmethod
def next_int() -> int:
i = Fixtures.counter
Fixtures.counter += 1
return i

@staticmethod
def next_string(*, prefix: str = '', length: int = 10) -> str:
astr: str = prefix + \
''.join(Fixtures.next_item(items=list(string.ascii_lowercase)) for _ in range(length)) + \
('%06d' % Fixtures.next_int())
return astr

@staticmethod
def next_range() -> range:
return range(0, Fixtures.next_int() % 5)

@staticmethod
def next_item(*, items: List[Any]) -> Any:
return items[Fixtures.next_int() % len(items)]

@staticmethod
def next_database() -> str:
return Fixtures.next_item(items=list(["database1", "database2"]))

@staticmethod
def next_application(*, application_id: Optional[str] = None) -> Application:
if not application_id:
application_id = Fixtures.next_string(prefix='ap', length=8)
application = Application(application_url=f'https://{application_id}.example.com',
description=f'{application_id} description',
name=application_id.capitalize(),
id=application_id)
return application

@staticmethod
def next_tag(*, tag_name: Optional[str] = None) -> Tag:
if not tag_name:
tag_name = Fixtures.next_string(prefix='ta', length=8)
return Tag(tag_name=tag_name, tag_type='default')

@staticmethod
def next_tags() -> List[Tag]:
return sorted([Fixtures.next_tag() for _ in Fixtures.next_range()])

@staticmethod
def next_description_source() -> str:
return Fixtures.next_string(prefix='de', length=8)

@staticmethod
def next_description(*, text: Optional[str] = None, source: Optional[str] = None) -> ProgrammaticDescription:
if not text:
text = Fixtures.next_string(length=20)
if not source:
source = Fixtures.next_description_source()
return ProgrammaticDescription(text=text, source=source)

@staticmethod
def next_col_type() -> str:
return Fixtures.next_item(items=['varchar', 'int', 'blob', 'timestamp', 'datetime'])

@staticmethod
def next_column(*,
table_key: str,
sort_order: int,
name: Optional[str] = None) -> Column:
if not name:
name = Fixtures.next_string(prefix='co', length=8)

return Column(name=name,
description=f'{name} description',
col_type=Fixtures.next_col_type(),
key=f'{table_key}/{name}',
sort_order=sort_order,
stats=[Stat(stat_type='num_rows',
stat_val=f'{Fixtures.next_int() * 100}',
start_epoch=None,
end_epoch=None)])

@staticmethod
def next_columns(*,
table_key: str,
randomize_pii: bool = False,
randomize_data_subject: bool = False) -> List[Column]:
return [Fixtures.next_column(table_key=table_key,
sort_order=i
) for i in Fixtures.next_range()]

@staticmethod
def next_descriptions() -> List[ProgrammaticDescription]:
return sorted([Fixtures.next_description() for _ in Fixtures.next_range()])

@staticmethod
def next_table(table: Optional[str] = None,
cluster: Optional[str] = None,
schema: Optional[str] = None,
database: Optional[str] = None,
tags: Optional[List[Tag]] = None,
application: Optional[Application] = None) -> Table:
"""
Returns a table for testing in the test_database
"""
if not database:
database = Fixtures.next_database()

if not table:
table = Fixtures.next_string(prefix='tb', length=8)

if not cluster:
cluster = Fixtures.next_string(prefix='cl', length=8)

if not schema:
schema = Fixtures.next_string(prefix='sc', length=8)

if not tags:
tags = Fixtures.next_tags()

table_key: str = f'{database}://{cluster}.{schema}/{table}'
# TODO: add owners, watermarks, last_udpated_timestamp, source
return Table(database=database,
cluster=cluster,
schema=schema,
name=table,
key=table_key,
tags=tags,
table_writer=application,
table_readers=[],
description=f'{table} description',
programmatic_descriptions=Fixtures.next_descriptions(),
columns=Fixtures.next_columns(table_key=table_key),
is_view=False
)

@staticmethod
def next_user(*, user_id: Optional[str] = None, is_active: bool = True) -> User:
last_name = ''.join(Fixtures.next_item(items=list(string.ascii_lowercase)) for _ in range(6)).capitalize()
first_name = Fixtures.next_item(items=['alice', 'bob', 'carol', 'dan']).capitalize()
if not user_id:
user_id = Fixtures.next_string(prefix='us', length=8)
return User(user_id=user_id,
email=f'{user_id}@example.com',
is_active=is_active,
first_name=first_name,
last_name=last_name,
full_name=f'{first_name} {last_name}')


def next_application(**kwargs: Any) -> Application:
return Fixtures.next_application(**kwargs)


def next_int() -> int:
return Fixtures.next_int()


def next_string(**kwargs: Any) -> str:
return Fixtures.next_string(**kwargs)


def next_range() -> range:
return Fixtures.next_range()


def next_item(**kwargs: Any) -> Any:
return Fixtures.next_item(**kwargs)


def next_database() -> str:
return Fixtures.next_database()


def next_tag(**kwargs: Any) -> Tag:
return Fixtures.next_tag(**kwargs)


def next_tags() -> List[Tag]:
return Fixtures.next_tags()


def next_description_source() -> str:
return Fixtures.next_description_source()


def next_description(**kwargs: Any) -> ProgrammaticDescription:
return Fixtures.next_description(**kwargs)


def next_col_type() -> str:
return Fixtures.next_col_type()


def next_column(**kwargs: Any) -> Column:
return Fixtures.next_column(**kwargs)


def next_columns(**kwargs: Any) -> List[Column]:
return Fixtures.next_columns(**kwargs)


def next_descriptions() -> List[ProgrammaticDescription]:
return Fixtures.next_descriptions()


def next_table(**kwargs: Any) -> Table:
return Fixtures.next_table(**kwargs)


def next_user(**kwargs: Any) -> User:
return Fixtures.next_user(**kwargs)
1 change: 0 additions & 1 deletion amundsen_common/log/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

1 change: 0 additions & 1 deletion amundsen_common/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

1 change: 0 additions & 1 deletion amundsen_common/models/dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,3 @@ class DashboardSummarySchema(AttrsSchema):
class Meta:
target = DashboardSummary
register_as_scheme = True

9 changes: 9 additions & 0 deletions amundsen_common/models/enums.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

from enum import Enum


class NodeLabel(Enum):
COLUMN = 1
TABLE = 2
11 changes: 7 additions & 4 deletions amundsen_common/models/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,26 +61,27 @@ class Meta:


@attr.s(auto_attribs=True, kw_only=True)
class Statistics:
class Stat:
stat_type: str
stat_val: Optional[str] = None
start_epoch: Optional[int] = None
end_epoch: Optional[int] = None


class StatisticsSchema(AttrsSchema):
class StatSchema(AttrsSchema):
class Meta:
target = Statistics
target = Stat
register_as_scheme = True


@attr.s(auto_attribs=True, kw_only=True)
class Column:
name: str
key: Optional[str] = None
description: Optional[str] = None
col_type: str
sort_order: int
stats: List[Statistics] = []
stats: List[Stat] = []


class ColumnSchema(AttrsSchema):
Expand Down Expand Up @@ -115,6 +116,7 @@ class Meta:
target = Source
register_as_scheme = True


@attr.s(auto_attribs=True, kw_only=True)
class ResourceReport:
name: str
Expand Down Expand Up @@ -151,6 +153,7 @@ class Table:
cluster: str
schema: str
name: str
key: Optional[str] = None
tags: List[Tag] = []
badges: List[Badge] = []
table_readers: List[Reader] = []
Expand Down
10 changes: 5 additions & 5 deletions amundsen_common/models/user.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

from typing import Optional, Dict
from typing import Any, Optional, Dict

import attr
from marshmallow import ValidationError, validates_schema, pre_load
Expand Down Expand Up @@ -38,7 +38,7 @@ class User:
manager_id: Optional[str] = None
role_name: Optional[str] = None
profile_url: Optional[str] = None
other_key_values: Optional[Dict[str, str]] = attr.ib(factory=dict)
other_key_values: Optional[Dict[str, str]] = attr.ib(factory=dict) # type: ignore
# TODO: Add frequent_used, bookmarked, & owned resources


Expand All @@ -57,14 +57,14 @@ def _str_no_value(self, s: Optional[str]) -> bool:
return False

@pre_load
def preprocess_data(self, data: Dict) -> Dict:
def preprocess_data(self, data: Dict[str, Any]) -> Dict[str, Any]:
if self._str_no_value(data.get('user_id')):
data['user_id'] = data.get('email')

if self._str_no_value(data.get('profile_url')):
data['profile_url'] = ''
if data.get('GET_PROFILE_URL'):
data['profile_url'] = data.get('GET_PROFILE_URL')(data['user_id'])
data['profile_url'] = data.get('GET_PROFILE_URL')(data['user_id']) # type: ignore

first_name = data.get('first_name')
last_name = data.get('last_name')
Expand All @@ -81,7 +81,7 @@ def preprocess_data(self, data: Dict) -> Dict:
return data

@validates_schema
def validate_user(self, data: Dict) -> None:
def validate_user(self, data: Dict[str, Any]) -> None:
if self._str_no_value(data.get('display_name')):
raise ValidationError('"display_name", "full_name", or "email" must be provided')

Expand Down
2 changes: 2 additions & 0 deletions amundsen_common/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
Loading

0 comments on commit 86a0704

Please sign in to comment.