Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: tweaks for gremlin support #60

Merged
merged 3 commits into from
Sep 17, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion amundsen_common/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

224 changes: 224 additions & 0 deletions amundsen_common/fixtures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

import string
from typing import Any, List, Optional

from amundsen_common.models.table import (Application, Column,
ProgrammaticDescription, Stat, Table,
Tag)
from amundsen_common.models.user import User


class Fixtures:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the fixtures only for testing purpose? if so, we should move it under the tests folder.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, yes, it's for testing! But it's intended to be useful in multiple repos to create test objects easily. I don't think the tests/ folder gets carried over cleanly when importing in other repos.

Maybe amundsen_common/tests/fixtures.py would be preferable?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, amundsen_common/tests/fixtures.py would be better.

Copy link
Contributor Author

@friendtocephalopods friendtocephalopods Sep 16, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • amundsen_common/tests/fixtures.py

counter = 1000
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be useful to add some documentation somewhere on how to use this Fixtures class (a summary of some kind so that people don't have to read all the code to figure it out)

Copy link
Contributor Author

@friendtocephalopods friendtocephalopods Sep 16, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • document fixtures


@staticmethod
def next_int() -> int:
i = Fixtures.counter
Fixtures.counter += 1
return i

@staticmethod
def next_string(*, prefix: str = '', length: int = 10) -> str:
astr: str = prefix + \
''.join(Fixtures.next_item(items=list(string.ascii_lowercase)) for _ in range(length)) + \
('%06d' % Fixtures.next_int())
return astr

@staticmethod
def next_range() -> range:
return range(0, Fixtures.next_int() % 5)

@staticmethod
def next_item(*, items: List[Any]) -> Any:
return items[Fixtures.next_int() % len(items)]

@staticmethod
def next_database() -> str:
return Fixtures.next_item(items=list(["database1", "database2"]))

@staticmethod
def next_application(*, application_id: Optional[str] = None) -> Application:
if not application_id:
application_id = Fixtures.next_string(prefix='ap', length=8)
application = Application(application_url=f'https://{application_id}.example.com',
description=f'{application_id} description',
name=application_id.capitalize(),
id=application_id)
return application

@staticmethod
def next_tag(*, tag_name: Optional[str] = None) -> Tag:
if not tag_name:
tag_name = Fixtures.next_string(prefix='ta', length=8)
return Tag(tag_name=tag_name, tag_type='default')

@staticmethod
def next_tags() -> List[Tag]:
return sorted([Fixtures.next_tag() for _ in Fixtures.next_range()])

@staticmethod
def next_description_source() -> str:
return Fixtures.next_string(prefix='de', length=8)

@staticmethod
def next_description(*, text: Optional[str] = None, source: Optional[str] = None) -> ProgrammaticDescription:
if not text:
text = Fixtures.next_string(length=20)
if not source:
source = Fixtures.next_description_source()
return ProgrammaticDescription(text=text, source=source)

@staticmethod
def next_col_type() -> str:
return Fixtures.next_item(items=['varchar', 'int', 'blob', 'timestamp', 'datetime'])

@staticmethod
def next_column(*,
table_key: str,
sort_order: int,
name: Optional[str] = None) -> Column:
if not name:
name = Fixtures.next_string(prefix='co', length=8)

return Column(name=name,
description=f'{name} description',
col_type=Fixtures.next_col_type(),
key=f'{table_key}/{name}',
sort_order=sort_order,
stats=[Stat(stat_type='num_rows',
stat_val=f'{Fixtures.next_int() * 100}',
start_epoch=None,
end_epoch=None)])

@staticmethod
def next_columns(*,
table_key: str,
randomize_pii: bool = False,
randomize_data_subject: bool = False) -> List[Column]:
return [Fixtures.next_column(table_key=table_key,
sort_order=i
) for i in Fixtures.next_range()]

@staticmethod
def next_descriptions() -> List[ProgrammaticDescription]:
return sorted([Fixtures.next_description() for _ in Fixtures.next_range()])

@staticmethod
def next_table(table: Optional[str] = None,
cluster: Optional[str] = None,
schema: Optional[str] = None,
database: Optional[str] = None,
tags: Optional[List[Tag]] = None,
application: Optional[Application] = None) -> Table:
"""
Returns a table for testing in the test_database
"""
if not database:
database = Fixtures.next_database()

if not table:
table = Fixtures.next_string(prefix='tb', length=8)

if not cluster:
cluster = Fixtures.next_string(prefix='cl', length=8)

if not schema:
schema = Fixtures.next_string(prefix='sc', length=8)

if not tags:
tags = Fixtures.next_tags()

table_key: str = f'{database}://{cluster}.{schema}/{table}'
# TODO: add owners, watermarks, last_udpated_timestamp, source
return Table(database=database,
cluster=cluster,
schema=schema,
name=table,
key=table_key,
tags=tags,
table_writer=application,
table_readers=[],
description=f'{table} description',
programmatic_descriptions=Fixtures.next_descriptions(),
columns=Fixtures.next_columns(table_key=table_key),
is_view=False
)

@staticmethod
def next_user(*, user_id: Optional[str] = None, is_active: bool = True) -> User:
last_name = ''.join(Fixtures.next_item(items=list(string.ascii_lowercase)) for _ in range(6)).capitalize()
first_name = Fixtures.next_item(items=['alice', 'bob', 'carol', 'dan']).capitalize()
if not user_id:
user_id = Fixtures.next_string(prefix='us', length=8)
return User(user_id=user_id,
email=f'{user_id}@example.com',
is_active=is_active,
first_name=first_name,
last_name=last_name,
full_name=f'{first_name} {last_name}')


def next_application(**kwargs: Any) -> Application:
return Fixtures.next_application(**kwargs)


def next_int() -> int:
return Fixtures.next_int()


def next_string(**kwargs: Any) -> str:
return Fixtures.next_string(**kwargs)


def next_range() -> range:
return Fixtures.next_range()


def next_item(**kwargs: Any) -> Any:
return Fixtures.next_item(**kwargs)


def next_database() -> str:
return Fixtures.next_database()


def next_tag(**kwargs: Any) -> Tag:
return Fixtures.next_tag(**kwargs)


def next_tags() -> List[Tag]:
return Fixtures.next_tags()


def next_description_source() -> str:
return Fixtures.next_description_source()


def next_description(**kwargs: Any) -> ProgrammaticDescription:
return Fixtures.next_description(**kwargs)


def next_col_type() -> str:
return Fixtures.next_col_type()


def next_column(**kwargs: Any) -> Column:
return Fixtures.next_column(**kwargs)


def next_columns(**kwargs: Any) -> List[Column]:
return Fixtures.next_columns(**kwargs)


def next_descriptions() -> List[ProgrammaticDescription]:
return Fixtures.next_descriptions()


def next_table(**kwargs: Any) -> Table:
return Fixtures.next_table(**kwargs)


def next_user(**kwargs: Any) -> User:
return Fixtures.next_user(**kwargs)
1 change: 0 additions & 1 deletion amundsen_common/log/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

1 change: 0 additions & 1 deletion amundsen_common/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

1 change: 0 additions & 1 deletion amundsen_common/models/dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,3 @@ class DashboardSummarySchema(AttrsSchema):
class Meta:
target = DashboardSummary
register_as_scheme = True

11 changes: 7 additions & 4 deletions amundsen_common/models/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,26 +61,27 @@ class Meta:


@attr.s(auto_attribs=True, kw_only=True)
class Statistics:
class Stat:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unfortunately, this will break all the other services (https://github.com/amundsen-io/amundsenmetadatalibrary/search?q=Statistics&unscoped_q=Statistics) . I know this is not perfect. But let's keep it as it is and do it differently (e.g add another class named Stat and mark Statistics deprecated etc)

Copy link
Contributor Author

@friendtocephalopods friendtocephalopods Sep 16, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the suggestion is to change it all over to a new Stat class here and only remove Statistics after references are gone in the other repos? Sure, that works!

  • new class, not rename

stat_type: str
stat_val: Optional[str] = None
start_epoch: Optional[int] = None
end_epoch: Optional[int] = None


class StatisticsSchema(AttrsSchema):
class StatSchema(AttrsSchema):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same for the rename

class Meta:
target = Statistics
target = Stat
register_as_scheme = True


@attr.s(auto_attribs=True, kw_only=True)
class Column:
name: str
key: Optional[str] = None
description: Optional[str] = None
col_type: str
sort_order: int
stats: List[Statistics] = []
stats: List[Stat] = []


class ColumnSchema(AttrsSchema):
Expand Down Expand Up @@ -115,6 +116,7 @@ class Meta:
target = Source
register_as_scheme = True


@attr.s(auto_attribs=True, kw_only=True)
class ResourceReport:
name: str
Expand Down Expand Up @@ -151,6 +153,7 @@ class Table:
cluster: str
schema: str
name: str
key: Optional[str] = None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could you share more what the key is for?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure! This is the table_uri. We store it on the object in the graph for indexing anyhow, so this saves us from having to recompute it everywhere each time we fetch it.

If we fully roll this out and make it non-optional, it also lets us not recompute the key downstream in the frontend as well: https://github.com/amundsen-io/amundsenfrontendlibrary/blob/ee0474fdcce6c8c1b01c9032b5cd2750b1d0d92c/amundsen_application/api/utils/metadata_utils.py#L110

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lgtm

tags: List[Tag] = []
badges: List[Badge] = []
table_readers: List[Reader] = []
Expand Down
10 changes: 5 additions & 5 deletions amundsen_common/models/user.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

from typing import Optional, Dict
from typing import Any, Optional, Dict

import attr
from marshmallow import ValidationError, validates_schema, pre_load
Expand Down Expand Up @@ -38,7 +38,7 @@ class User:
manager_id: Optional[str] = None
role_name: Optional[str] = None
profile_url: Optional[str] = None
other_key_values: Optional[Dict[str, str]] = attr.ib(factory=dict)
other_key_values: Optional[Dict[str, str]] = attr.ib(factory=dict) # type: ignore
# TODO: Add frequent_used, bookmarked, & owned resources


Expand All @@ -57,14 +57,14 @@ def _str_no_value(self, s: Optional[str]) -> bool:
return False

@pre_load
def preprocess_data(self, data: Dict) -> Dict:
def preprocess_data(self, data: Dict[str, Any]) -> Dict[str, Any]:
if self._str_no_value(data.get('user_id')):
data['user_id'] = data.get('email')

if self._str_no_value(data.get('profile_url')):
data['profile_url'] = ''
if data.get('GET_PROFILE_URL'):
data['profile_url'] = data.get('GET_PROFILE_URL')(data['user_id'])
data['profile_url'] = data.get('GET_PROFILE_URL')(data['user_id']) # type: ignore

first_name = data.get('first_name')
last_name = data.get('last_name')
Expand All @@ -81,7 +81,7 @@ def preprocess_data(self, data: Dict) -> Dict:
return data

@validates_schema
def validate_user(self, data: Dict) -> None:
def validate_user(self, data: Dict[str, Any]) -> None:
if self._str_no_value(data.get('display_name')):
raise ValidationError('"display_name", "full_name", or "email" must be provided')

Expand Down
2 changes: 2 additions & 0 deletions amundsen_common/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same

Loading