Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Added programmatic description fields #104

Merged
8 changes: 7 additions & 1 deletion metadata_service/api/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@
'source': fields.String
}

# Field mapping for one programmatic description entry: 'source' and 'text',
# both declared as fields.String. The 'programmatic_descriptions' entry of
# table_detail_fields nests this mapping inside a fields.List.
programmatic_description_fields = {
'source': fields.String,
'text': fields.String
}

table_detail_fields = {
'database': fields.String,
'cluster': fields.String,
Expand All @@ -82,7 +87,8 @@
'table_writer': fields.Nested(table_writer_fields), # Optional
'last_updated_timestamp': fields.Integer, # Optional
'source': fields.Nested(source_fields), # Optional
'is_view': fields.Boolean # Optional
'is_view': fields.Boolean, # Optional
'programmatic_descriptions': fields.List(fields.Nested(programmatic_description_fields))
}


Expand Down
27 changes: 22 additions & 5 deletions metadata_service/proxy/neo4j_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from amundsen_common.models.table import (Application, Column, Reader, Source,
Statistics, Table, Tag, User,
Watermark)
Watermark, ProgrammaticDescription)
from amundsen_common.models.user import User as UserEntity
from beaker.cache import CacheManager
from beaker.util import parse_cache_config_options
Expand Down Expand Up @@ -67,7 +67,8 @@ def get_table(self, *, table_uri: str) -> Table:

readers = self._exec_usage_query(table_uri)

wmk_results, table_writer, timestamp_value, owners, tags, source, badges = self._exec_table_query(table_uri)
wmk_results, table_writer, timestamp_value, owners, tags, source, badges, prog_descs = \
self._exec_table_query(table_uri)

table = Table(database=last_neo4j_record['db']['name'],
cluster=last_neo4j_record['clstr']['name'],
Expand All @@ -83,7 +84,9 @@ def get_table(self, *, table_uri: str) -> Table:
table_writer=table_writer,
last_updated_timestamp=timestamp_value,
source=source,
is_view=self._safe_get(last_neo4j_record, 'tbl', 'is_view'))
is_view=self._safe_get(last_neo4j_record, 'tbl', 'is_view'),
programmatic_descriptions=prog_descs
)

return table

Expand Down Expand Up @@ -168,13 +171,15 @@ def _exec_table_query(self, table_uri: str) -> Tuple:
OPTIONAL MATCH (tbl)-[:TAGGED_BY]->(tag:Tag{tag_type: $tag_normal_type})
OPTIONAL MATCH (tbl)-[:TAGGED_BY]->(badge:Tag{tag_type: $tag_badge_type})
OPTIONAL MATCH (tbl)-[:SOURCE]->(src:Source)
OPTIONAL MATCH (tbl)-[:DESCRIPTION]->(prog_descriptions:Programmatic_Description)
RETURN collect(distinct wmk) as wmk_records,
application,
t.last_updated_timestamp as last_updated_timestamp,
collect(distinct owner) as owner_records,
collect(distinct tag) as tag_records,
collect(distinct badge) as badge_records,
src
src,
collect(distinct prog_descriptions) as prog_descriptions
""")

table_records = self._execute_cypher_query(statement=table_level_query,
Expand Down Expand Up @@ -236,7 +241,19 @@ def _exec_table_query(self, table_uri: str) -> Tuple:
src = Source(source_type=table_records['src']['source_type'],
source=table_records['src']['source'])

return wmk_results, table_writer, timestamp_value, owner_record, tags, src, badges
prog_descriptions = self._extract_programmatic_descriptions_from_query(table_records)
samshuster marked this conversation as resolved.
Show resolved Hide resolved

return wmk_results, table_writer, timestamp_value, owner_record, tags, src, badges, prog_descriptions

def _extract_programmatic_descriptions_from_query(self, table_records: dict) -> list:
prog_descriptions = []
for prog_description in table_records.get('prog_descriptions', []):
LOGGER.info(prog_description)
samshuster marked this conversation as resolved.
Show resolved Hide resolved
source = prog_description['description_source']
samshuster marked this conversation as resolved.
Show resolved Hide resolved
prog_descriptions.append(ProgrammaticDescription(source=source,
text=prog_description['description']))
prog_descriptions.sort(key=lambda x: x.source)
return prog_descriptions

@no_type_check
def _safe_get(self, dct, *keys):
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ pytest-mock==1.1
typing==3.6.4


amundsen-common==0.1.3rc0
amundsen-common==0.2.1
flasgger==0.9.3
Flask-RESTful==0.3.6
Flask==1.0.2
Expand Down
8 changes: 5 additions & 3 deletions tests/unit/api/table/test_table_detail_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@
'col_type': 'String',
'sort_order': 0,
'stats': STATS
}]
}],
'programmatic_descriptions': []
}

API_RESPONSE = {
Expand All @@ -54,7 +55,8 @@
'type': 'String',
'sort_order': 0,
'stats': STATS
}]
}],
'programmatic_descriptions': []
}


Expand All @@ -63,7 +65,7 @@ def test_should_get_column_details(self) -> None:
self.mock_proxy.get_table.return_value = QUERY_RESPONSE

response = self.app.test_client().get(f'/table/{TABLE_URI}')

print(response.json)
samshuster marked this conversation as resolved.
Show resolved Hide resolved
self.assertEqual(response.json, API_RESPONSE)
self.assertEqual(response.status_code, HTTPStatus.OK)
self.mock_proxy.get_table.assert_called_with(table_uri=TABLE_URI)
Expand Down
30 changes: 26 additions & 4 deletions tests/unit/proxy/test_neo4j_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from amundsen_common.models.table import (Application, Column, Source,
Statistics, Table, Tag, User,
Watermark)
Watermark, ProgrammaticDescription)
from amundsen_common.models.user import UserSchema

from mock import MagicMock, patch
Expand Down Expand Up @@ -96,7 +96,17 @@ def setUp(self) -> None:
'source': '/source_file_loc',
'key': 'some key',
'source_type': 'github'
}
},
'prog_descriptions': [
{
'description_source': 's3_crawler',
'description': 'Test Test Test'
},
{
'description_source': 'quality_report',
'description': 'Test Test'
}
]
}

table_writer = {
Expand Down Expand Up @@ -158,7 +168,13 @@ def test_get_table(self) -> None:
last_updated_timestamp=1,
source=Source(source='/source_file_loc',
source_type='github'),
is_view=False)
is_view=False,
programmatic_descriptions=[
ProgrammaticDescription(source='quality_report',
text='Test Test'),
ProgrammaticDescription(source='s3_crawler',
text='Test Test Test')
])

self.assertEqual(str(expected), str(table))

Expand Down Expand Up @@ -203,7 +219,13 @@ def test_get_table_view_only(self) -> None:
last_updated_timestamp=1,
source=Source(source='/source_file_loc',
source_type='github'),
is_view=True)
is_view=True,
programmatic_descriptions=[
ProgrammaticDescription(source='quality_report',
text='Test Test'),
ProgrammaticDescription(source='s3_crawler',
text='Test Test Test')
])

self.assertEqual(str(expected), str(table))

Expand Down