Skip to content

Commit

Permalink
chore: upgrade to marshmallow 3 (#963)
Browse files Browse the repository at this point in the history
* chore: upgrade to marshmallow 3 and marshmallow3-annotations

Signed-off-by: Dmitriy Kunitskiy <dkunitskiy@lyft.com>

* typos

Signed-off-by: Dmitriy Kunitskiy <dkunitskiy@lyft.com>
  • Loading branch information
Dmitriy Kunitskiy authored Mar 31, 2021
1 parent 33a681c commit dc2d6a6
Show file tree
Hide file tree
Showing 15 changed files with 72 additions and 76 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ build/
.coverage
.mypy_cache
.pytest_cache
.python-version


npm-debug.log
Expand Down
9 changes: 5 additions & 4 deletions amundsen_application/api/preview/v0.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from flask import Response, jsonify, make_response, request, current_app as app
from flask.blueprints import Blueprint
from marshmallow import ValidationError
from werkzeug.utils import import_string

from amundsen_application.models.preview_data import PreviewDataSchema
Expand Down Expand Up @@ -49,11 +50,11 @@ def get_table_preview() -> Response:
preview_data = json.loads(response.data).get('preview_data')
if status_code == HTTPStatus.OK:
# validate the returned table preview data
data, errors = PreviewDataSchema().load(preview_data)
if not errors:
try:
data = PreviewDataSchema().load(preview_data)
payload = jsonify({'previewData': data, 'msg': 'Success'})
else:
logging.error('Preview data dump returned errors: ' + str(errors))
except ValidationError as err:
logging.error('Preview data dump returned errors: ' + str(err.messages))
raise Exception('The preview client did not return a valid PreviewData object')
else:
message = 'Encountered error: Preview client request failed with code ' + str(status_code)
Expand Down
21 changes: 10 additions & 11 deletions amundsen_application/api/utils/metadata_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import urllib.parse

from dataclasses import dataclass
from marshmallow import EXCLUDE
from typing import Any, Dict, List

from amundsen_common.models.dashboard import DashboardSummary, DashboardSummarySchema
Expand Down Expand Up @@ -46,10 +47,9 @@ def marshall_table_partial(table_dict: Dict) -> Dict:
TODO - Unify data format returned by search and metadata.
"""
schema = PopularTableSchema(strict=True)
# TODO: consider migrating to validate() instead of roundtripping
table: PopularTable = schema.load(table_dict).data
results = schema.dump(table).data
schema = PopularTableSchema()
table: PopularTable = schema.load(table_dict, unknown=EXCLUDE)
results = schema.dump(table)
# TODO: fix popular tables to provide these? remove if we're not using them?
# TODO: Add the 'key' or 'id' to the base PopularTableSchema
results['key'] = f'{table.database}://{table.cluster}.{table.schema}/{table.name}'
Expand Down Expand Up @@ -104,10 +104,9 @@ def marshall_table_full(table_dict: Dict) -> Dict:
:return: Table Dict with sanitized fields
"""

schema = TableSchema(strict=True)
# TODO: consider migrating to validate() instead of roundtripping
table: Table = schema.load(table_dict).data
results: Dict[str, Any] = schema.dump(table).data
schema = TableSchema()
table: Table = schema.load(table_dict)
results: Dict[str, Any] = schema.dump(table)

is_editable = is_table_editable(results['schema'], results['name'])
results['is_editable'] = is_editable
Expand Down Expand Up @@ -149,9 +148,9 @@ def marshall_dashboard_partial(dashboard_dict: Dict) -> Dict:
:param dashboard_dict: Dict of partial dashboard metadata
:return: partial dashboard Dict
"""
schema = DashboardSummarySchema(strict=True)
dashboard: DashboardSummary = schema.load(dashboard_dict).data
results = schema.dump(dashboard).data
schema = DashboardSummarySchema(unknown=EXCLUDE)
dashboard: DashboardSummary = schema.load(dashboard_dict)
results = schema.dump(dashboard)
results['type'] = 'dashboard'
# TODO: Bookmark logic relies on key, opting to add this here to avoid messy logic in
# React app and we have to clean up later.
Expand Down
27 changes: 12 additions & 15 deletions amundsen_application/base/base_announcement_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from http import HTTPStatus

from flask import jsonify, make_response, Response
from marshmallow import ValidationError

from amundsen_application.models.announcements import Announcements, AnnouncementsSchema

Expand All @@ -31,20 +32,16 @@ def _create_error_response(message: str) -> Response:
return make_response(payload, HTTPStatus.INTERNAL_SERVER_ERROR)

try:
try:
announcements = self.get_posts()
except Exception as e:
message = 'Encountered exception getting posts: ' + str(e)
return _create_error_response(message)

# validate the returned object
data, errors = AnnouncementsSchema().dump(announcements)
if not errors:
payload = jsonify({'posts': data.get('posts'), 'msg': 'Success'})
return make_response(payload, HTTPStatus.OK)
else:
message = 'Announcement data dump returned errors: ' + str(errors)
return _create_error_response(message)
announcements = self.get_posts()
except Exception as e:
message = 'Encountered exception: ' + str(e)
message = 'Encountered exception getting posts: ' + str(e)
return _create_error_response(message)

try:
data = AnnouncementsSchema().dump(announcements)
AnnouncementsSchema().load(data) # validate returned object
payload = jsonify({'posts': data.get('posts'), 'msg': 'Success'})
return make_response(payload, HTTPStatus.OK)
except ValidationError as err:
message = 'Announcement data dump returned errors: ' + str(err.messages)
return _create_error_response(message)
18 changes: 10 additions & 8 deletions amundsen_application/base/base_bigquery_preview_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# SPDX-License-Identifier: Apache-2.0

from http import HTTPStatus
import logging
from typing import Dict, List
from amundsen_application.base.base_preview_client import BasePreviewClient
from amundsen_application.models.preview_data import (
Expand All @@ -10,6 +11,7 @@
PreviewDataSchema,
)
from flask import Response, make_response, jsonify
from marshmallow import ValidationError
from google.cloud import bigquery


Expand Down Expand Up @@ -60,13 +62,13 @@ def get_preview_data(self, params: Dict, optionalHeaders: Dict = None) -> Respon
params["schema"],
params["tableName"],
)
data = PreviewDataSchema().dump(preview_data)[0]
errors = PreviewDataSchema().load(data)[1]
payload = jsonify({"preview_data": data})

if not errors:
try:
data = PreviewDataSchema().dump(preview_data)
PreviewDataSchema().load(data) # for validation only
payload = jsonify({"preview_data": data})
return make_response(payload, HTTPStatus.OK)
return make_response(
jsonify({"preview_data": {}}), HTTPStatus.INTERNAL_SERVER_ERROR
)
except ValidationError as err:
logging.error("PreviewDataSchema serialization error + " + str(err.messages))
return make_response(
jsonify({"preview_data": {}}), HTTPStatus.INTERNAL_SERVER_ERROR
)
11 changes: 7 additions & 4 deletions amundsen_application/base/base_superset_preview_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
# SPDX-License-Identifier: Apache-2.0

import abc
import logging

from flask import Response as FlaskResponse, make_response, jsonify
from http import HTTPStatus
from marshmallow import ValidationError
from requests import Response
from typing import Dict

Expand Down Expand Up @@ -45,12 +47,13 @@ def get_preview_data(self, params: Dict, optionalHeaders: Dict = None) -> FlaskR
response_dict = response.json()
columns = [ColumnItem(c['name'], c['type']) for c in response_dict['columns']]
preview_data = PreviewData(columns, response_dict['data'])
data = PreviewDataSchema().dump(preview_data)[0]
errors = PreviewDataSchema().load(data)[1]
if not errors:
try:
data = PreviewDataSchema().dump(preview_data)
PreviewDataSchema().load(data) # for validation only
payload = jsonify({'preview_data': data})
return make_response(payload, response.status_code)
else:
except ValidationError as err:
logging.error("PreviewDataSchema serialization error " + str(err.messages))
return make_response(jsonify({'preview_data': {}}), HTTPStatus.INTERNAL_SERVER_ERROR)
except Exception:
return make_response(jsonify({'preview_data': {}}), HTTPStatus.INTERNAL_SERVER_ERROR)
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typing import Dict # noqa: F401

from flask import Response, jsonify, make_response, current_app as app
from marshmallow import ValidationError
from pyarrow import flight

from amundsen_application.base.base_superset_preview_client import BasePreviewClient
Expand Down Expand Up @@ -79,16 +80,15 @@ def get_preview_data(self, params: Dict, optionalHeaders: Dict = None) -> Respon
column_items = [ColumnItem(n, t) for n, t in zip(names, types)]

preview_data = PreviewData(column_items, rows)

data = PreviewDataSchema().dump(preview_data)[0]
errors = PreviewDataSchema().load(data)[1]
if errors:
logging.error(f'Error(s) occurred while building preview data: {errors}')
payload = jsonify({'preview_data': {}})
return make_response(payload, HTTPStatus.INTERNAL_SERVER_ERROR)
else:
try:
data = PreviewDataSchema().dump(preview_data)
PreviewDataSchema().load(data) # for validation only
payload = jsonify({'preview_data': data})
return make_response(payload, HTTPStatus.OK)
except ValidationError as err:
logging.error(f'Error(s) occurred while building preview data: {err.messages}')
payload = jsonify({'preview_data': {}})
return make_response(payload, HTTPStatus.INTERNAL_SERVER_ERROR)

except Exception as e:
logging.error(f'Encountered exception: {e}')
Expand Down
5 changes: 3 additions & 2 deletions amundsen_application/models/announcements.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from marshmallow import Schema, fields, post_dump
from marshmallow.exceptions import ValidationError

from typing import Dict, List
from typing import Dict, List, Any


class Post:
Expand All @@ -29,10 +29,11 @@ class AnnouncementsSchema(Schema):
posts = fields.Nested(PostSchema, many=True)

@post_dump
def validate_data(self, data: Dict) -> None:
def validate_data(self, data: Dict, **kwargs: Any) -> Dict:
posts = data.get('posts', [])
for post in posts:
if post.get('date') is None:
raise ValidationError('All posts must have a date')
if post.get('title') is None:
raise ValidationError('All posts must have a title')
return data
4 changes: 2 additions & 2 deletions amundsen_application/models/preview_data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

from marshmallow import Schema, fields
from marshmallow import Schema, fields, EXCLUDE
from typing import List


Expand All @@ -24,6 +24,6 @@ def __init__(self, columns: List = [], data: List = [], error_text: str = '') ->


class PreviewDataSchema(Schema):
columns = fields.Nested(ColumnItemSchema, many=True)
columns = fields.Nested(ColumnItemSchema, many=True, unknown=EXCLUDE)
data = fields.List(fields.Dict, many=True)
error_text = fields.Str()
9 changes: 2 additions & 7 deletions amundsen_application/models/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,11 @@ def load_user(user_data: Dict) -> User:
# in the user metadata.
if _str_no_value(user_data.get('profile_url')) and app.config['GET_PROFILE_URL']:
user_data['profile_url'] = app.config['GET_PROFILE_URL'](user_data['user_id'])
data, errors = schema.load(user_data)
return data
return schema.load(user_data)
except ValidationError as err:
return err.messages


def dump_user(user: User) -> Dict:
schema = UserSchema()
try:
data, errors = schema.dump(user)
return data
except ValidationError as err:
return err.messages
return schema.dump(user)
9 changes: 2 additions & 7 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,7 @@ requests==2.25.1
# A lightweight library for converting complex datatypes to and from native Python datatypes.
# License: MIT
# Upstream url: https://github.com/marshmallow-code/marshmallow
marshmallow>=2.15.3,<3.0

# Allows declaring marshmallow schema through type annotations
# License: MIT
# Upstream url: https://github.com/justanr/marshmallow-annotations
marshmallow-annotations>=2.4.0,<3.0
marshmallow>=3.0,<=3.6

# A utility library for mocking out the requests Python library.
# License: Apache 2.0
Expand All @@ -73,7 +68,7 @@ SQLAlchemy==1.3.23

# A common package that holds the model definitions and schemas that are used
# across different amundsen repositories.
amundsen-common==0.6.0
amundsen-common==0.9.0

# Library for rest endpoints with Flask
# Upstream url: https://github.com/flask-restful/flask-restful
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ def build_js() -> None:
with open(requirements_path) as requirements_file:
requirements = requirements_file.readlines()

__version__ = '3.5.1'
__version__ = '3.6.0'

oicd = ['flaskoidc==0.1.1']
pyarrrow = ['pyarrow==3.0.0']
bigquery_preview = ['google-cloud-bigquery>=2.8.0,<3.0.0', 'flatten-dict==0.3.0']
bigquery_preview = ['google-cloud-bigquery>=2.13.1,<3.0.0', 'flatten-dict==0.3.0']
all_deps = requirements + oicd + pyarrrow + bigquery_preview

setup(
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/base/test_announcement_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def test_get_posts_success(self) -> None:
{
'title': 'Test Title',
'date': 'December 31, 1999',
'info_list': ['Test announcement'],
'html_content': 'content',
}
]
response = MockClient(success_posts)._get_posts()
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/base/test_superset_preview_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def test_post_sql_json_incorrect_data_shape(self) -> None:
def test_post_sql_json_correct_data_shape(self) -> None:
"""
Test post_sql_json(), which should result in
a response with 500 error and empty preview_data payload
a response with 200 status and correct preview_data payload
:return:
"""
with app.test_request_context():
Expand Down
10 changes: 6 additions & 4 deletions tests/unit/models/test_user.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import flask
import unittest

from marshmallow import ValidationError

from amundsen_application.models.user import load_user, dump_user, UserSchema

app = flask.Flask(__name__)
Expand Down Expand Up @@ -76,17 +78,17 @@ def test_raise_error_if_no_display_name(self) -> None:
:return:
"""
with app.test_request_context():
data, errors = UserSchema().load({})
self.assertEqual(len(errors['_schema']), 1)
with self.assertRaises(ValidationError):
UserSchema().load({})

def test_raise_error_if_no_user_id(self) -> None:
"""
Error is raised if deserialization of Dict will not generate a user_id
:return:
"""
with app.test_request_context():
data, errors = UserSchema().load({'display_name': 'Test User'})
self.assertEqual(len(errors['_schema']), 1)
with self.assertRaises(ValidationError):
UserSchema().load({'display_name': 'Test User'})

def test_str_no_value(self) -> None:
"""
Expand Down

0 comments on commit dc2d6a6

Please sign in to comment.