From a2e4f038d8dd4cf86c82087cc9bb0a259d4ced06 Mon Sep 17 00:00:00 2001 From: Karik Isichei Date: Tue, 8 Jun 2021 10:04:37 +0100 Subject: [PATCH] Hot fix (#58) * patch working, still need to update package versions, etc * Changelog and version bump * Fixed another typo but code is still not working * Fixing Glue issue * Getting rid of flake issue * Updating changelog --- CHANGELOG.md | 7 +++++ mojap_metadata/converters/__init__.py | 9 ++++--- .../converters/glue_converter/__init__.py | 2 +- mojap_metadata/metadata/metadata.py | 26 ++++++++++++------- pyproject.toml | 2 +- tests/test_glue_converter.py | 18 ++++++++----- tests/test_metadata.py | 5 ++++ 7 files changed, 48 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b04b78..ba6f81f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v1.2.1 - 2021-06-07 + +### Changed + +- Fixed bug where unpacking complex types (aka types with `<>`) did not correctly return contents of brackets. +- Added parameter `field_sep` to `converters._flatten_and_convert_complex_data_type` as Glue schemas fail if spaces are in complex data type definitions. Glue schemas now have no spaces before or after `,` when creating complex data types. + ## v1.2.0 - 2021-05-17 ### Changed diff --git a/mojap_metadata/converters/__init__.py b/mojap_metadata/converters/__init__.py index 12dcd31..1ed8717 100644 --- a/mojap_metadata/converters/__init__.py +++ b/mojap_metadata/converters/__init__.py @@ -28,6 +28,7 @@ def _flatten_and_convert_complex_data_type( data_type: Union[dict, str], converter_fun: Callable, complex_dtype_names: Tuple[str] = None, + field_sep: str = ", ", ) -> str: """Recursive function to flattern a complex datatype in a dictionary format i.e. output from (from Metadata.unpack_complex_data_type). @@ -57,16 +58,16 @@ def _flatten_and_convert_complex_data_type( for k, v in data_type.items(): if k in complex_dtype_names: inner_data_type = _flatten_and_convert_complex_data_type( - v, converter_fun, complex_dtype_names + v, converter_fun, complex_dtype_names, field_sep=field_sep ) return f"{converter_fun(k)}<{inner_data_type}>" else: new_v = _flatten_and_convert_complex_data_type( - v, converter_fun, complex_dtype_names + v, converter_fun, complex_dtype_names, field_sep=field_sep ) fields.append(f"{k}:{new_v}") - - return ", ".join(fields) + del new_v + return field_sep.join(fields) @dataclass diff --git a/mojap_metadata/converters/glue_converter/__init__.py b/mojap_metadata/converters/glue_converter/__init__.py index 672dc34..656b785 100644 --- a/mojap_metadata/converters/glue_converter/__init__.py +++ b/mojap_metadata/converters/glue_converter/__init__.py @@ -222,7 +222,7 @@ def convert_col_type(self, coltype: str) -> str: data_type = _unpack_complex_data_type(coltype) return _flatten_and_convert_complex_data_type( - data_type, self.convert_basic_col_type + data_type, self.convert_basic_col_type, field_sep="," ) def convert_basic_col_type(self, coltype: str) -> str: diff --git a/mojap_metadata/metadata/metadata.py b/mojap_metadata/metadata/metadata.py index 57cdebb..58f3f87 100644 --- a/mojap_metadata/metadata/metadata.py +++ b/mojap_metadata/metadata/metadata.py @@ -69,7 +69,7 @@ def _parse_and_split(text: str, char: str) -> List[str]: elif s == "<": in_parentheses[2] += 1 elif s == ">": - in_parentheses[2] += 1 + in_parentheses[2] -= 1 if s == char and not any([bool(p) for p in in_parentheses]): yield text[start + 1 : i].strip() @@ -80,16 +80,24 @@ def _parse_and_split(text: str, char: str) -> List[str]: def _get_first_level(text: str) -> str: """Returns everything in first set of <>""" - start = 0 - end = len(text) + bracket_counter = 0 + start = -1 + end = -1 + found_first_bracket = False for i, c in enumerate(text): if c == "<": - start = i + 1 - break - for i, c in enumerate(reversed(text)): - if c == ">": - end = len(text) - (i + 1) - break + bracket_counter += 1 + if not found_first_bracket: + start = i + 1 + found_first_bracket = True + elif c == ">": + bracket_counter -= 1 + if bracket_counter == 0: + end = i + break + + if start == -1 or end == -1: + raise ValueError(f"No closed brackets found in: {text}") return text[start:end] diff --git a/pyproject.toml b/pyproject.toml index 81bf1e8..a304af1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool] [tool.poetry] name = "mojap-metadata" -version = "1.2.0" +version = "1.2.1" description = "A python package to manage metadata" license = "MIT" authors = ["MoJ Data Engineering "] diff --git a/tests/test_glue_converter.py b/tests/test_glue_converter.py index 8ab593b..bae5e96 100644 --- a/tests/test_glue_converter.py +++ b/tests/test_glue_converter.py @@ -69,24 +69,30 @@ def test_converter_accepts_type(meta_type): ("list_>", "array>", None), ("large_list", "array", None), ("large_list>", "array>", None), - ("struct", "struct", None), + ("struct", "struct", None), + ("struct", "struct", None), ( "struct>", - "struct>", + "struct>", None, ), ( "list_>", - "array>", + "array>", None, ), - ("struct", "struct", None), + ("struct", "struct", None), ("list_", "array", None), ( - "struct>", - "struct>", + "struct>", # noqa + "struct>", None, ), + ( + "struct, k2:string, k3:string, k4:string, k5:list, k6:string>", # noqa + "struct,k2:string,k3:string,k4:string,k5:array,k6:string>", # noqa + None + ) ], ) def test_meta_to_glue_type(meta_type, glue_type, expect_raises): diff --git a/tests/test_metadata.py b/tests/test_metadata.py index 1a705ad..2e6c8e8 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -282,6 +282,11 @@ def test_get_first_level(t, e): ":", ["a", 'timestamp["s", +07:30], b', "decimal128(3,5)"], ), + ( + "k1:list, k2:decimal128(0, 38), k3:struct", + ",", + ["k1:list", "k2:decimal128(0, 38)", "k3:struct"], + ) ], ) def test_parse_and_split(text, char, expected):