Skip to content

Commit

Permalink
Hot fix (#58)
Browse files Browse the repository at this point in the history
* patch working, still need to update package versions, etc

* Changelog and version bump

* Fixed another typo but code is still not working

* Fixing Glue issue

* Getting rid of flake issue

* Updating changelog
  • Loading branch information
Karik Isichei authored Jun 8, 2021
1 parent 02f67d6 commit a2e4f03
Show file tree
Hide file tree
Showing 7 changed files with 48 additions and 21 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## v1.2.1 - 2021-06-07

### Changed

- Fixed bug where unpacking complex types (aka types with `<>`) did not correctly return contents of brackets.
- Added parameter `field_sep` (default `", "`) to `converters._flatten_and_convert_complex_data_type`, because Glue schemas fail if complex data type definitions contain spaces. The Glue converter now passes `field_sep=","`, so the complex data types it generates have no spaces before or after `,`.

## v1.2.0 - 2021-05-17

### Changed
Expand Down
9 changes: 5 additions & 4 deletions mojap_metadata/converters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def _flatten_and_convert_complex_data_type(
data_type: Union[dict, str],
converter_fun: Callable,
complex_dtype_names: Tuple[str] = None,
field_sep: str = ", ",
) -> str:
"""Recursive function to flatten a complex datatype in a dictionary
format, i.e. the output from Metadata.unpack_complex_data_type.
Expand Down Expand Up @@ -57,16 +58,16 @@ def _flatten_and_convert_complex_data_type(
for k, v in data_type.items():
if k in complex_dtype_names:
inner_data_type = _flatten_and_convert_complex_data_type(
v, converter_fun, complex_dtype_names
v, converter_fun, complex_dtype_names, field_sep=field_sep
)
return f"{converter_fun(k)}<{inner_data_type}>"
else:
new_v = _flatten_and_convert_complex_data_type(
v, converter_fun, complex_dtype_names
v, converter_fun, complex_dtype_names, field_sep=field_sep
)
fields.append(f"{k}:{new_v}")

return ", ".join(fields)
del new_v
return field_sep.join(fields)


@dataclass
Expand Down
2 changes: 1 addition & 1 deletion mojap_metadata/converters/glue_converter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ def convert_col_type(self, coltype: str) -> str:
data_type = _unpack_complex_data_type(coltype)

return _flatten_and_convert_complex_data_type(
data_type, self.convert_basic_col_type
data_type, self.convert_basic_col_type, field_sep=","
)

def convert_basic_col_type(self, coltype: str) -> str:
Expand Down
26 changes: 17 additions & 9 deletions mojap_metadata/metadata/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def _parse_and_split(text: str, char: str) -> List[str]:
elif s == "<":
in_parentheses[2] += 1
elif s == ">":
in_parentheses[2] += 1
in_parentheses[2] -= 1

if s == char and not any([bool(p) for p in in_parentheses]):
yield text[start + 1 : i].strip()
Expand All @@ -80,16 +80,24 @@ def _parse_and_split(text: str, char: str) -> List[str]:

def _get_first_level(text: str) -> str:
"""Returns everything in first set of <>"""
start = 0
end = len(text)
bracket_counter = 0
start = -1
end = -1
found_first_bracket = False
for i, c in enumerate(text):
if c == "<":
start = i + 1
break
for i, c in enumerate(reversed(text)):
if c == ">":
end = len(text) - (i + 1)
break
bracket_counter += 1
if not found_first_bracket:
start = i + 1
found_first_bracket = True
elif c == ">":
bracket_counter -= 1
if bracket_counter == 0:
end = i
break

if start == -1 or end == -1:
raise ValueError(f"No closed brackets found in: {text}")

return text[start:end]

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[tool]
[tool.poetry]
name = "mojap-metadata"
version = "1.2.0"
version = "1.2.1"
description = "A python package to manage metadata"
license = "MIT"
authors = ["MoJ Data Engineering <dataengineering@digital.justice.gov.uk>"]
Expand Down
18 changes: 12 additions & 6 deletions tests/test_glue_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,24 +69,30 @@ def test_converter_accepts_type(meta_type):
("list_<list<int64>>", "array<array<bigint>>", None),
("large_list<int64>", "array<bigint>", None),
("large_list<large_list<int64>>", "array<array<bigint>>", None),
("struct<num:int64, newnum:int64>", "struct<num:bigint, newnum:bigint>", None),
("struct<num:int64,newnum:int64>", "struct<num:bigint,newnum:bigint>", None),
("struct<num:int64, newnum:int64>", "struct<num:bigint,newnum:bigint>", None),
(
"struct<num:int64, arr:list_<int64>>",
"struct<num:bigint, arr:array<bigint>>",
"struct<num:bigint,arr:array<bigint>>",
None,
),
(
"list_<struct<num:int64,desc:string>>",
"array<struct<num:bigint, desc:string>>",
"array<struct<num:bigint,desc:string>>",
None,
),
("struct<num:int64,desc:string>", "struct<num:bigint, desc:string>", None),
("struct<num:int64,desc:string>", "struct<num:bigint,desc:string>", None),
("list_<decimal128(38,0)>", "array<decimal(38,0)>", None),
(
"struct<a:timestamp(s),b:struct<f1: int32, f2: string,f3:decimal128(3,5)>>",
"struct<a:timestamp, b:struct<f1:int, f2:string, f3:decimal(3,5)>>",
"struct<a:timestamp(s),b:struct<f1: int32, f2: string, f3:decimal128(3,5)>>", # noqa
"struct<a:timestamp,b:struct<f1:int,f2:string,f3:decimal(3,5)>>",
None,
),
(
"struct<k1:list<string>, k2:string, k3:string, k4:string, k5:list<string>, k6:string>", # noqa
"struct<k1:array<string>,k2:string,k3:string,k4:string,k5:array<string>,k6:string>", # noqa
None
)
],
)
def test_meta_to_glue_type(meta_type, glue_type, expect_raises):
Expand Down
5 changes: 5 additions & 0 deletions tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,11 @@ def test_get_first_level(t, e):
":",
["a", 'timestamp["s", +07:30], b', "decimal128(3,5)"],
),
(
"k1:list<string>, k2:decimal128(0, 38), k3:struct<a:int64, b:int64>",
",",
["k1:list<string>", "k2:decimal128(0, 38)", "k3:struct<a:int64, b:int64>"],
)
],
)
def test_parse_and_split(text, char, expected):
Expand Down

0 comments on commit a2e4f03

Please sign in to comment.