-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmanage.py
executable file
·210 lines (184 loc) · 7.01 KB
/
manage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
#!/usr/bin/env python
import json
from pathlib import Path
import click
import jsonschema
import nbformat
import sqlfluff
from nbmerge import merge_notebooks
from sqlfluff.core import FluffConfig
# A dict of notebooks and their components, identified by filename, excluding '.ipynb'.
#
# Each key is the stem of a template notebook that pre_commit() builds. Each value lists
# the stems of the component notebooks that are passed, in this order, to merge_notebooks()
# to build that template. A template is rebuilt whenever one of its components is among the
# files given to pre_commit().
NOTEBOOKS = {
    "template_meta_analysis": [
        "component_environment",
        "component_setup_kingfisher",
    ],
    "template_publisher_analysis": [
        "component_environment",
        "component_setup_charts",
        "component_setup_kingfisher",
        "component_errors_kingfisher",
        "component_scope_kingfisher",
    ],
    "template_structure_and_format_feedback": [
        "component_environment",
        "component_setup_charts",
        "component_setup_kingfisher",
        "component_errors_kingfisher",
        "component_check_conformance",
        "component_scope_kingfisher",
        "component_check_structure",
    ],
    "template_data_quality_feedback": [
        "component_environment",
        "component_setup_charts",
        "component_setup_kingfisher",
        "component_errors_kingfisher",
        "component_check_conformance",
        "component_scope_kingfisher",
        "component_check_structure",
        "component_check_quality",
    ],
    "template_usability_checks": [
        "component_environment",
        "component_setup_charts",
        "component_setup_kingfisher",
        "component_scope_usability",
        "component_setup_usability",
        "component_check_usability_kingfisher",
    ],
    "template_usability_checks_fieldlist": [
        "component_environment",
        "component_setup_charts",
        "component_setup_fieldlist",
        "component_setup_usability",
        "component_check_usability_external",
    ],
    "template_usability_checks_registry": [
        "component_environment",
        "component_setup_charts",
        "component_setup_cardinal",
        "component_setup_download_data_from_registry",
        "component_select_data_from_registry",
        "component_setup_usability",
        "component_check_usability_external",
    ],
    "template_relevant_checks_registry": [
        "component_environment",
        "component_setup_cardinal",
        "component_setup_usability",
        "component_setup_download_data_from_registry",
        "component_select_data_from_registry",
        "component_check_relevant",
    ],
    "template_relevant_checks_fieldlist": [
        "component_environment",
        "component_setup_fieldlist",
        "component_setup_usability",
        "component_check_relevant",
    ],
    "template_basic_criteria_checks": [
        "component_environment",
        "component_setup_charts",
        "component_setup_usability",
        "component_setup_kingfisher",
        "component_errors_kingfisher",
        "component_check_structure",
        "component_check_conformance",
        "component_scope_usability",
        "component_check_relevant",
    ],
    "template_relevant_checks_registry_all": [
        "component_environment",
        "component_setup_cardinal",
        "component_setup_usability",
        "component_setup_download_data_from_registry",
        "component_check_relevant_all_registry",
    ],
    "template_red_flags_checks_registry": [
        "component_environment",
        "component_setup_cardinal",
        "component_setup_download_data_from_registry",
        "component_select_data_from_registry",
        "component_setup_red_flags",
        "component_check_red_flags_external",
    ],
    "template_red_flags_checks": [
        "component_environment",
        "component_setup_kingfisher",
        "component_setup_red_flags",
        "component_check_red_flags_kingfisher",
    ],
    "template_red_flags_checks_fieldlist": [
        "component_environment",
        "component_setup_fieldlist",
        "component_setup_red_flags",
        "component_check_red_flags_external",
    ],
    "template_field_list_registry_all": [
        "component_environment",
        "component_setup_cardinal",
        "component_setup_download_data_from_registry",
        "component_get_field_list_all_registry",
    ],
}
# Absolute path of the directory that contains this script. Component notebooks are
# looked up relative to it when templates are merged.
BASEDIR = Path(__file__).resolve().parent
# SQLFluff configuration, loaded once at import time from BASEDIR and reused for every
# fix and lint call.
FLUFF_CONFIG = FluffConfig.from_path(BASEDIR)
class InvalidNotebookError(click.ClickException):
    """Report that a notebook file is invalid, as a click error message."""

    def __init__(self, filename):
        # The message names the offending file; click handles the display.
        message = f"{filename} is invalid"
        super().__init__(message)
def json_dump(path, notebook):
    """
    Write ``notebook`` to ``path`` as UTF-8 encoded JSON, followed by a trailing newline.

    :param path: a ``pathlib.Path`` (or any object with a compatible ``open()`` method)
    :param notebook: the JSON-serializable notebook content
    """
    # ensure_ascii=False writes non-ASCII characters verbatim, so the encoding must be set
    # explicitly. Otherwise, the platform's default encoding is used, which can fail or
    # produce a file that isn't UTF-8.
    with path.open("w", encoding="utf-8") as f:
        # Use indent=2 like Google Colab for small diffs.
        json.dump(notebook, f, ensure_ascii=False, indent=2)
        f.write("\n")
def json_load(path):
    """
    Read and return the JSON content of the UTF-8 encoded file at ``path``.

    :param path: a ``pathlib.Path`` (or any object with a compatible ``open()`` method)
    :returns: the decoded JSON value
    :raises InvalidNotebookError: if the file's content isn't valid JSON
    """
    # Read as UTF-8 explicitly, instead of relying on the platform's default encoding.
    with path.open(encoding="utf-8") as f:
        try:
            return json.load(f)
        except json.decoder.JSONDecodeError as e:
            raise InvalidNotebookError(path) from e
@click.command()
@click.argument("filename", nargs=-1, type=click.Path(exists=True, dir_okay=False, path_type=Path))
def pre_commit(filename):
    """Format SQL cells in Jupyter Notebooks and merge components to build notebooks."""
    # NOTE: The docstring above is used by click as the command's help text.
    #
    # Steps:
    # 1. For each given component notebook, fix the SQL in its %%sql cells with SQLFluff,
    #    print any remaining lint warnings, and write the notebook back.
    # 2. Rebuild each template notebook in NOTEBOOKS that uses one of the given components.
    # 3. Abort if any lint warnings remain.
    has_warnings = False

    # Only component notebooks are processed. Templates are generated from them.
    filenames = [path for path in filename if path.name.startswith("component_")]

    for path in filenames:
        notebook = json_load(path)

        for cell in notebook["cells"]:
            if cell["cell_type"] != "code":
                continue

            source = cell["source"]
            # An empty cell has no first line to inspect.
            # In our notebooks, this is always on its own line: %%sql(?!( \w+ <<)?\\n",)
            if not source or "%%sql" not in source[0]:
                continue

            # Keep the magic line as-is, and replace the rest of the cell with the fixed SQL.
            fix = sqlfluff.fix("".join(source[1:]), config=FLUFF_CONFIG)
            cell["source"] = [source[0], *fix.splitlines(keepends=True)]

            # Report the violations that SQLFluff couldn't fix automatically.
            warnings = sqlfluff.lint(fix, config=FLUFF_CONFIG)
            has_warnings |= bool(warnings)

            for warning in warnings:
                click.secho(f"{warning['code']}:{warning['name']} {warning['description']}", fg="yellow")
                if "start_file_pos" in warning:
                    # Print the SQL with the offending span highlighted.
                    start = warning["start_file_pos"]
                    end = warning["end_file_pos"]
                    click.echo(f"{fix[:start]}{click.style(fix[start:end], fg='red')}{fix[end:]}")

        json_dump(path, notebook)

    changed = {path.stem for path in filenames}

    for slug, components in NOTEBOOKS.items():
        # Rebuild only the templates that use at least one of the given components.
        if changed.isdisjoint(components):
            continue

        # Merge before opening the template for writing, so that a failure doesn't
        # truncate the existing template.
        try:
            notebook = merge_notebooks(BASEDIR, [f"{c}.ipynb" for c in components], verbose=False)
            notebook["metadata"]["colab"]["name"] = slug
        except jsonschema.exceptions.ValidationError as e:
            raise InvalidNotebookError(f"{slug}.ipynb") from e

        template_path = Path(f"{slug}.ipynb")
        with template_path.open("w", encoding="utf8") as f:
            nbformat.write(notebook, f)

        # nbformat.write() uses indent=1. Rewrite with indent=2 like Google Colab.
        # https://github.com/jupyter/nbformat/blob/ba2c6f5/nbformat/v4/nbjson.py#L51
        json_dump(template_path, json_load(template_path))

    if has_warnings:
        raise click.Abort
# Run the pre-commit command only when this file is executed as a script.
if __name__ == "__main__":
    pre_commit()