-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathargo-checker.py
195 lines (164 loc) · 7.08 KB
/
argo-checker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
import requests
from bs4 import BeautifulSoup
import sys
import argparse
import tempfile
import urllib.request
import shutil
import os
import re
import tarfile
import subprocess
import fileinput
# Module-wide switch: when True, inform_status() prints nothing.
messages_quiet = False


def inform_status(msg):
    """Print a status message to stderr unless quiet mode is enabled.

    Status output goes to stderr so that it never mixes with whatever
    is written to stdout.

    Args:
        msg: The message to print.
    """
    if messages_quiet:
        return
    print(msg, file=sys.stderr)
def find_latest_tool_url():
    """Return the URL of the newest tool archive linked from the DOI page.

    The DOI landing page is fetched and every ``<a href=...>`` whose target
    ends in ``<number>.tar.gz`` is considered a candidate. The archives are
    named sequentially, so the candidate with the largest number is taken
    to be the newest release.

    Returns:
        The ``href`` value of the newest archive, exactly as it appears in
        the page.

    Raises:
        requests.RequestException: If the page cannot be fetched, the
            request times out, or the server answers with an error status.
        ValueError: If the page contains no matching archive link.
    """
    doi_link = 'https://doi.org/10.17882/45538'
    inform_status(f'Checking for latest tool at <{doi_link}>...')

    # Without a timeout requests may block forever on a stalled server.
    page = requests.get(doi_link, timeout=30)
    # Fail loudly on 4xx/5xx instead of scraping an error page and then
    # reporting a misleading "no tool found".
    page.raise_for_status()

    soup = BeautifulSoup(page.content, 'html.parser')

    # these files are named sequentially, so newer version should
    # in theory have a filename with a bigger number in front
    file_re = re.compile(r'([0-9]+)\.tar\.gz$')
    tar_gz_vals = [
        anchor.attrs['href']
        for anchor in soup.find_all('a')
        if 'href' in anchor.attrs and file_re.search(anchor.attrs['href'])
    ]

    if not tar_gz_vals:
        raise ValueError('Failed to find latest tool source.')

    # max() keeps the first candidate on ties, like a strict '>' scan would.
    latest = max(tar_gz_vals, key=lambda url: int(file_re.search(url).group(1)))

    inform_status(f'Latest tool source is located at <{latest}>.')
    return latest
def install_tool_from_url(url, dest):
    """Download a tool archive, unpack it and move it into place at ``dest``.

    The archive is downloaded to a temporary file and extracted into the
    parent directory of ``dest``. The single entry in that directory whose
    name starts with ``format_control`` is then renamed to ``dest``. An
    existing ``dest`` is removed with ``shutil.rmtree`` first.

    Args:
        url: Location of the archive; passed to ``urllib.request.urlopen``.
        dest: Path the extracted tool directory is renamed to.

    Returns:
        ``dest``, as passed in.

    Raises:
        ValueError: If, after extraction, the parent directory of ``dest``
            contains no entry or more than one entry starting with
            ``format_control``.
    """
    inform_status(f"Downloading tool from <{url}>")
    # abspath() avoids an empty install root when ``dest`` has no directory
    # component (os.listdir('') would fail further down).
    install_root = os.path.dirname(os.path.abspath(dest))

    fd, tmp = tempfile.mkstemp()
    try:
        # Write through the descriptor mkstemp() already opened rather than
        # opening the path a second time, and close the response when done.
        with os.fdopen(fd, 'wb') as f, urllib.request.urlopen(url) as response:
            shutil.copyfileobj(response, f)

        inform_status('Extracting...')
        with tarfile.open(tmp) as tar:
            # The archive comes from the network: where the interpreter
            # supports extraction filters, refuse members that would escape
            # install_root (absolute paths, '..', outside links).
            if hasattr(tarfile, 'data_filter'):
                tar.extractall(install_root, filter='data')
            else:
                tar.extractall(install_root)
    finally:
        os.unlink(tmp)

    inform_status(f"Installing tool to '{dest}'")
    extracted = [
        name for name in os.listdir(install_root)
        if name.startswith('format_control')
    ]
    if not extracted:
        raise ValueError(f"Can't find extracted files in '{install_root}'")
    if len(extracted) > 1:
        raise ValueError(f"More than one possible value for extracted files in '{install_root}'")

    if os.path.exists(dest):
        inform_status('Removing previous installation')
        shutil.rmtree(dest)

    os.rename(os.path.join(install_root, extracted[0]), dest)
    return dest
def argo_checker_find(update=False, force=False):
    """Locate the installed tool next to this script, installing if needed.

    Installed tools live in directories named ``tool_<number>`` beside this
    file; the one with the largest number is the current installation.

    Args:
        update: If true, look up the latest published version and install
            it when its number is larger than the installed one.
        force: If true, reinstall the latest published version even when it
            is not newer. Only consulted when ``update`` is true and a tool
            is already installed.

    Returns:
        Path to the tool directory to use. This is the existing
        installation, or the freshly installed one when an install or
        upgrade took place.
    """
    dir_re = re.compile(r'tool_([0-9]+)')
    file_re = re.compile(r'([0-9]+)\.tar\.gz$')

    this_dir = os.path.abspath(os.path.dirname(__file__))
    inform_status(f"Searching for installed tool in '{this_dir}'")

    # Only real directories named exactly tool_<N> count as installations;
    # stray files or look-alikes such as 'tool_12.tar.gz' are ignored.
    installed = []
    for name in os.listdir(this_dir):
        match = dir_re.fullmatch(name)
        if match and os.path.isdir(os.path.join(this_dir, name)):
            installed.append((int(match.group(1)), name))

    if installed:
        val_max, newest_name = max(installed, key=lambda item: item[0])
        tool_dir = os.path.join(this_dir, newest_name)
        inform_status(f"Installed tool found at '{tool_dir}'.")
    else:
        val_max, tool_dir = None, None
        inform_status('Installed tool was not found.')

    if update:
        inform_status('Checking for newer version, as requested...')

    # Nothing to download: a tool is installed and no update was requested.
    if not update and tool_dir is not None:
        return tool_dir

    latest_url = find_latest_tool_url()
    latest_val = int(file_re.search(latest_url).group(1))
    latest_dest = os.path.join(this_dir, f'tool_{latest_val}')

    if tool_dir is None:
        inform_status('Installing latest version because no tool is installed')
    elif latest_val > val_max:
        inform_status('Latest version is newer: upgrading...')
    elif force:
        inform_status('Latest version is not newer but installing anyway (--force)')
    else:
        inform_status('Version is latest version.')
        return tool_dir

    return install_tool_from_url(latest_url, latest_dest)
def run_tool(tool_dir, files):
    """Run the Java format checker on each file or URL in ``files``.

    Entries starting with ``http://``, ``https://`` or ``ftp://`` are first
    downloaded to a temporary file, which is removed again after the check.
    The checker is started with ``tool_dir`` as its working directory, so
    the class path entries below are relative to it.

    Args:
        tool_dir: Directory of the installed tool.
        files: Iterable of local paths and/or URLs to check.

    Raises:
        ValueError: If the Java process exits with a non-zero status.
    """
    classpath_sep = ';' if os.name == 'nt' else ':'

    # NOTE(review): the jar file name embeds version 1.17; a tool release
    # that ships a differently named jar would not be found -- confirm.
    class_paths_rel = ('./resources', './jar/formatcheckerClassic-1.17-jar-with-dependencies.jar')
    class_paths = classpath_sep.join(class_paths_rel)
    more_args = ['-Dapplication.properties=application.properties', '-Dfile.encoding=UTF8', 'oco.FormatControl']

    for target in files:
        tmp = None
        try:
            if target.startswith(('http://', 'https://', 'ftp://')):
                inform_status(f'Downloading <{target}>')
                tmp_fd, tmp = tempfile.mkstemp(suffix=os.path.basename(target))
                # Write through the mkstemp() descriptor and close both it
                # and the response before Java opens the file.
                with os.fdopen(tmp_fd, 'wb') as f, urllib.request.urlopen(target) as response:
                    shutil.copyfileobj(response, f)
                target = tmp

            # The subprocess runs with cwd=tool_dir, so the input path must
            # be absolute to still resolve from there.
            path = os.path.abspath(target)
            all_args = ['java', '-cp', class_paths, *more_args, path]
            command = ' '.join(all_args)

            inform_status(f"Running '{command}'")
            if subprocess.call(all_args, cwd=tool_dir) != 0:
                raise ValueError(f"Command '{command}' had a non-zero exit code.")
        finally:
            if tmp is not None:
                os.unlink(tmp)
if __name__ == '__main__':
    # Command-line entry point: parse arguments, make sure a tool is
    # installed (optionally updating it), then run the requested action.

    # argparse's default formatter collapses whitespace in the description,
    # so the indentation inside this literal is cosmetic.
    description = """
    Run, serve, or upgrade the Ifremer
    NetCDF file format checker for Argo floats
    <https://doi.org/10.17882/45538>.
    """
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('action', type=str, nargs=1, choices=('check', 'serve'),
                        help="Use 'check' to export XML to stdout based on 'files' and 'serve' to serve a minimal web-based interface.")
    parser.add_argument('files_or_urls', type=str, nargs='*',
                        help="Zero or more files or URLs if 'action' is 'check'")
    parser.add_argument('--update', action='store_true',
                        help="Update to the latest distributed version of the tool before running 'action'")
    parser.add_argument('--force', action='store_true',
                        help="Reinstall even if tool is at latest version.")
    parser.add_argument('--quiet', action='store_true',
                        help="Suppress status messages.")

    args = parser.parse_args()
    # nargs=1 makes argparse store a one-element list; unwrap it once.
    action = args.action[0]

    if args.quiet:
        messages_quiet = True

    tool_dir = argo_checker_find(update=args.update, force=args.force)

    if action == 'check':
        if args.files_or_urls:
            run_tool(tool_dir, args.files_or_urls)
        else:
            # No files given: spool stdin into a temporary .nc file, check
            # that file, and remove it afterwards.
            tmp_fd, tmp = tempfile.mkstemp(suffix='.nc')
            try:
                # Reuse the mkstemp() descriptor so it is closed before the
                # checker reads the file.
                with os.fdopen(tmp_fd, 'wb') as f:
                    shutil.copyfileobj(sys.stdin.buffer, f)
                run_tool(tool_dir, [tmp])
            finally:
                os.unlink(tmp)
    else:
        # Report the action name itself, not the list that wraps it.
        raise NotImplementedError(f"action '{action}' is not implemented")

    sys.exit(0)