-
Notifications
You must be signed in to change notification settings - Fork 202
/
Copy pathfiletools.py
2388 lines (1929 loc) · 92 KB
/
filetools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# #
# Copyright 2009-2020 Ghent University
#
# This file is part of EasyBuild,
# originally created by the HPC team of Ghent University (http://ugent.be/hpc/en),
# with support of Ghent University (http://ugent.be/hpc),
# the Flemish Supercomputer Centre (VSC) (https://www.vscentrum.be),
# Flemish Research Foundation (FWO) (http://www.fwo.be/en)
# and the Department of Economy, Science and Innovation (EWI) (http://www.ewi-vlaanderen.be/en).
#
# https://github.com/easybuilders/easybuild
#
# EasyBuild is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation v2.
#
# EasyBuild is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with EasyBuild. If not, see <http://www.gnu.org/licenses/>.
# #
"""
Set of file tools.
:author: Stijn De Weirdt (Ghent University)
:author: Dries Verdegem (Ghent University)
:author: Kenneth Hoste (Ghent University)
:author: Pieter De Baets (Ghent University)
:author: Jens Timmerman (Ghent University)
:author: Toon Willems (Ghent University)
:author: Ward Poelmans (Ghent University)
:author: Fotis Georgatos (Uni.Lu, NTUA)
:author: Sotiris Fragkiskos (NTUA, CERN)
:author: Davide Vanzo (ACCRE, Vanderbilt University)
:author: Damian Alvarez (Forschungszentrum Juelich GmbH)
:author: Maxime Boissonneault (Compute Canada)
"""
import datetime
import difflib
import fileinput
import glob
import hashlib
import imp
import inspect
import os
import re
import shutil
import signal
import stat
import sys
import tempfile
import time
import zlib
from easybuild.base import fancylogger
from easybuild.tools import run
# import of build_log must stay, to make use of EasyBuildLog
from easybuild.tools.build_log import EasyBuildError, dry_run_msg, print_msg, print_warning
from easybuild.tools.config import DEFAULT_WAIT_ON_LOCK_INTERVAL, GENERIC_EASYBLOCK_PKG, build_option, install_path
from easybuild.tools.py2vs3 import HTMLParser, std_urllib, string_type
from easybuild.tools.utilities import nub, remove_unwanted_chars
try:
import requests
HAVE_REQUESTS = True
except ImportError:
HAVE_REQUESTS = False
# module-level logger, named 'filetools'
_log = fancylogger.getLogger('filetools', fname=False)
# easyblock class prefix
EASYBLOCK_CLASS_PREFIX = 'EB_'
# character map for encoding strings:
# maps a single special character to a descriptive '_<name>_' placeholder string
STRING_ENCODING_CHARMAP = {
    r' ': "_space_",
    r'!': "_exclamation_",
    r'"': "_quotation_",
    r'#': "_hash_",
    r'$': "_dollar_",
    r'%': "_percent_",
    r'&': "_ampersand_",
    r'(': "_leftparen_",
    r')': "_rightparen_",
    r'*': "_asterisk_",
    r'+': "_plus_",
    r',': "_comma_",
    r'-': "_minus_",
    r'.': "_period_",
    r'/': "_slash_",
    r':': "_colon_",
    r';': "_semicolon_",
    r'<': "_lessthan_",
    r'=': "_equals_",
    r'>': "_greaterthan_",
    r'?': "_question_",
    r'@': "_atsign_",
    r'[': "_leftbracket_",
    r'\'': "_apostrophe_",
    r'\\': "_backslash_",
    r']': "_rightbracket_",
    r'^': "_circumflex_",
    r'_': "_underscore_",
    r'`': "_backquote_",
    r'{': "_leftcurly_",
    r'|': "_verticalbar_",
    r'}': "_rightcurly_",
    r'~': "_tilde_",
}
# name of the file (located in the indexed directory itself) that holds a path index,
# see dump_index and load_index
PATH_INDEX_FILENAME = '.eb-path-index'
# names of commonly used checksum types
CHECKSUM_TYPE_MD5 = 'md5'
CHECKSUM_TYPE_SHA256 = 'sha256'
# checksum type that is used when none is specified (see compute_checksum)
DEFAULT_CHECKSUM = CHECKSUM_TYPE_MD5
# map of checksum types to checksum functions;
# each function takes the path to a file, and returns the checksum value for that file
# (note that 'size' is not an actual checksum, it simply yields the file size as reported by os.path.getsize)
CHECKSUM_FUNCTIONS = {
    'adler32': lambda p: calc_block_checksum(p, ZlibChecksum(zlib.adler32)),
    'crc32': lambda p: calc_block_checksum(p, ZlibChecksum(zlib.crc32)),
    CHECKSUM_TYPE_MD5: lambda p: calc_block_checksum(p, hashlib.md5()),
    'sha1': lambda p: calc_block_checksum(p, hashlib.sha1()),
    CHECKSUM_TYPE_SHA256: lambda p: calc_block_checksum(p, hashlib.sha256()),
    'sha512': lambda p: calc_block_checksum(p, hashlib.sha512()),
    'size': lambda p: os.path.getsize(p),
}
# alphabetically sorted list of supported checksum types
CHECKSUM_TYPES = sorted(CHECKSUM_FUNCTIONS.keys())
# map of (lowercase) file extensions to shell command templates for unpacking a file with that extension;
# templates use the named placeholders %(filepath)s and (only for single-file decompression) %(target)s
# NOTE(review): placeholders are presumably completed by extract_cmd, which is not visible here - confirm
EXTRACT_CMDS = {
    # gzipped or gzipped tarball
    '.gtgz': "tar xzf %(filepath)s",
    '.gz': "gunzip -c %(filepath)s > %(target)s",
    '.tar.gz': "tar xzf %(filepath)s",
    '.tgz': "tar xzf %(filepath)s",
    # bzipped or bzipped tarball
    '.bz2': "bunzip2 -c %(filepath)s > %(target)s",
    '.tar.bz2': "tar xjf %(filepath)s",
    '.tb2': "tar xjf %(filepath)s",
    '.tbz': "tar xjf %(filepath)s",
    '.tbz2': "tar xjf %(filepath)s",
    # xzipped or xzipped tarball
    '.tar.xz': "unxz %(filepath)s --stdout | tar x",
    '.txz': "unxz %(filepath)s --stdout | tar x",
    '.xz': "unxz %(filepath)s",
    # tarball
    '.tar': "tar xf %(filepath)s",
    # zip file
    '.zip': "unzip -qq %(filepath)s",
    # iso file
    '.iso': "7z x %(filepath)s",
    # tar.Z: using compress (LZW), but can be handled with gzip so use 'z'
    '.tar.z': "tar xzf %(filepath)s",
}
# global set of names of locks that were created in this session
global_lock_names = set()
class ZlibChecksum(object):
    """
    Adapter around the zlib checksum functions (adler32, crc32),
    exposing the update/hexdigest interface that hashlib objects provide.
    """

    def __init__(self, algorithm):
        """
        :param algorithm: zlib checksum function to wrap (for example zlib.adler32 or zlib.crc32)
        """
        self.algorithm = algorithm
        # checksum of empty data, i.e. the same starting point the zlib function itself uses
        self.checksum = algorithm(b'')
        # same value as the internal block size of md5/sha1
        self.blocksize = 64

    def update(self, data):
        """Fold given data into the running checksum."""
        self.checksum = self.algorithm(data, self.checksum)

    def hexdigest(self):
        """Return checksum as a string: '0x' followed by the checksum value masked to 32 bits."""
        masked = self.checksum & 0xffffffff
        return '0x%s' % masked
def is_readable(path):
    """
    Check whether something exists at the specified location and is readable.

    :param path: location to check
    :return: True if path exists and read access is granted, False otherwise
    """
    try:
        if not os.path.exists(path):
            return False
        return os.access(path, os.R_OK)
    except OSError as err:
        raise EasyBuildError("Failed to check whether %s is readable: %s", path, err)
def read_file(path, log_error=True, mode='r'):
    """
    Read the full contents of the file at the specified location.

    :param path: location of file to read
    :param log_error: raise an EasyBuildError if reading fails (if False, None is returned on failure instead)
    :param mode: mode to open the file with (for example 'r' or 'rb')
    :return: contents of the file (or None if reading failed and log_error is False)
    """
    contents = None
    try:
        with open(path, mode) as file_handle:
            contents = file_handle.read()
    except IOError as err:
        # failing to read is only fatal if the caller asked for that
        if log_error:
            raise EasyBuildError("Failed to read %s: %s", path, err)

    return contents
def write_file(path, data, append=False, forced=False, backup=False, always_overwrite=True, verbose=False):
    """
    Write specified data to the file at the specified location.

    Beware: by default, an existing file is overwritten and no backup is made!

    :param path: location of file
    :param data: contents to write to file
    :param append: append to existing file rather than overwrite
    :param forced: force actually writing file in (extended) dry run mode
    :param backup: back up existing file before overwriting or modifying it
    :param always_overwrite: don't require --force to overwrite an existing file
    :param verbose: be verbose, i.e. inform where backup file was created
    """
    # in (extended) dry run mode we only report what would be done, unless writing is forced
    if not forced and build_option('extended_dry_run'):
        dry_run_msg("file written: %s" % path, silent=build_option('silent'))
        return

    if os.path.exists(path):
        if not append:
            # replacing an existing file must be allowed, either by the caller or via --force
            if not (always_overwrite or build_option('force')):
                raise EasyBuildError("File exists, not overwriting it without --force: %s", path)
            _log.info("Overwriting existing file %s", path)

        if backup:
            backup_path = back_up_file(path)
            _log.info("Existing file %s backed up to %s", path, backup_path)
            if verbose:
                print_msg("Backup of %s created at %s" % (path, backup_path), silent=build_option('silent'))

    # figure out mode to use for open file handle
    # cfr. https://docs.python.org/3/library/functions.html#open
    # (binary data requires a binary file handle in Python 3)
    py3_binary_data = sys.version_info[0] >= 3 and isinstance(data, bytes)
    mode = ('a' if append else 'w') + ('b' if py3_binary_data else '')

    try:
        # make sure the parent directory is in place before opening the file
        mkdir(os.path.dirname(path), parents=True)
        with open(path, mode) as file_handle:
            file_handle.write(data)
    except IOError as err:
        raise EasyBuildError("Failed to write to %s: %s", path, err)
def is_binary(contents):
    """
    Determine whether given contents look like those of a binary file:
    this is the case only for a bytestring that includes a null byte.

    :param contents: file contents to check
    :return: True if contents is a bytes value that contains a null byte, False otherwise
    """
    if not isinstance(contents, bytes):
        return False

    return b'\00' in bytes(contents)
def resolve_path(path):
    """
    Determine fully resolved path (via os.path.realpath) for the specified path.

    :param path: path that (maybe) contains symlinks
    :return: resolved path
    """
    try:
        return os.path.realpath(path)
    except (AttributeError, OSError, TypeError) as err:
        raise EasyBuildError("Resolving path %s failed: %s", path, err)
def symlink(source_path, symlink_path, use_abspath_source=True):
    """
    Create a symbolic link at the specified location that points to the given source path.

    :param source_path: path the symlink should point to
    :param symlink_path: location where the symlink should be created
    :param use_abspath_source: make the symlink point to the absolute path of source_path
    """
    target = os.path.abspath(source_path) if use_abspath_source else source_path

    try:
        os.symlink(target, symlink_path)
        _log.info("Symlinked %s to %s", target, symlink_path)
    except OSError as err:
        raise EasyBuildError("Symlinking %s to %s failed: %s", target, symlink_path, err)
def remove_file(path):
    """
    Remove the file at the specified location (nothing is done if there is no such file).

    :param path: location of file to remove
    """
    # in (extended) dry run mode, only report what would be done
    if build_option('extended_dry_run'):
        dry_run_msg("file %s removed" % path, silent=build_option('silent'))
        return

    try:
        # os.path.exists yields False for a broken symlink, hence the additional check with os.path.islink
        present = os.path.exists(path) or os.path.islink(path)
        if present:
            os.remove(path)
    except OSError as err:
        raise EasyBuildError("Failed to remove file %s: %s", path, err)
def remove_dir(path):
    """
    Remove the directory at the specified location, including everything in it
    (nothing is done if the path does not exist).

    :param path: location of directory to remove
    """
    # in (extended) dry run mode, only report what would be done
    if build_option('extended_dry_run'):
        dry_run_msg("directory %s removed" % path, silent=build_option('silent'))
        return

    if not os.path.exists(path):
        return

    # try multiple times to cater for temporary failures on e.g. NFS mounted paths
    max_attempts = 3
    failures = []
    removed = False
    for attempt in range(max_attempts):
        try:
            shutil.rmtree(path)
        except OSError as err:
            _log.debug("Failed to remove path %s with shutil.rmtree at attempt %d: %s" % (path, attempt, err))
            failures.append(err)
            time.sleep(2)
            # make sure write permissions are enabled on entire directory
            adjust_permissions(path, stat.S_IWUSR, add=True, recursive=True)
        else:
            removed = True
            break

    if not removed:
        raise EasyBuildError("Failed to remove directory %s even after %d attempts.\nReasons: %s",
                             path, max_attempts, failures)

    _log.info("Path %s successfully removed." % path)
def remove(paths):
    """
    Remove a single file/directory, or a list of files and directories.

    :param paths: path(s) to remove (a single string value, or a list of paths)
    """
    # allow a single path to be passed as a plain string
    if isinstance(paths, string_type):
        paths = [paths]

    _log.info("Removing %d files & directories", len(paths))

    for entry in paths:
        if os.path.isfile(entry):
            remove_file(entry)
            continue

        if os.path.isdir(entry):
            remove_dir(entry)
            continue

        raise EasyBuildError("Specified path to remove is not an existing file or directory: %s", entry)
def change_dir(path):
    """
    Make the directory at the specified location the current working directory.

    :param path: location to change to
    :return: previous working directory (None if it could not be determined)
    """
    # determining the current working directory can fail if we're in a non-existing directory
    try:
        previous_dir = os.getcwd()
    except OSError as err:
        _log.debug("Failed to determine current working directory (but proceeding anyway: %s", err)
        previous_dir = None

    try:
        os.chdir(path)
    except OSError as err:
        raise EasyBuildError("Failed to change from %s to %s: %s", previous_dir, path, err)

    return previous_dir
def extract_file(fn, dest, cmd=None, extra_options=None, overwrite=False, forced=False, change_into_dir=None):
    """
    Unpack the file at the given path into the specified directory.

    :param fn: path to file to extract
    :param dest: location to extract to
    :param cmd: extract command to use (derived from filename if not specified)
    :param extra_options: extra options to pass to extract command
    :param overwrite: overwrite existing unpacked file
    :param forced: force extraction in (extended) dry run mode
    :param change_into_dir: change into resulting directory;
                            None (current default) implies True, but this is deprecated,
                            this named argument should be set to False or True explicitly
                            (in a future major release, default will be changed to False)
    :return: path to directory (in case of success)
    """
    if change_into_dir is None:
        _log.deprecated("extract_file function was called without specifying value for change_into_dir", '5.0')
        change_into_dir = True

    # a missing file is only tolerated in (extended) dry run mode
    if not (os.path.isfile(fn) or build_option('extended_dry_run')):
        raise EasyBuildError("Can't extract file %s: no such file", fn)

    mkdir(dest, parents=True)

    # use absolute pathnames from now on
    dest_dir = os.path.abspath(dest)

    # unpacking is done from within the target directory
    _log.debug("Unpacking %s in directory %s", fn, dest_dir)
    orig_dir = change_dir(dest_dir)

    if cmd:
        # complete command template with filename
        cmd = cmd % fn
    else:
        cmd = extract_cmd(fn, overwrite=overwrite)

    if not cmd:
        raise EasyBuildError("Can't extract file %s with unknown filetype", fn)

    if extra_options:
        cmd = "%s %s" % (cmd, extra_options)

    run.run_cmd(cmd, simple=True, force_in_dry_run=forced)

    # note: find_base_dir also changes into the base dir!
    extracted_dir = find_base_dir()

    # if changing into obtained directory is not desired,
    # change back to where we came from (unless that was a non-existing directory)
    if not change_into_dir:
        if orig_dir is None:
            raise EasyBuildError("Can't change back to non-existing directory after extracting %s in %s", fn, dest)
        change_dir(orig_dir)

    return extracted_dir
def which(cmd, retain_all=False, check_perms=True, log_ok=True, log_error=True):
    """
    Look for the specified command in the directories listed in $PATH.

    :param cmd: name of the command to look for
    :param retain_all: returns *all* locations to the specified command in $PATH, not just the first one
    :param check_perms: check whether candidate path has read/exec permissions before accepting it as a match
    :param log_ok: Log an info message where the command has been found (if any)
    :param log_error: Log a warning message when command hasn't been found
    :return: (first) path to the command, or None if it was not found;
             a (possibly empty) list of paths if retain_all is enabled
    """
    matches = []

    paths = os.environ.get('PATH', '').split(os.pathsep)
    for search_dir in paths:
        candidate = os.path.join(search_dir, cmd)

        # only accept path if command is there
        if not os.path.isfile(candidate):
            continue

        if log_ok:
            _log.info("Command %s found at %s", cmd, candidate)

        # check if read/executable permissions are available
        if check_perms and not os.access(candidate, os.R_OK | os.X_OK):
            _log.info("No read/exec permissions for %s, so continuing search...", candidate)
            continue

        matches.append(candidate)
        # first match is sufficient, unless all matches were requested
        if not retain_all:
            break

    if retain_all:
        res = matches
    elif matches:
        res = matches[0]
    else:
        res = None

    if not res and log_error:
        _log.warning("Could not find command '%s' (with permissions to read/execute it) in $PATH (%s)" % (cmd, paths))

    return res
def det_common_path_prefix(paths):
    """
    Determine common path prefix for a given list of paths.

    The prefix is determined on path component boundaries, so '/foo/bar' is not considered
    to be a prefix of '/foo/barbaz/x' (even though it is a prefix of that string).

    :param paths: list of paths
    :return: common (parent) directory of all paths, or None if there is none
             (empty list, only the filesystem root in common, or relative paths without a common directory)
    :raises EasyBuildError: if specified value is not a list
    """
    if not isinstance(paths, list):
        raise EasyBuildError("det_common_path_prefix: argument must be of type list (got %s: %s)", type(paths), paths)
    elif not paths:
        return None

    # initial guess for common prefix
    prefix = paths[0]
    found_common = False
    # walk up from the first path, until a directory is found that contains all paths
    # (or until the top is reached, where os.path.dirname no longer changes the value)
    while not found_common and prefix != os.path.dirname(prefix):
        prefix = os.path.dirname(prefix)
        # require a path separator right after the prefix, to only match full path components
        prefix_dir = prefix.rstrip(os.path.sep) + os.path.sep
        found_common = all(p == prefix or p.startswith(prefix_dir) for p in paths)

    if found_common:
        # prefix may be empty string for relative paths with a non-common prefix
        return prefix.rstrip(os.path.sep) or None
    else:
        return None
def is_alt_pypi_url(url):
    """
    Check whether specified URL is already an alternate PyPI URL, i.e. whether it contains a hash.

    :param url: URL to check
    :return: True if the URL ends with '/packages/<2 hex chars>/<2 hex chars>/<60 hex chars>/<filename>'
    """
    # example: .../packages/5b/03/e135b19fadeb9b1ccb45eac9f60ca2dc3afe72d099f6bd84e03cb131f9bf/easybuild-2.7.0.tar.gz
    pattern = re.compile('/packages/[a-f0-9]{2}/[a-f0-9]{2}/[a-f0-9]{60}/[^/]+$')
    is_alt_url = pattern.search(url) is not None

    _log.debug("Checking whether '%s' is an alternate PyPI URL using pattern '%s'...: %s",
               url, pattern.pattern, is_alt_url)

    return is_alt_url
def pypi_source_urls(pkg_name):
    """
    Fetch list of source URLs (incl. source filename) for specified Python package from PyPI, using 'simple' PyPI API.

    :param pkg_name: name of the Python package
    :return: list of full URLs found on the package page (empty list if the page could not be downloaded)
    """
    # example: https://pypi.python.org/simple/easybuild
    # see also:
    # - https://www.python.org/dev/peps/pep-0503/
    # - https://wiki.python.org/moin/PyPISimple
    simple_url = 'https://pypi.python.org/simple/%s' % re.sub(r'[-_.]+', '-', pkg_name.lower())

    hrefs = []

    # the page is downloaded to a temporary directory, which is always cleaned up again afterwards
    tmpdir = tempfile.mkdtemp()
    try:
        urls_html = os.path.join(tmpdir, '%s_urls.html' % pkg_name)
        if download_file(os.path.basename(urls_html), simple_url, urls_html) is None:
            _log.debug("Failed to download %s to determine available PyPI URLs for %s", simple_url, pkg_name)
        else:
            urls_txt = read_file(urls_html)

            # note: don't use xml.etree.ElementTree to parse HTML page served by PyPI's simple API
            # cfr. https://github.com/pypa/warehouse/issues/7886
            class HrefHTMLParser(HTMLParser):
                """HTML parser to extract 'href' attribute values from anchor tags (<a href='...'>)."""

                def handle_starttag(self, tag, attrs):
                    if tag == 'a':
                        attrs = dict(attrs)
                        if 'href' in attrs:
                            hrefs.append(attrs['href'])

            parser = HrefHTMLParser()
            parser.feed(urls_txt)
    finally:
        shutil.rmtree(tmpdir, ignore_errors=True)

    # links are relative, transform them into full URLs; for example:
    # from: ../../packages/<dir1>/<dir2>/<hash>/easybuild-<version>.tar.gz#md5=<md5>
    # to: https://pypi.python.org/packages/<dir1>/<dir2>/<hash>/easybuild-<version>.tar.gz#md5=<md5>
    return [re.sub('.*/packages/', 'https://pypi.python.org/packages/', x) for x in hrefs]
def derive_alt_pypi_url(url):
    """
    Derive alternate PyPI URL for given URL.

    :param url: PyPI source URL, which ends with '<package name>/<source filename>'
    :return: alternate PyPI URL (without checksum suffix) for the same source file,
             or None if no matching URL was found
    """
    alt_pypi_url = None

    # example input URL: https://pypi.python.org/packages/source/e/easybuild/easybuild-2.7.0.tar.gz
    pkg_name, pkg_source = url.strip().split('/')[-2:]

    cand_urls = pypi_source_urls(pkg_name)

    # md5 for old PyPI, sha256 for new PyPi (Warehouse);
    # source filename must be matched literally, so *all* special regex characters in it are escaped
    # (not just '.'; filenames may also include characters like '+')
    regex = re.compile('.*/%s(?:#md5=[a-f0-9]{32}|#sha256=[a-f0-9]{64})$' % re.escape(pkg_source), re.M)
    for cand_url in cand_urls:
        res = regex.match(cand_url)
        if res:
            # e.g.: https://pypi.python.org/packages/<dir1>/<dir2>/<hash>/easybuild-<version>.tar.gz#md5=<md5>
            alt_pypi_url = res.group(0).split('#sha256')[0].split('#md5')[0]
            break

    if not alt_pypi_url:
        _log.debug("Failed to extract hash using pattern '%s' from list of URLs: %s", regex.pattern, cand_urls)

    return alt_pypi_url
def download_file(filename, url, path, forced=False):
    """
    Download a file from the given URL, to the specified path.

    Up to 3 download attempts are made. The download is first tried with the standard library (std_urllib);
    if that results in HTTP status code 403 on the first attempt, or in an SSL handshake failure,
    the 'requests' package is used for the subsequent attempts (if it is available).

    :param filename: name of the file being downloaded (only used in log messages)
    :param url: URL to download from
    :param path: location to write the downloaded file to (an existing file is backed up first)
    :param forced: passed on to write_file, to force writing the file in (extended) dry run mode
    :return: path to the downloaded file, or None if the download failed
    :raises EasyBuildError: if an unexpected error occurs, or if switching to 'requests' is required
                            but that package is not available
    """
    _log.debug("Trying to download %s from %s to %s", filename, url, path)
    timeout = build_option('download_timeout')
    if timeout is None:
        # default to 10sec timeout if none was specified
        # default system timeout (used if nothing is specified) may be infinite (?)
        timeout = 10
    _log.debug("Using timeout of %s seconds for initiating download" % timeout)
    # make sure directory exists
    basedir = os.path.dirname(path)
    mkdir(basedir, parents=True)
    # try downloading, three times max.
    downloaded = False
    max_attempts = 3
    attempt_cnt = 0
    # use custom HTTP header
    headers = {'User-Agent': 'EasyBuild', 'Accept': '*/*'}
    # for backward compatibility, and to avoid relying on 3rd party Python library 'requests'
    url_req = std_urllib.Request(url, headers=headers)
    # module used for downloading: std_urllib initially, may be replaced by 'requests' after a failed attempt
    used_urllib = std_urllib
    switch_to_requests = False
    while not downloaded and attempt_cnt < max_attempts:
        attempt_cnt += 1
        try:
            if used_urllib is std_urllib:
                # urllib2 (Python 2) / urllib.request (Python 3) does the right thing for http proxy setups,
                # urllib does not!
                url_fd = std_urllib.urlopen(url_req, timeout=timeout)
                status_code = url_fd.getcode()
            else:
                response = requests.get(url, headers=headers, stream=True, timeout=timeout)
                status_code = response.status_code
                # raises HTTPError for unsuccessful status codes, which is handled below
                response.raise_for_status()
                url_fd = response.raw
                url_fd.decode_content = True
            _log.debug('response code for given url %s: %s' % (url, status_code))
            write_file(path, url_fd.read(), forced=forced, backup=True)
            _log.info("Downloaded file %s from url %s to %s" % (filename, url, path))
            downloaded = True
            url_fd.close()
        except used_urllib.HTTPError as err:
            # with 'requests', status_code was already set before the error was raised
            if used_urllib is std_urllib:
                status_code = err.code
            if status_code == 403 and attempt_cnt == 1:
                # 'forbidden' on first attempt: try again using 'requests'
                switch_to_requests = True
            elif 400 <= status_code <= 499:
                # other client errors are considered permanent, so there is no point in retrying
                _log.warning("URL %s was not found (HTTP response code %s), not trying again" % (url, status_code))
                break
            else:
                _log.warning("HTTPError occurred while trying to download %s to %s: %s" % (url, path, err))
        except IOError as err:
            _log.warning("IOError occurred while trying to download %s to %s: %s" % (url, path, err))
            # an SSL handshake failure is a reason to fall back to 'requests'
            error_re = re.compile(r"<urlopen error \[Errno 1\] _ssl.c:.*: error:.*:"
                                  "SSL routines:SSL23_GET_SERVER_HELLO:sslv3 alert handshake failure>")
            if error_re.match(str(err)):
                switch_to_requests = True
        except Exception as err:
            raise EasyBuildError("Unexpected error occurred when trying to download %s to %s: %s", url, path, err)
        if not downloaded and attempt_cnt < max_attempts:
            _log.info("Attempt %d of downloading %s to %s failed, trying again..." % (attempt_cnt, url, path))
            if used_urllib is std_urllib and switch_to_requests:
                if not HAVE_REQUESTS:
                    raise EasyBuildError("SSL issues with urllib2. If you are using RHEL/CentOS 6.x please "
                                         "install the python-requests and pyOpenSSL RPM packages and try again.")
                _log.info("Downloading using requests package instead of urllib2")
                used_urllib = requests
    if downloaded:
        _log.info("Successful download of file %s from url %s to path %s" % (filename, url, path))
        return path
    else:
        _log.warning("Download of %s to %s failed, done trying" % (url, path))
        return None
def create_index(path, ignore_dirs=None):
    """
    Create index for files in specified path.

    :param path: directory to index (symlinked directories are followed)
    :param ignore_dirs: names of subdirectories that should not be descended into
    :return: set of paths to the files that were found, relative to the specified path
    """
    if ignore_dirs is None:
        ignore_dirs = []

    if not os.path.exists(path):
        raise EasyBuildError("Specified path does not exist: %s", path)
    elif not os.path.isdir(path):
        raise EasyBuildError("Specified path is not a directory: %s", path)

    index = set()

    for (dirpath, dirnames, filenames) in os.walk(path, topdown=True, followlinks=True):
        if filenames:
            # use relative paths in index
            rel_dirpath = os.path.relpath(dirpath, path)
            # avoid that relative paths start with './'
            if rel_dirpath == '.':
                rel_dirpath = ''

            index.update(os.path.join(rel_dirpath, filename) for filename in filenames)

        # do not consider (certain) hidden directories
        # note: we still need to consider e.g., .local !
        # replace list elements using [:], so os.walk doesn't process deleted directories
        # see https://stackoverflow.com/questions/13454164/os-walk-without-hidden-folders
        dirnames[:] = [dirname for dirname in dirnames if dirname not in ignore_dirs]

    return index
def dump_index(path, max_age_sec=None):
    """
    Create index for files in specified path, and dump it to file (alphabetically sorted).

    :param path: directory to create the index for; the index file is created in this directory
    :param max_age_sec: number of seconds the index remains valid (0: no expiry);
                        value of 'index_max_age' build option is used if not specified
    :return: path to the index file
    """
    if max_age_sec is None:
        max_age_sec = build_option('index_max_age')

    index_fp = os.path.join(path, PATH_INDEX_FILENAME)
    indexed_paths = create_index(path)

    created_ts = datetime.datetime.now()
    if max_age_sec == 0:
        # index never expires
        expiry_ts = datetime.datetime.max
    else:
        expiry_ts = created_ts + datetime.timedelta(0, max_age_sec)

    # header with timestamps, followed by the sorted list of indexed paths
    header = ["# created at: %s" % str(created_ts), "# valid until: %s" % str(expiry_ts)]
    contents = '\n'.join(header + sorted(indexed_paths))

    write_file(index_fp, contents, always_overwrite=False)

    return index_fp
def load_index(path, ignore_dirs=None):
    """
    Load index for specified path, and return contents (or None if no index exists).

    :param path: directory for which the index should be loaded
    :param ignore_dirs: names of directories; indexed files located in such a directory are left out
    :return: set of indexed paths, or None if there is no index file, if indexes are ignored
             ('ignore_index' build option), if the index has expired, or if no paths were retained
    :raises EasyBuildError: if the 'valid until' timestamp in the index can not be parsed
    """
    if ignore_dirs is None:
        ignore_dirs = []

    index_fp = os.path.join(path, PATH_INDEX_FILENAME)
    index = set()

    if build_option('ignore_index'):
        _log.info("Ignoring index for %s...", path)

    elif os.path.exists(index_fp):
        lines = read_file(index_fp).splitlines()

        valid_ts_regex = re.compile("^# valid until: (.*)", re.M)
        valid_ts = None

        for line in lines:

            # extract "valid until" timestamp, so we can check whether index is still valid
            # (only the first matching line is taken into account)
            if valid_ts is None:
                res = valid_ts_regex.match(line)
            else:
                res = None

            if res:
                raw_ts = res.group(1)
                # timestamps are written using str(<datetime>),
                # which leaves out the fractional part when the number of microseconds is zero
                if '.' in raw_ts:
                    ts_format = '%Y-%m-%d %H:%M:%S.%f'
                else:
                    ts_format = '%Y-%m-%d %H:%M:%S'
                try:
                    valid_ts = datetime.datetime.strptime(raw_ts, ts_format)
                except ValueError as err:
                    raise EasyBuildError("Failed to parse timestamp '%s' for index at %s: %s", raw_ts, path, err)

            elif line.startswith('#'):
                _log.info("Ignoring unknown header line '%s' in index for %s", line, path)

            else:
                # filter out files that are in an ignored directory
                path_dirs = line.split(os.path.sep)[:-1]
                if not any(d in path_dirs for d in ignore_dirs):
                    index.add(line)

        # check whether index is still valid
        if valid_ts:
            curr_ts = datetime.datetime.now()
            if curr_ts > valid_ts:
                print_warning("Index for %s is no longer valid (too old), so ignoring it...", path)
                index = None
            else:
                print_msg("found valid index for %s, so using it...", path)

    # an empty index is reported as 'no index'
    return index or None
def find_easyconfigs(path, ignore_dirs=None):
    """
    Find .eb easyconfig files in path

    :param path: path to a file (returned as is, in a list), or to a directory to search recursively
    :param ignore_dirs: names of subdirectories that should not be descended into
    :return: list of paths to the files that were found (files named TEMPLATE.eb are skipped)
    """
    if os.path.isfile(path):
        return [path]

    if ignore_dirs is None:
        ignore_dirs = []

    # walk through the start directory, retain all files that end in .eb
    easyconfigs = []
    for dirpath, dirnames, filenames in os.walk(os.path.abspath(path), topdown=True):
        for filename in filenames:
            if filename.endswith('.eb') and filename != 'TEMPLATE.eb':
                spec = os.path.join(dirpath, filename)
                _log.debug("Found easyconfig %s" % spec)
                easyconfigs.append(spec)

        # ignore subdirs specified to be ignored by replacing items in dirnames list used by os.walk
        dirnames[:] = [dirname for dirname in dirnames if dirname not in ignore_dirs]

    return easyconfigs
def find_glob_pattern(glob_pattern, fail_on_no_match=True):
    """
    Find unique file/dir matching glob_pattern (raises error if more than one match is found)

    :param glob_pattern: glob pattern to use
    :param fail_on_no_match: raise an error if there is no match at all (if False, None is returned instead)
    :return: path of the single match; the pattern itself is returned in (extended) dry run mode
    """
    # no actual matching is done in (extended) dry run mode
    if build_option('extended_dry_run'):
        return glob_pattern

    matches = glob.glob(glob_pattern)
    match_cnt = len(matches)

    if match_cnt == 0 and not fail_on_no_match:
        return None

    if match_cnt != 1:
        raise EasyBuildError("Was expecting exactly one match for '%s', found %d: %s",
                             glob_pattern, match_cnt, matches)

    return matches[0]
def search_file(paths, query, short=False, ignore_dirs=None, silent=False, filename_only=False, terse=False,
                case_sensitive=False):
    """
    Search for files in specified paths using specified search query (regular expression)

    :param paths: list of paths to search in
    :param query: search query to use (regular expression), matched against filenames only;
                  used case-insensitive unless case_sensitive is enabled
    :param short: figure out common prefix of hits, use variable to factor it out
    :param ignore_dirs: list of directories to ignore (default: ['.git', '.svn'])
    :param silent: whether or not to remain silent (don't print anything)
    :param filename_only: only return filenames, not file paths
    :param terse: stick to terse (machine-readable) output, as opposed to pretty-printing
    :param case_sensitive: take case into account when matching the search query
    :return: tuple with list of variable definitions (name, common path prefix), and list of hits
    :raises EasyBuildError: if ignore_dirs is not a list, or if the search query is not a valid regular expression
    """
    if ignore_dirs is None:
        ignore_dirs = ['.git', '.svn']
    if not isinstance(ignore_dirs, list):
        raise EasyBuildError("search_file: ignore_dirs (%s) should be of type list, not %s",
                             ignore_dirs, type(ignore_dirs))

    # escape some special characters in query that may also occur in actual software names: +
    # do not use re.escape, since that breaks queries with genuine regex characters like ^ or .*
    query = re.sub('([+])', r'\\\1', query)

    # compile regex (case-insensitive, unless specified otherwise)
    try:
        if case_sensitive:
            query = re.compile(query)
        else:
            query = re.compile(query, re.I)
    except re.error as err:
        raise EasyBuildError("Invalid search query: %s", err)

    # make sure that the progress message reflects how the query is actually matched
    case_label = 'case-sensitive' if case_sensitive else 'case-insensitive'

    var_defs = []
    hits = []
    var_index = 1
    var = None
    for path in paths:
        path_hits = []
        if not terse:
            print_msg("Searching (%s) for '%s' in %s " % (case_label, query.pattern, path), log=_log, silent=silent)

        # use the index for this path if there is one, create one on the fly otherwise
        path_index = load_index(path, ignore_dirs=ignore_dirs)
        if path_index is None or build_option('ignore_index'):
            if os.path.exists(path):
                _log.info("No index found for %s, creating one...", path)
                path_index = create_index(path, ignore_dirs=ignore_dirs)
            else:
                path_index = []
        else:
            _log.info("Index found for %s, so using it...", path)

        for filepath in path_index:
            filename = os.path.basename(filepath)
            if query.search(filename):
                # a new variable name is claimed for each path in which there is at least one hit
                if not path_hits:
                    var = "CFGS%d" % var_index
                    var_index += 1
                if filename_only:
                    path_hits.append(filename)
                else:
                    path_hits.append(os.path.join(path, filepath))

        path_hits = sorted(path_hits)

        if path_hits:
            common_prefix = det_common_path_prefix(path_hits)
            # only factor out the common prefix if that actually makes the output shorter
            if not terse and short and common_prefix is not None and len(common_prefix) > len(var) * 2:
                var_defs.append((var, common_prefix))
                hits.extend([os.path.join('$%s' % var, fn[len(common_prefix) + 1:]) for fn in path_hits])
            else:
                hits.extend(path_hits)

    return var_defs, hits
def dir_contains_files(path):
    """
    Check whether there is at least one file in the given directory, or in any of its subdirectories.

    :param path: directory to check
    :return: True if a file was found, False otherwise
    """
    for _root, _dirs, files in os.walk(path):
        if files:
            return True

    return False
def find_eb_script(script_name):
    """
    Find EasyBuild script with given name (in easybuild/scripts subdirectory).

    :param script_name: name of the script to locate
    :return: path to the script
    :raises EasyBuildError: if the script could not be found
    """
    module_path = __file__
    eb_dir = None

    if os.path.isabs(module_path):
        eb_dir = os.path.dirname(os.path.dirname(module_path))
    else:
        # go hunting for absolute path to filetools module via sys.path;
        # we can't rely on os.path.abspath or os.path.realpath, since they leverage os.getcwd()...
        for sys_path_entry in sys.path:
            candidate = os.path.join(os.path.abspath(sys_path_entry), module_path)
            if os.path.exists(candidate):
                eb_dir = os.path.dirname(os.path.dirname(candidate))
                break

    if eb_dir is None:
        raise EasyBuildError("Failed to find parent directory for 'easybuild/scripts' subdirectory")

    script_loc = os.path.join(eb_dir, 'scripts', script_name)
    if os.path.exists(script_loc):
        return script_loc

    first_script_loc = script_loc

    # fallback mechanism: check in location relative to location of 'eb'
    eb_path = os.getenv('EB_SCRIPT_PATH') or which('eb')
    if eb_path is None:
        _log.warning("'eb' not found in $PATH, failed to determine installation prefix")
    else:
        install_prefix = os.path.dirname(os.path.dirname(resolve_path(eb_path)))
        script_loc = os.path.join(install_prefix, 'easybuild', 'scripts', script_name)

    if not os.path.exists(script_loc):
        raise EasyBuildError("Script '%s' not found at expected location: %s or %s",
                             script_name, first_script_loc, script_loc)

    return script_loc
def compute_checksum(path, checksum_type=DEFAULT_CHECKSUM):
    """
    Compute checksum of specified file.

    :param path: Path of file to compute checksum for
    :param checksum_type: type(s) of checksum ('adler32', 'crc32', 'md5' (default), 'sha1', 'sha256', 'sha512', 'size')
    :return: checksum value (a dummy value is returned if a memory error occurred while computing it)
    """
    if checksum_type not in CHECKSUM_FUNCTIONS:
        raise EasyBuildError("Unknown checksum type (%s), supported types are: %s",
                             checksum_type, CHECKSUM_FUNCTIONS.keys())

    checksum_function = CHECKSUM_FUNCTIONS[checksum_type]
    try:
        return checksum_function(path)
    except IOError as err:
        raise EasyBuildError("Failed to read %s: %s", path, err)
    except MemoryError as err:
        # running out of memory is not considered fatal here, a placeholder value is used instead
        _log.warning("A memory error occurred when computing the checksum for %s: %s" % (path, err))
        return 'dummy_checksum_due_to_memory_error'
def calc_block_checksum(path, algorithm):
    """
    Calculate a checksum of a file by reading it in blocks.

    :param path: path of file to compute checksum for
    :param algorithm: checksum object that provides 'update' and 'hexdigest' methods
                      (for example a hashlib object, or a ZlibChecksum instance)
    :return: value returned by the 'hexdigest' method after feeding it the entire file
    :raises EasyBuildError: if the file can not be read
    """
    # We pick a blocksize of 16 MB: it's a multiple of the internal
    # blocksize of md5/sha1 (64) and gave the best speed results
    try:
        # objects without a 'blocksize' attribute get the fixed fallback value below
        blocksize = algorithm.blocksize * 262144  # 2^18
    except AttributeError:
        blocksize = 16777216  # 2^24
    _log.debug("Using blocksize %s for calculating the checksum" % blocksize)

    try:
        # context manager ensures that file handle is also closed when reading or updating the checksum fails
        with open(path, 'rb') as handle:
            for block in iter(lambda: handle.read(blocksize), b''):
                algorithm.update(block)
    except IOError as err:
        raise EasyBuildError("Failed to read %s: %s", path, err)

    return algorithm.hexdigest()
def verify_checksum(path, checksums):
"""
Verify checksum of specified file.
:param file: path of file to verify checksum of
:param checksum: checksum value (and type, optionally, default is MD5), e.g., 'af314', ('sha', '5ec1b')
"""
filename = os.path.basename(path)
# if no checksum is provided, pretend checksum to be valid, unless presence of checksums to verify is enforced
if checksums is None:
if build_option('enforce_checksums'):
raise EasyBuildError("Missing checksum for %s", filename)
else:
return True
# make sure we have a list of checksums
if not isinstance(checksums, list):
checksums = [checksums]
for checksum in checksums:
if isinstance(checksum, dict):
if filename in checksum:
# Set this to a string-type checksum
checksum = checksum[filename]
elif build_option('enforce_checksums'):
raise EasyBuildError("Missing checksum for %s", filename)
else:
# Set to None and allow to fail elsewhere
checksum = None
if isinstance(checksum, string_type):