From 6b0e0628675d2acadb3f85ad8d78ebaefff73996 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Mon, 14 Oct 2019 17:47:45 +0900 Subject: [PATCH] fix file preambles for Python scripts (#357) * fix file preambles for Python scripts * respond to review comments --- integration-tests/test_184.py | 7 +++ integration-tests/test_207.py | 7 +++ integration-tests/test_209.py | 8 +++ integration-tests/test_hdfs.py | 11 +++- integration-tests/test_http.py | 6 +- integration-tests/test_minio.py | 7 +++ integration-tests/test_s3.py | 6 ++ integration-tests/test_webhdfs.py | 11 +++- release/check_preamble.py | 82 +++++++++++++++++++++++++ smart_open/__init__.py | 1 - smart_open/bytebuffer.py | 6 ++ smart_open/doctools.py | 4 +- smart_open/hdfs.py | 4 +- smart_open/http.py | 6 ++ smart_open/s3.py | 6 ++ smart_open/smart_open_lib.py | 3 +- smart_open/ssh.py | 4 +- smart_open/tests/__init__.py | 7 +++ smart_open/tests/test_bytebuffer.py | 6 ++ smart_open/tests/test_hdfs.py | 6 ++ smart_open/tests/test_http.py | 7 +++ smart_open/tests/test_s3.py | 6 ++ smart_open/tests/test_smart_open.py | 4 +- smart_open/tests/test_smart_open_old.py | 13 ++-- smart_open/webhdfs.py | 4 +- 25 files changed, 208 insertions(+), 24 deletions(-) create mode 100644 release/check_preamble.py diff --git a/integration-tests/test_184.py b/integration-tests/test_184.py index 992cd501..d58a9bdb 100644 --- a/integration-tests/test_184.py +++ b/integration-tests/test_184.py @@ -1,3 +1,10 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2019 Radim Rehurek +# +# This code is distributed under the terms and conditions +# from the MIT License (MIT). +# import sys import time diff --git a/integration-tests/test_207.py b/integration-tests/test_207.py index 2621ed81..ac07c976 100644 --- a/integration-tests/test_207.py +++ b/integration-tests/test_207.py @@ -1,3 +1,10 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2019 Radim Rehurek +# +# This code is distributed under the terms and conditions +# from the MIT License (MIT). +# import os import sys import tempfile diff --git a/integration-tests/test_209.py b/integration-tests/test_209.py index fecc33a2..03f44e17 100644 --- a/integration-tests/test_209.py +++ b/integration-tests/test_209.py @@ -1,3 +1,11 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2019 Radim Rehurek +# +# This code is distributed under the terms and conditions +# from the MIT License (MIT). +# +import io import json import logging import os diff --git a/integration-tests/test_hdfs.py b/integration-tests/test_hdfs.py index 59a00941..f4386627 100644 --- a/integration-tests/test_hdfs.py +++ b/integration-tests/test_hdfs.py @@ -1,7 +1,14 @@ +# -*- coding: utf-8 -*- # -# Sample code for HDFS integration tests. -# Requires hadoop to be running on localhost, at the moment. +# Copyright (C) 2019 Radim Rehurek # +# This code is distributed under the terms and conditions +# from the MIT License (MIT). +# +""" +Sample code for HDFS integration tests. +Requires hadoop to be running on localhost, at the moment. +""" import smart_open with smart_open.smart_open("hdfs://user/root/input/core-site.xml") as fin: diff --git a/integration-tests/test_http.py b/integration-tests/test_http.py index ada70d2d..c8097c64 100644 --- a/integration-tests/test_http.py +++ b/integration-tests/test_http.py @@ -1,6 +1,10 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- # +# Copyright (C) 2019 Radim Rehurek +# +# This code is distributed under the terms and conditions +# from the MIT License (MIT). +# from __future__ import unicode_literals import logging diff --git a/integration-tests/test_minio.py b/integration-tests/test_minio.py index 902aff3f..fa6c7752 100644 --- a/integration-tests/test_minio.py +++ b/integration-tests/test_minio.py @@ -1,3 +1,10 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2019 Radim Rehurek +# +# This code is distributed under the terms and conditions +# from the MIT License (MIT). +# import logging import boto3 diff --git a/integration-tests/test_s3.py b/integration-tests/test_s3.py index cd18157f..78459af9 100644 --- a/integration-tests/test_s3.py +++ b/integration-tests/test_s3.py @@ -1,4 +1,10 @@ # -*- coding: utf-8 -*- +# +# Copyright (C) 2019 Radim Rehurek +# +# This code is distributed under the terms and conditions +# from the MIT License (MIT). +# from __future__ import unicode_literals import io diff --git a/integration-tests/test_webhdfs.py b/integration-tests/test_webhdfs.py index 61c38f9e..ec029756 100644 --- a/integration-tests/test_webhdfs.py +++ b/integration-tests/test_webhdfs.py @@ -1,7 +1,14 @@ +# -*- coding: utf-8 -*- # -# Sample code for WebHDFS integration tests. -# Requires hadoop to be running on localhost, at the moment. +# Copyright (C) 2019 Radim Rehurek # +# This code is distributed under the terms and conditions +# from the MIT License (MIT). +# +""" +Sample code for WebHDFS integration tests. +Requires hadoop to be running on localhost, at the moment. +""" import smart_open with smart_open.smart_open("webhdfs://localhost:50070/user/root/input/core-site.xml") as fin: diff --git a/release/check_preamble.py b/release/check_preamble.py new file mode 100644 index 00000000..2f506583 --- /dev/null +++ b/release/check_preamble.py @@ -0,0 +1,82 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2019 Radim Rehurek +# +# This code is distributed under the terms and conditions +# from the MIT License (MIT). +# + +"""Checks preambles of Python script files. + +We want to ensure they all contain the appropriate license and copyright. + +For the purposes of this script, the *preamble* is defined as the first +lines of the file starting with a hash (#). Any line that does not start +with a hash ends the preamble. + +Usage:: + + python check_preamble.py --replace /path/to/template.py script.py + +The above command reads the preamble from ``template.py``, and then copies +that preamble into ``script.py``. If ``script.py`` already contains a +preamble, then the existing preamble will be replaced **entirely**. + +Processing entire subdirectories with one command:: + + find subdir1 subdir2 -iname "*.py" | xargs -n 1 python check_preamble.py --replace template.py + +""" +import argparse +import logging +import os +import sys + + +def extract_preamble(fin): + end_preamble = False + preamble, body = [], [] + + for line in fin: + if end_preamble: + body.append(line) + elif line.startswith('#'): + preamble.append(line) + else: + end_preamble = True + body.append(line) + + return preamble, body + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('path', help='the path of the file to check') + parser.add_argument('--replace', help='replace the preamble with the one from this file') + parser.add_argument('--loglevel', default=logging.INFO) + args = parser.parse_args() + + logging.basicConfig(level=args.loglevel) + + with open(args.path) as fin: + preamble, body = extract_preamble(fin) + + for line in preamble: + logging.info('%s: %s', args.path, line.rstrip()) + + if not args.replace: + sys.exit(0) + + with open(args.replace) as fin: + preamble, _ = extract_preamble(fin) + + if os.access(args.path, os.X_OK): + preamble.insert(0, '#!/usr/bin/env python\n') + + with open(args.path, 'w') as fout: + for line in preamble + body: + fout.write(line) + + +if __name__ == '__main__': + main() diff --git a/smart_open/__init__.py b/smart_open/__init__.py index ed52b28d..3d7ed155 100644 --- a/smart_open/__init__.py +++ b/smart_open/__init__.py @@ -1,4 +1,3 @@ -# # -*- coding: utf-8 -*- # # Copyright (C) 2019 Radim Rehurek diff --git a/smart_open/bytebuffer.py b/smart_open/bytebuffer.py index f056a8e0..8df12435 100644 --- a/smart_open/bytebuffer.py +++ b/smart_open/bytebuffer.py @@ -1,4 +1,10 @@ # -*- coding: utf-8 -*- +# +# Copyright (C) 2019 Radim Rehurek +# +# This code is distributed under the terms and conditions +# from the MIT License (MIT). +# """Implements ByteBuffer class for amortizing network transfer overhead.""" import io diff --git a/smart_open/doctools.py b/smart_open/doctools.py index 3195cf17..1906ee0b 100644 --- a/smart_open/doctools.py +++ b/smart_open/doctools.py @@ -1,9 +1,9 @@ -# # -*- coding: utf-8 -*- # # Copyright (C) 2019 Radim Rehurek # -# This code is distributed under the terms and conditions from the MIT License (MIT). +# This code is distributed under the terms and conditions +# from the MIT License (MIT). # """Common functions for working with docstrings. diff --git a/smart_open/hdfs.py b/smart_open/hdfs.py index f94612dc..2485685f 100644 --- a/smart_open/hdfs.py +++ b/smart_open/hdfs.py @@ -1,9 +1,9 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- # # Copyright (C) 2019 Radim Rehurek # -# This code is distributed under the terms and conditions from the MIT License (MIT). +# This code is distributed under the terms and conditions +# from the MIT License (MIT). # """Implements reading and writing to/from HDFS. diff --git a/smart_open/http.py b/smart_open/http.py index 90f65ec1..7530a942 100644 --- a/smart_open/http.py +++ b/smart_open/http.py @@ -1,4 +1,10 @@ # -*- coding: utf-8 -*- +# +# Copyright (C) 2019 Radim Rehurek +# +# This code is distributed under the terms and conditions +# from the MIT License (MIT). +# """Implements file-like objects for reading from http.""" import io diff --git a/smart_open/s3.py b/smart_open/s3.py index e720e481..ba631317 100644 --- a/smart_open/s3.py +++ b/smart_open/s3.py @@ -1,4 +1,10 @@ # -*- coding: utf-8 -*- +# +# Copyright (C) 2019 Radim Rehurek +# +# This code is distributed under the terms and conditions +# from the MIT License (MIT). +# """Implements file-like objects for reading and writing from/to S3.""" import io diff --git a/smart_open/smart_open_lib.py b/smart_open/smart_open_lib.py index 9eab2b52..14192e96 100644 --- a/smart_open/smart_open_lib.py +++ b/smart_open/smart_open_lib.py @@ -1,7 +1,6 @@ -# # -*- coding: utf-8 -*- # -# Copyright (C) 2015 Radim Rehurek +# Copyright (C) 2019 Radim Rehurek # # This code is distributed under the terms and conditions # from the MIT License (MIT). diff --git a/smart_open/ssh.py b/smart_open/ssh.py index 557fc355..11de796e 100644 --- a/smart_open/ssh.py +++ b/smart_open/ssh.py @@ -1,9 +1,9 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- # # Copyright (C) 2019 Radim Rehurek # -# This code is distributed under the terms and conditions from the MIT License (MIT). +# This code is distributed under the terms and conditions +# from the MIT License (MIT). # """Implements I/O streams over SSH. diff --git a/smart_open/tests/__init__.py b/smart_open/tests/__init__.py index e69de29b..32080917 100644 --- a/smart_open/tests/__init__.py +++ b/smart_open/tests/__init__.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2019 Radim Rehurek +# +# This code is distributed under the terms and conditions +# from the MIT License (MIT). +# diff --git a/smart_open/tests/test_bytebuffer.py b/smart_open/tests/test_bytebuffer.py index 21d98ed0..5f518a7f 100644 --- a/smart_open/tests/test_bytebuffer.py +++ b/smart_open/tests/test_bytebuffer.py @@ -1,4 +1,10 @@ # -*- coding: utf-8 -*- +# +# Copyright (C) 2019 Radim Rehurek +# +# This code is distributed under the terms and conditions +# from the MIT License (MIT). +# import random import unittest diff --git a/smart_open/tests/test_hdfs.py b/smart_open/tests/test_hdfs.py index 8b68128a..c0fb8aab 100644 --- a/smart_open/tests/test_hdfs.py +++ b/smart_open/tests/test_hdfs.py @@ -1,4 +1,10 @@ # -*- coding: utf-8 -*- +# +# Copyright (C) 2019 Radim Rehurek +# +# This code is distributed under the terms and conditions +# from the MIT License (MIT). +# from __future__ import print_function from __future__ import unicode_literals diff --git a/smart_open/tests/test_http.py b/smart_open/tests/test_http.py index c8fe86af..e61e9dea 100644 --- a/smart_open/tests/test_http.py +++ b/smart_open/tests/test_http.py @@ -1,3 +1,10 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2019 Radim Rehurek +# +# This code is distributed under the terms and conditions +# from the MIT License (MIT). +# import unittest import responses diff --git a/smart_open/tests/test_s3.py b/smart_open/tests/test_s3.py index 3c3abdc9..c213a7cb 100644 --- a/smart_open/tests/test_s3.py +++ b/smart_open/tests/test_s3.py @@ -1,4 +1,10 @@ # -*- coding: utf-8 -*- +# +# Copyright (C) 2019 Radim Rehurek +# +# This code is distributed under the terms and conditions +# from the MIT License (MIT). +# import gzip import io import logging diff --git a/smart_open/tests/test_smart_open.py b/smart_open/tests/test_smart_open.py index 8d95baa8..cdb82112 100644 --- a/smart_open/tests/test_smart_open.py +++ b/smart_open/tests/test_smart_open.py @@ -1,10 +1,10 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- # -# Copyright (C) 2015 Radim Rehurek +# Copyright (C) 2019 Radim Rehurek # # This code is distributed under the terms and conditions # from the MIT License (MIT). +# import bz2 import io diff --git a/smart_open/tests/test_smart_open_old.py b/smart_open/tests/test_smart_open_old.py index 90647448..1bdc668f 100644 --- a/smart_open/tests/test_smart_open_old.py +++ b/smart_open/tests/test_smart_open_old.py @@ -1,15 +1,16 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- # -# Copyright (C) 2015 Radim Rehurek +# Copyright (C) 2019 Radim Rehurek # # This code is distributed under the terms and conditions # from the MIT License (MIT). # -# These are tests that test the deprecated smart_open.smart_open function. -# They mostly duplicate tests in test_smart_open.py and are here to guarantee -# backwards compatibility. -# +""" +These are tests that test the deprecated smart_open.smart_open function. +They mostly duplicate tests in test_smart_open.py and are here to guarantee +backwards compatibility. +""" + import io import logging diff --git a/smart_open/webhdfs.py b/smart_open/webhdfs.py index 4193da7b..8e0442e3 100644 --- a/smart_open/webhdfs.py +++ b/smart_open/webhdfs.py @@ -1,9 +1,9 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- # # Copyright (C) 2019 Radim Rehurek # -# This code is distributed under the terms and conditions from the MIT License (MIT). +# This code is distributed under the terms and conditions +# from the MIT License (MIT). # """Implements reading and writing to/from WebHDFS.