From fe64f2063578a47f5b0a9e3c4b7ea2f02b7f5757 Mon Sep 17 00:00:00 2001 From: Heli Aldridge Date: Fri, 14 Jun 2024 10:18:29 -0400 Subject: [PATCH] DOP-4086: mut-redirects: Warn on redirects with invalid redirect targets (#73) * DOP-4086: mut-redirects: Warn on redirects with invalid redirect targets * Test and release with Python 3.12 to fix macos CI * Allow target redirects to not have a scheme if they have an absolute path component * Update mut/redirects/redirect_main.py Co-authored-by: Allison Reinheimer Moore --------- Co-authored-by: Allison Reinheimer Moore --- .github/workflows/release.yml | 2 +- .github/workflows/test.yml | 2 +- mut/redirects/redirect_main.py | 28 ++++++++++++++++++++++++---- 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 767c6c6..048726b 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -17,7 +17,7 @@ jobs: fetch-depth: 0 - uses: actions/setup-python@v4 with: - python-version: '3.11' + python-version: '3.12' architecture: 'x64' - name: Setup poetry run: python3 -m pip install poetry diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index de9b063..6a48ad5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -12,7 +12,7 @@ jobs: strategy: matrix: platform: [ubuntu-20.04, macos-latest] - python-version: ['3.8', '3.12'] + python-version: ['3.12'] runs-on: ${{ matrix.platform }} steps: - uses: actions/checkout@v3 diff --git a/mut/redirects/redirect_main.py b/mut/redirects/redirect_main.py index 3e79844..a3413ab 100755 --- a/mut/redirects/redirect_main.py +++ b/mut/redirects/redirect_main.py @@ -14,6 +14,7 @@ import os import re import sys +import urllib.parse from typing import List, Optional, Dict, Tuple, Pattern, IO from docopt import docopt @@ -52,6 +53,16 @@ def generate_rule( old_url_sub = self.rule_substitute(old_url, version) new_url_sub = self.rule_substitute(new_url, version) + # S3 redirects must either have an HTTP scheme *or* have an absolute path + parsed_new_url = urllib.parse.urlparse(new_url_sub) + if parsed_new_url.scheme not in { + "http", + "https", + } and not parsed_new_url.path.startswith("/"): + raise ValueError( + f"Invalid redirect target: '{new_url_sub}'. Redirect targets must be absolute HTTP URLs." + ) + # reformatting the old url if len(old_url_sub) > 0: if old_url_sub[0] != "/": @@ -60,7 +71,7 @@ def generate_rule( new_rule = RuleDefinition(is_temp, version, old_url_sub, new_url_sub, False) # check for symlinks - if len(self.symlinks) > 0 and version is not "raw": + if len(self.symlinks) > 0 and version != "raw": for symlink in self.symlinks: if version == symlink[1]: self.generate_rule(is_temp, symlink[0], old_url, new_url, True) @@ -332,7 +343,8 @@ def parse_line( rc.generate_rule(is_temp, version, old_url, new_url) -def parse_source_file(source_path: str, output: Optional[str]) -> None: +def parse_source_file(source_path: str, output: Optional[str]) -> bool: + have_error = False version_regex = re.compile(r"([\[\(])([\w.\*]+)(?:-([\w.\*]+))?([\]\)](.))") url_regex = re.compile(r":(?:[ \t\f\v])(.*)(?:[ \t\f\v]->)(.*)") @@ -345,7 +357,12 @@ def parse_source_file(source_path: str, output: Optional[str]) -> None: for line_num, line in enumerate(file, start=1): if not line or line.startswith("#"): continue - parse_line(line, rc, line_num, version_regex, url_regex) + + try: + parse_line(line, rc, line_num, version_regex, url_regex) + except ValueError as err: + have_error = True + print(f"{line_num}: {str(err)}", file=sys.stderr) # Remove unknown symlinks if root is not None: @@ -368,6 +385,8 @@ def parse_source_file(source_path: str, output: Optional[str]) -> None: with open(output, "w") as f: write_to_file(rc.rules, f) + return have_error + def main() -> None: """Main entry point for mut redirects to create .htaccess file.""" @@ -376,7 +395,8 @@ def main() -> None: output = options["--output"] # Parse source_path and write to file - parse_source_file(source_path, output) + if parse_source_file(source_path, output): + sys.exit(1) if __name__ == "__main__":