diff --git a/CHANGELOG.md b/CHANGELOG.md index a03a1c60..de83826e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## 6.19.2 - Jun 7, 2024 + +* Turn off archiving processed realtime bills by default. + ## 6.19.0 - April 5, 2024 * Adds a new CLI tool that can be called to resolve unresolved bill-to-bill relationships in the openstates DB diff --git a/openstates/cli/update.py b/openstates/cli/update.py index 21ffc9cd..aa5529d0 100644 --- a/openstates/cli/update.py +++ b/openstates/cli/update.py @@ -83,6 +83,7 @@ def do_scrape( strict_validation=args.strict, fastmode=args.fastmode, realtime=args.realtime, + file_archiving_enabled=args.archive, ) report["jurisdiction"] = jscraper.do_scrape() stats.write_stats( @@ -120,6 +121,7 @@ def do_scrape( strict_validation=args.strict, fastmode=args.fastmode, realtime=args.realtime, + file_archiving_enabled=args.archive, ) partial_report = scraper.do_scrape(**scrape_args, session=session) stats.write_stats( @@ -152,6 +154,7 @@ def do_scrape( strict_validation=args.strict, fastmode=args.fastmode, realtime=args.realtime, + file_archiving_enabled=args.archive, ) report[scraper_name] = scraper.do_scrape(**scrape_args) session = scrape_args.get("session", "") @@ -499,6 +502,13 @@ def parse_args() -> tuple[argparse.Namespace, list[str]]: # realtime mode parser.add_argument("--realtime", action="store_true", help="enable realtime mode") + # Archiving realtime processing JSON files + parser.add_argument( + "--archive", + action="store_true", + help="enable archiving of realtime processing JSON files, defaults to false", + ) + # process args return parser.parse_known_args() diff --git a/openstates/scrape/base.py b/openstates/scrape/base.py index b340acec..ea79f087 100644 --- a/openstates/scrape/base.py +++ b/openstates/scrape/base.py @@ -77,6 +77,7 @@ def __init__( strict_validation=True, fastmode=False, realtime=False, + file_archiving_enabled=False, ): super(Scraper, self).__init__() @@ -84,6 +85,7 @@ def __init__( self.jurisdiction = jurisdiction self.datadir = datadir self.realtime = realtime + self.file_archiving_enabled = file_archiving_enabled # scrapelib setup self.timeout = settings.SCRAPELIB_TIMEOUT @@ -139,6 +141,7 @@ def push_to_queue(self): "bucket": bucket, "jurisdiction_id": self.jurisdiction.jurisdiction_id, "jurisdiction_name": self.jurisdiction.name, + "file_archiving_enabled": self.file_archiving_enabled, } ) @@ -183,7 +186,7 @@ def save_object(self, obj): # Remove redundant prefix try: upload_file_path = file_path[ - file_path.index("_data") + len("_data") + 1: + file_path.index("_data") + len("_data") + 1 : ] except Exception: upload_file_path = file_path diff --git a/pyproject.toml b/pyproject.toml index 7cc5e417..f8c2f5fe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "openstates" -version = "6.19.1" +version = "6.19.2" description = "core infrastructure for the openstates project" authors = ["James Turk "] license = "MIT"