From 7cb0cb3d22c3b2961cc2024a158243ddc3399840 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez-Mondrag=C3=B3n?= Date: Thu, 15 Aug 2024 17:35:20 -0600 Subject: [PATCH] chore: Replace black, isort and pyupgrade with Ruff --- .pre-commit-config.yaml | 18 ++----- pyproject.toml | 8 +++ tap_github/authenticator.py | 44 ++++++++-------- tap_github/client.py | 42 ++++++++------- tap_github/organization_streams.py | 12 +++-- tap_github/repository_streams.py | 72 +++++++++++++------------- tap_github/scraping.py | 14 ++--- tap_github/streams.py | 8 +-- tap_github/tap.py | 4 +- tap_github/tests/test_authenticator.py | 18 +------ tap_github/tests/test_tap.py | 4 +- tap_github/user_streams.py | 12 +++-- tap_github/utils/filter_stdout.py | 4 +- 13 files changed, 130 insertions(+), 130 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 16d3a598..2cb46b21 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,21 +14,13 @@ repos: - id: end-of-file-fixer - id: trailing-whitespace -- repo: https://github.com/asottile/pyupgrade - rev: v3.17.0 - hooks: - - id: pyupgrade - args: [--py37-plus] - -- repo: https://github.com/psf/black - rev: 24.8.0 - hooks: - - id: black -- repo: https://github.com/pycqa/isort - rev: 5.13.2 +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.6.0 hooks: - - id: isort + - id: ruff + args: [ --fix ] + - id: ruff-format - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.11.1 diff --git a/pyproject.toml b/pyproject.toml index 446128be..996bf58c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,3 +70,11 @@ markers = [ "repo_list: mark a test as using a list of repos in config", "username_list: mark a test as using a list of usernames in config", ] + +[tool.ruff.lint] +ignore = [] +select = [ + "I", # isort + "UP", # pyupgrade + "FA", # flake8-future-annotations +] diff --git a/tap_github/authenticator.py b/tap_github/authenticator.py index dcc169ae..cc837247 100644 --- a/tap_github/authenticator.py +++ b/tap_github/authenticator.py @@ -1,9 +1,11 @@ """Classes to assist in authenticating to the GitHub API.""" +from __future__ import annotations + import logging import time from copy import deepcopy -from datetime import datetime, timedelta, timezone +from datetime import datetime, timedelta from os import environ from random import choice, shuffle from typing import Any, Dict, List, Optional, Set, Tuple @@ -27,16 +29,16 @@ class TokenManager: def __init__( self, - token: Optional[str], - rate_limit_buffer: Optional[int] = None, - logger: Optional[Any] = None, + token: str | None, + rate_limit_buffer: int | None = None, + logger: Any | None = None, ): """Init TokenManager info.""" self.token = token self.logger = logger self.rate_limit = self.DEFAULT_RATE_LIMIT self.rate_limit_remaining = self.DEFAULT_RATE_LIMIT - self.rate_limit_reset: Optional[datetime] = None + self.rate_limit_reset: datetime | None = None self.rate_limit_used = 0 self.rate_limit_buffer = ( rate_limit_buffer @@ -95,7 +97,7 @@ def has_calls_remaining(self) -> bool: class PersonalTokenManager(TokenManager): """A class to store token rate limiting information.""" - def __init__(self, token: str, rate_limit_buffer: Optional[int] = None, **kwargs): + def __init__(self, token: str, rate_limit_buffer: int | None = None, **kwargs): """Init PersonalTokenRateLimit info.""" super().__init__(token, rate_limit_buffer=rate_limit_buffer, **kwargs) @@ -124,8 +126,8 @@ def generate_jwt_token( def generate_app_access_token( github_app_id: str, github_private_key: str, - github_installation_id: Optional[str] = None, -) -> Tuple[str, datetime]: + github_installation_id: str | None = None, +) -> tuple[str, datetime]: produced_at = datetime.now() jwt_token = generate_jwt_token(github_app_id, github_private_key) @@ -143,9 +145,7 @@ def generate_app_access_token( github_installation_id = choice(list_installations)["id"] - url = "https://api.github.com/app/installations/{}/access_tokens".format( - github_installation_id - ) + url = f"https://api.github.com/app/installations/{github_installation_id}/access_tokens" resp = requests.post(url, headers=headers) if resp.status_code != 201: @@ -164,8 +164,8 @@ class AppTokenManager(TokenManager): def __init__( self, env_key: str, - rate_limit_buffer: Optional[int] = None, - expiry_time_buffer: Optional[int] = None, + rate_limit_buffer: int | None = None, + expiry_time_buffer: int | None = None, **kwargs, ): if rate_limit_buffer is None: @@ -175,15 +175,13 @@ def __init__( parts = env_key.split(";;") self.github_app_id = parts[0] self.github_private_key = (parts[1:2] or [""])[0].replace("\\n", "\n") - self.github_installation_id: Optional[str] = ( - parts[2] if len(parts) >= 3 else None - ) + self.github_installation_id: str | None = parts[2] if len(parts) >= 3 else None if expiry_time_buffer is None: expiry_time_buffer = self.DEFAULT_EXPIRY_BUFFER_MINS self.expiry_time_buffer = expiry_time_buffer - self.token_expires_at: Optional[datetime] = None + self.token_expires_at: datetime | None = None self.claim_token() def claim_token(self): @@ -247,14 +245,14 @@ class GitHubTokenAuthenticator(APIAuthenticatorBase): def get_env(): return dict(environ) - def prepare_tokens(self) -> List[TokenManager]: + def prepare_tokens(self) -> list[TokenManager]: """Prep GitHub tokens""" env_dict = self.get_env() rate_limit_buffer = self._config.get("rate_limit_buffer", None) expiry_time_buffer = self._config.get("expiry_time_buffer", None) - personal_tokens: Set[str] = set() + personal_tokens: set[str] = set() if "auth_token" in self._config: personal_tokens.add(self._config["auth_token"]) if "additional_auth_tokens" in self._config: @@ -274,7 +272,7 @@ def prepare_tokens(self) -> List[TokenManager]: ) personal_tokens = personal_tokens.union(env_tokens) - token_managers: List[TokenManager] = [] + token_managers: list[TokenManager] = [] for token in personal_tokens: token_manager = PersonalTokenManager( token, rate_limit_buffer=rate_limit_buffer, logger=self.logger @@ -315,9 +313,9 @@ def __init__(self, stream: RESTStream) -> None: super().__init__(stream=stream) self.logger: logging.Logger = stream.logger self.tap_name: str = stream.tap_name - self._config: Dict[str, Any] = dict(stream.config) + self._config: dict[str, Any] = dict(stream.config) self.token_managers = self.prepare_tokens() - self.active_token: Optional[TokenManager] = ( + self.active_token: TokenManager | None = ( choice(self.token_managers) if self.token_managers else None ) @@ -348,7 +346,7 @@ def update_rate_limit( self.active_token.update_rate_limit(response_headers) @property - def auth_headers(self) -> Dict[str, str]: + def auth_headers(self) -> dict[str, str]: """Return a dictionary of auth headers to be applied. These will be merged with any `http_headers` specified in the stream. diff --git a/tap_github/client.py b/tap_github/client.py index 7d3ed23a..ea486265 100644 --- a/tap_github/client.py +++ b/tap_github/client.py @@ -1,5 +1,7 @@ """REST client handling, including GitHubStream base class.""" +from __future__ import annotations + import collections import email.utils import inspect @@ -27,7 +29,7 @@ class GitHubRestStream(RESTStream): """GitHub Rest stream class.""" MAX_PER_PAGE = 100 # GitHub's limit is 100. - MAX_RESULTS_LIMIT: Optional[int] = None + MAX_RESULTS_LIMIT: int | None = None DEFAULT_API_BASE_URL = "https://api.github.com" LOG_REQUEST_METRIC_URLS = True @@ -37,7 +39,7 @@ class GitHubRestStream(RESTStream): # This only has effect on streams whose `replication_key` is `updated_at`. use_fake_since_parameter = False - _authenticator: Optional[GitHubTokenAuthenticator] = None + _authenticator: GitHubTokenAuthenticator | None = None @property def authenticator(self) -> GitHubTokenAuthenticator: @@ -50,19 +52,19 @@ def url_base(self) -> str: return self.config.get("api_url_base", self.DEFAULT_API_BASE_URL) primary_keys = ["id"] - replication_key: Optional[str] = None - tolerated_http_errors: List[int] = [] + replication_key: str | None = None + tolerated_http_errors: list[int] = [] @property - def http_headers(self) -> Dict[str, str]: + def http_headers(self) -> dict[str, str]: """Return the http headers needed.""" headers = {"Accept": "application/vnd.github.v3+json"} headers["User-Agent"] = cast(str, self.config.get("user_agent", "tap-github")) return headers def get_next_page_token( - self, response: requests.Response, previous_token: Optional[Any] - ) -> Optional[Any]: + self, response: requests.Response, previous_token: Any | None + ) -> Any | None: """Return a token for identifying next page or None if no more pages.""" if ( previous_token @@ -135,8 +137,8 @@ def get_next_page_token( return (previous_token or 1) + 1 def get_url_params( - self, context: Optional[Dict], next_page_token: Optional[Any] - ) -> Dict[str, Any]: + self, context: dict | None, next_page_token: Any | None + ) -> dict[str, Any]: """Return a dictionary of values to be used in URL parameterization.""" params: dict = {"per_page": self.MAX_PER_PAGE} if next_page_token: @@ -265,7 +267,7 @@ def parse_response(self, response: requests.Response) -> Iterable[dict]: yield from results - def post_process(self, row: dict, context: Optional[Dict[str, str]] = None) -> dict: + def post_process(self, row: dict, context: dict[str, str] | None = None) -> dict: """Add `repo_id` by default to all streams.""" if context is not None and "repo_id" in context: row["repo_id"] = context["repo_id"] @@ -295,8 +297,8 @@ def calculate_sync_cost( self, request: requests.PreparedRequest, response: requests.Response, - context: Optional[dict], - ) -> Dict[str, int]: + context: dict | None, + ) -> dict[str, int]: """Return the cost of the last REST API call.""" return {"rest": 1, "graphql": 0, "search": 0} @@ -327,8 +329,8 @@ def parse_response(self, response: requests.Response) -> Iterable[dict]: yield from extract_jsonpath(self.query_jsonpath, input=resp_json) def get_next_page_token( - self, response: requests.Response, previous_token: Optional[Any] - ) -> Optional[Any]: + self, response: requests.Response, previous_token: Any | None + ) -> Any | None: """ Return a dict of cursors for identifying next page or None if no more pages. @@ -352,7 +354,7 @@ def get_next_page_token( with_keys=True, ) - has_next_page_indices: List[int] = [] + has_next_page_indices: list[int] = [] # Iterate over all the items and filter items with hasNextPage = True. for key, value in next_page_results.items(): # Check if key is even then add pair to new dictionary @@ -369,7 +371,7 @@ def get_next_page_token( # We leverage previous_token to remember the pagination cursors # for indices below max_pagination_index. - next_page_cursors: Dict[str, str] = dict() + next_page_cursors: dict[str, str] = dict() for key, value in (previous_token or {}).items(): # Only keep pagination info for indices below max_pagination_index. pagination_index = int(str(key).split("_")[1]) @@ -391,8 +393,8 @@ def get_next_page_token( return next_page_cursors def get_url_params( - self, context: Optional[Dict], next_page_token: Optional[Any] - ) -> Dict[str, Any]: + self, context: dict | None, next_page_token: Any | None + ) -> dict[str, Any]: """Return a dictionary of values to be used in URL parameterization.""" params = context.copy() if context else dict() params["per_page"] = self.MAX_PER_PAGE @@ -409,8 +411,8 @@ def calculate_sync_cost( self, request: requests.PreparedRequest, response: requests.Response, - context: Optional[dict], - ) -> Dict[str, int]: + context: dict | None, + ) -> dict[str, int]: """Return the cost of the last graphql API call.""" costgen = extract_jsonpath("$.data.rateLimit.cost", input=response.json()) # calculate_sync_cost is called before the main response parsing. diff --git a/tap_github/organization_streams.py b/tap_github/organization_streams.py index b4222172..68220e05 100644 --- a/tap_github/organization_streams.py +++ b/tap_github/organization_streams.py @@ -1,5 +1,7 @@ """User Stream types classes for tap-github.""" +from __future__ import annotations + from typing import Any, Dict, Iterable, List, Optional from singer_sdk import typing as th # JSON Schema typing helpers @@ -16,15 +18,15 @@ class OrganizationStream(GitHubRestStream): path = "/orgs/{org}" @property - def partitions(self) -> Optional[List[Dict]]: + def partitions(self) -> list[dict] | None: return [{"org": org} for org in self.config["organizations"]] - def get_child_context(self, record: Dict, context: Optional[Dict]) -> dict: + def get_child_context(self, record: dict, context: dict | None) -> dict: return { "org": record["login"], } - def get_records(self, context: Optional[Dict]) -> Iterable[Dict[str, Any]]: + def get_records(self, context: dict | None) -> Iterable[dict[str, Any]]: """ Override the parent method to allow skipping API calls if the stream is deselected and skip_parent_streams is True in config. @@ -74,7 +76,7 @@ class TeamsStream(GitHubRestStream): parent_stream_type = OrganizationStream state_partitioning_keys = ["org"] - def get_child_context(self, record: Dict, context: Optional[Dict]) -> dict: + def get_child_context(self, record: dict, context: dict | None) -> dict: new_context = {"team_slug": record["slug"]} if context: return { @@ -129,7 +131,7 @@ class TeamMembersStream(GitHubRestStream): parent_stream_type = TeamsStream state_partitioning_keys = ["team_slug", "org"] - def get_child_context(self, record: Dict, context: Optional[Dict]) -> dict: + def get_child_context(self, record: dict, context: dict | None) -> dict: new_context = {"username": record["login"]} if context: return { diff --git a/tap_github/repository_streams.py b/tap_github/repository_streams.py index fab32d60..56b6e499 100644 --- a/tap_github/repository_streams.py +++ b/tap_github/repository_streams.py @@ -1,5 +1,7 @@ """Repository Stream types classes for tap-github.""" +from __future__ import annotations + from typing import Any, Dict, Iterable, List, Optional, Tuple from urllib.parse import parse_qs, urlparse @@ -29,8 +31,8 @@ class RepositoryStream(GitHubRestStream): replication_key = "updated_at" def get_url_params( - self, context: Optional[Dict], next_page_token: Optional[Any] - ) -> Dict[str, Any]: + self, context: dict | None, next_page_token: Any | None + ) -> dict[str, Any]: """Return a dictionary of values to be used in URL parameterization.""" assert context is not None, f"Context cannot be empty for '{self.name}' stream." params = super().get_url_params(context, next_page_token) @@ -61,7 +63,7 @@ def records_jsonpath(self) -> str: # type: ignore else: return "$[*]" - def get_repo_ids(self, repo_list: List[Tuple[str]]) -> List[Dict[str, str]]: + def get_repo_ids(self, repo_list: list[tuple[str]]) -> list[dict[str, str]]: """Enrich the list of repos with their numeric ID from github. This helps maintain a stable id for context and bookmarks. @@ -147,7 +149,7 @@ def validate_response(self, response: requests.Response) -> None: return repos_with_ids @property - def partitions(self) -> Optional[List[Dict[str, str]]]: + def partitions(self) -> list[dict[str, str]] | None: """Return a list of partitions. This is called before syncing records, we use it to fetch some additional @@ -183,7 +185,7 @@ def partitions(self) -> Optional[List[Dict[str, str]]]: return [{"org": org} for org in self.config["organizations"]] return None - def get_child_context(self, record: Dict, context: Optional[Dict]) -> dict: + def get_child_context(self, record: dict, context: dict | None) -> dict: """Return a child context object from the record and optional provided context. By default, will return context if provided and otherwise the record dict. @@ -196,7 +198,7 @@ def get_child_context(self, record: Dict, context: Optional[Dict]) -> dict: "repo_id": record["id"], } - def get_records(self, context: Optional[Dict]) -> Iterable[Dict[str, Any]]: + def get_records(self, context: dict | None) -> Iterable[dict[str, Any]]: """ Override the parent method to allow skipping API calls if the stream is deselected and skip_parent_streams is True in config. @@ -472,7 +474,7 @@ class EventsStream(GitHubRestStream): # GitHub is missing the "since" parameter on this endpoint. use_fake_since_parameter = True - def get_records(self, context: Optional[Dict] = None) -> Iterable[Dict[str, Any]]: + def get_records(self, context: dict | None = None) -> Iterable[dict[str, Any]]: """Return a generator of row-type dictionary objects. Each row emitted should be a dictionary of property names to their values. """ @@ -482,7 +484,7 @@ def get_records(self, context: Optional[Dict] = None) -> Iterable[Dict[str, Any] return super().get_records(context) - def post_process(self, row: dict, context: Optional[Dict] = None) -> dict: + def post_process(self, row: dict, context: dict | None = None) -> dict: row = super().post_process(row, context) # TODO - We should think about the best approach to handle this. An alternative would be to # do a 'dumb' tap that just keeps the same schemas as GitHub without renaming these @@ -821,8 +823,8 @@ class IssuesStream(GitHubRestStream): state_partitioning_keys = ["repo", "org"] def get_url_params( - self, context: Optional[Dict], next_page_token: Optional[Any] - ) -> Dict[str, Any]: + self, context: dict | None, next_page_token: Any | None + ) -> dict[str, Any]: """Return a dictionary of values to be used in URL parameterization.""" assert context is not None, f"Context cannot be empty for '{self.name}' stream." params = super().get_url_params(context, next_page_token) @@ -850,7 +852,7 @@ def http_headers(self) -> dict: headers["Accept"] = "application/vnd.github.squirrel-girl-preview" return headers - def post_process(self, row: dict, context: Optional[Dict] = None) -> dict: + def post_process(self, row: dict, context: dict | None = None) -> dict: row = super().post_process(row, context) row["type"] = "pull_request" if "pull_request" in row else "issue" if row["body"] is not None: @@ -933,7 +935,7 @@ class IssueCommentsStream(GitHubRestStream): # But it is too expensive on large repos and results in a lot of server errors. use_fake_since_parameter = True - def get_records(self, context: Optional[Dict] = None) -> Iterable[Dict[str, Any]]: + def get_records(self, context: dict | None = None) -> Iterable[dict[str, Any]]: """Return a generator of row-type dictionary objects. Each row emitted should be a dictionary of property names to their values. @@ -944,7 +946,7 @@ def get_records(self, context: Optional[Dict] = None) -> Iterable[Dict[str, Any] return super().get_records(context) - def post_process(self, row: dict, context: Optional[Dict] = None) -> dict: + def post_process(self, row: dict, context: dict | None = None) -> dict: row = super().post_process(row, context) row["issue_number"] = int(row["issue_url"].split("/")[-1]) if row["body"] is not None: @@ -992,7 +994,7 @@ class IssueEventsStream(GitHubRestStream): # GitHub is missing the "since" parameter on this endpoint. use_fake_since_parameter = True - def get_records(self, context: Optional[Dict] = None) -> Iterable[Dict[str, Any]]: + def get_records(self, context: dict | None = None) -> Iterable[dict[str, Any]]: """Return a generator of row-type dictionary objects. Each row emitted should be a dictionary of property names to their values. @@ -1003,7 +1005,7 @@ def get_records(self, context: Optional[Dict] = None) -> Iterable[Dict[str, Any] return super().get_records(context) - def post_process(self, row: dict, context: Optional[Dict] = None) -> dict: + def post_process(self, row: dict, context: dict | None = None) -> dict: row = super().post_process(row, context) if "issue" in row.keys(): row["issue_number"] = int(row["issue"].pop("number")) @@ -1045,7 +1047,7 @@ class CommitsStream(GitHubRestStream): state_partitioning_keys = ["repo", "org"] ignore_parent_replication_key = True - def post_process(self, row: dict, context: Optional[Dict] = None) -> dict: + def post_process(self, row: dict, context: dict | None = None) -> dict: """ Add a timestamp top-level field to be used as state replication key. It's not clear from github's API docs which time (author or committer) @@ -1179,8 +1181,8 @@ class PullRequestsStream(GitHubRestStream): use_fake_since_parameter = True def get_url_params( - self, context: Optional[Dict], next_page_token: Optional[Any] - ) -> Dict[str, Any]: + self, context: dict | None, next_page_token: Any | None + ) -> dict[str, Any]: """Return a dictionary of values to be used in URL parameterization.""" assert context is not None, f"Context cannot be empty for '{self.name}' stream." params = super().get_url_params(context, next_page_token) @@ -1199,7 +1201,7 @@ def http_headers(self) -> dict: headers["Accept"] = "application/vnd.github.squirrel-girl-preview" return headers - def post_process(self, row: dict, context: Optional[Dict] = None) -> dict: + def post_process(self, row: dict, context: dict | None = None) -> dict: row = super().post_process(row, context) if row["body"] is not None: # some pr bodies include control characters such as \x00 @@ -1216,7 +1218,7 @@ def post_process(self, row: dict, context: Optional[Dict] = None) -> dict: row["reactions"]["minus_one"] = row["reactions"].pop("-1", None) return row - def get_child_context(self, record: Dict, context: Optional[Dict]) -> dict: + def get_child_context(self, record: dict, context: dict | None) -> dict: if context: return { "org": context["org"], @@ -1409,7 +1411,7 @@ class PullRequestCommits(GitHubRestStream): ), ).to_dict() - def post_process(self, row: dict, context: Optional[Dict[str, str]] = None) -> dict: + def post_process(self, row: dict, context: dict[str, str] | None = None) -> dict: row = super().post_process(row, context) if context is not None and "pull_number" in context: row["pull_number"] = context["pull_number"] @@ -1568,8 +1570,8 @@ class AnonymousContributorsStream(GitHubRestStream): tolerated_http_errors = [204] def get_url_params( - self, context: Optional[Dict], next_page_token: Optional[Any] - ) -> Dict[str, Any]: + self, context: dict | None, next_page_token: Any | None + ) -> dict[str, Any]: """Return a dictionary of values to be used in URL parameterization.""" assert context is not None, f"Context cannot be empty for '{self.name}' stream." params = super().get_url_params(context, next_page_token) @@ -1625,7 +1627,7 @@ def http_headers(self) -> dict: headers["Accept"] = "application/vnd.github.v3.star+json" return headers - def post_process(self, row: dict, context: Optional[Dict] = None) -> dict: + def post_process(self, row: dict, context: dict | None = None) -> dict: """ Add a user_id top-level field to be used as state replication key. """ @@ -1665,7 +1667,7 @@ def __init__(self, *args, **kwargs): "Looking for the older version? Use 'stargazers_rest'." ) - def post_process(self, row: dict, context: Optional[Dict] = None) -> dict: + def post_process(self, row: dict, context: dict | None = None) -> dict: """ Add a user_id top-level field to be used as state replication key. """ @@ -1674,8 +1676,8 @@ def post_process(self, row: dict, context: Optional[Dict] = None) -> dict: return row def get_next_page_token( - self, response: requests.Response, previous_token: Optional[Any] - ) -> Optional[Any]: + self, response: requests.Response, previous_token: Any | None + ) -> Any | None: """ Exit early if a since parameter is provided. """ @@ -1818,7 +1820,7 @@ class ProjectsStream(GitHubRestStream): parent_stream_type = RepositoryStream state_partitioning_keys = ["repo", "org"] - def get_child_context(self, record: Dict, context: Optional[Dict]) -> dict: + def get_child_context(self, record: dict, context: dict | None) -> dict: return { "project_id": record["id"], "repo_id": context["repo_id"] if context else None, @@ -1857,7 +1859,7 @@ class ProjectColumnsStream(GitHubRestStream): parent_stream_type = ProjectsStream state_partitioning_keys = ["project_id", "repo", "org"] - def get_child_context(self, record: Dict, context: Optional[Dict]) -> dict: + def get_child_context(self, record: dict, context: dict | None) -> dict: return { "column_id": record["id"], "repo_id": context["repo_id"] if context else None, @@ -2006,7 +2008,7 @@ def parse_response(self, response: requests.Response) -> Iterable[dict]: """Parse the response and return an iterator of result rows.""" yield from extract_jsonpath(self.records_jsonpath, input=response.json()) - def get_child_context(self, record: dict, context: Optional[dict]) -> dict: + def get_child_context(self, record: dict, context: dict | None) -> dict: """Return a child context object from the record and optional provided context. By default, will return context if provided and otherwise the record dict. Developers may override this behavior to send specific information to child @@ -2082,8 +2084,8 @@ def parse_response(self, response: requests.Response) -> Iterable[dict]: yield from extract_jsonpath(self.records_jsonpath, input=response.json()) def get_url_params( - self, context: Optional[dict], next_page_token: Optional[Any] - ) -> Dict[str, Any]: + self, context: dict | None, next_page_token: Any | None + ) -> dict[str, Any]: params = super().get_url_params(context, next_page_token) params["filter"] = "all" return params @@ -2115,7 +2117,7 @@ def parse_response(self, response: requests.Response) -> Iterable[dict]: """Parse the repository main page to extract extra metrics.""" yield from scrape_metrics(response, self.logger) - def post_process(self, row: dict, context: Optional[Dict] = None) -> dict: + def post_process(self, row: dict, context: dict | None = None) -> dict: row = super().post_process(row, context) if context is not None: row["repo"] = context["repo"] @@ -2167,7 +2169,7 @@ def parse_response(self, response: requests.Response) -> Iterable[dict]: """Get the response for the first page and scrape results, potentially iterating through pages.""" yield from scrape_dependents(response, self.logger) - def post_process(self, row: dict, context: Optional[Dict] = None) -> dict: + def post_process(self, row: dict, context: dict | None = None) -> dict: new_row = {"dependent": row} new_row = super().post_process(new_row, context) # we extract dependent_name_with_owner to be able to use it safely as a primary key, @@ -2227,7 +2229,7 @@ def http_headers(self) -> dict: headers["Accept"] = "application/vnd.github.hawkgirl-preview+json" return headers - def post_process(self, row: dict, context: Optional[Dict] = None) -> dict: + def post_process(self, row: dict, context: dict | None = None) -> dict: """ Add a dependency_repo_id top-level field to be used as primary key. """ diff --git a/tap_github/scraping.py b/tap_github/scraping.py index b3cb0d46..2f41f675 100644 --- a/tap_github/scraping.py +++ b/tap_github/scraping.py @@ -3,6 +3,8 @@ Inspired by https://github.com/dogsheep/github-to-sqlite/pull/70 """ +from __future__ import annotations + import logging import re import time @@ -18,8 +20,8 @@ def scrape_dependents( - response: requests.Response, logger: Optional[logging.Logger] = None -) -> Iterable[Dict[str, Any]]: + response: requests.Response, logger: logging.Logger | None = None +) -> Iterable[dict[str, Any]]: from bs4 import BeautifulSoup logger = logger or logging.getLogger("scraping") @@ -41,7 +43,7 @@ def scrape_dependents( yield from _scrape_dependents(f"https://{base_url}/{link}", logger) -def _scrape_dependents(url: str, logger: logging.Logger) -> Iterable[Dict[str, Any]]: +def _scrape_dependents(url: str, logger: logging.Logger) -> Iterable[dict[str, Any]]: # Optional dependency: from bs4 import BeautifulSoup @@ -94,7 +96,7 @@ def _scrape_dependents(url: str, logger: logging.Logger) -> Iterable[Dict[str, A url = "" -def parse_counter(tag: Union[Tag, NavigableString, None]) -> int: +def parse_counter(tag: Tag | NavigableString | None) -> int: """ Extract a count of [issues|PR|contributors...] from an HTML tag. For very high numbers, we only get an approximate value as github @@ -118,8 +120,8 @@ def parse_counter(tag: Union[Tag, NavigableString, None]) -> int: def scrape_metrics( - response: requests.Response, logger: Optional[logging.Logger] = None -) -> Iterable[Dict[str, Any]]: + response: requests.Response, logger: logging.Logger | None = None +) -> Iterable[dict[str, Any]]: from bs4 import BeautifulSoup logger = logger or logging.getLogger("scraping") diff --git a/tap_github/streams.py b/tap_github/streams.py index e1b05e58..d10bfd4b 100644 --- a/tap_github/streams.py +++ b/tap_github/streams.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from enum import Enum from typing import List, Set, Type @@ -57,10 +59,10 @@ class Streams(Enum): Represents all streams our tap supports, and which queries (by username, by organization, etc.) you can use. """ - valid_queries: Set[str] - streams: List[Type[Stream]] + valid_queries: set[str] + streams: list[type[Stream]] - def __init__(self, valid_queries: Set[str], streams: List[Type[Stream]]): + def __init__(self, valid_queries: set[str], streams: list[type[Stream]]): self.valid_queries = valid_queries self.streams = streams diff --git a/tap_github/tap.py b/tap_github/tap.py index 5dc028c5..67f2de38 100644 --- a/tap_github/tap.py +++ b/tap_github/tap.py @@ -1,5 +1,7 @@ """GitHub tap class.""" +from __future__ import annotations + import logging import os from typing import List @@ -77,7 +79,7 @@ def logger(cls) -> logging.Logger: ), ).to_dict() - def discover_streams(self) -> List[Stream]: + def discover_streams(self) -> list[Stream]: """Return a list of discovered streams for each query.""" # If the config is empty, assume we are running --help or --capabilities. diff --git a/tap_github/tests/test_authenticator.py b/tap_github/tests/test_authenticator.py index 516a6eea..cf1634ac 100644 --- a/tap_github/tests/test_authenticator.py +++ b/tap_github/tests/test_authenticator.py @@ -15,7 +15,6 @@ class TestTokenManager: - def test_default_rate_limits(self): token_manager = TokenManager("mytoken", rate_limit_buffer=700) @@ -122,7 +121,6 @@ def test_has_calls_remaining_fails_if_few_calls_remaining_and_reset_time_not_rea class TestAppTokenManager: - def test_initialization_with_3_part_env_key(self): with patch.object(AppTokenManager, "claim_token", return_value=None): token_manager = AppTokenManager("12345;;key\\ncontent;;67890") @@ -305,12 +303,10 @@ def mock_stream(): class TestGitHubTokenAuthenticator: - def test_prepare_tokens_returns_empty_if_none_found(self, mock_stream): with patch.object( GitHubTokenAuthenticator, "get_env", return_value={"GITHUB_TLJKJFDS": "gt1"} ), patch.object(PersonalTokenManager, "is_valid_token", return_value=True): - auth = GitHubTokenAuthenticator(stream=mock_stream) token_managers = auth.prepare_tokens() @@ -322,7 +318,6 @@ def test_config_auth_token_only(self, mock_stream): "get_env", return_value={"OTHER_TOKEN": "blah", "NOT_THE_RIGHT_TOKEN": "meh"}, ), patch.object(PersonalTokenManager, "is_valid_token", return_value=True): - stream = mock_stream stream.config.update({"auth_token": "gt5"}) auth = GitHubTokenAuthenticator(stream=stream) @@ -337,7 +332,6 @@ def test_config_additional_auth_tokens_only(self, mock_stream): "get_env", return_value={"OTHER_TOKEN": "blah", "NOT_THE_RIGHT_TOKEN": "meh"}, ), patch.object(PersonalTokenManager, "is_valid_token", return_value=True): - stream = mock_stream stream.config.update({"additional_auth_tokens": ["gt7", "gt8", "gt9"]}) auth = GitHubTokenAuthenticator(stream=stream) @@ -356,7 +350,6 @@ def test_env_personal_tokens_only(self, mock_stream): "OTHER_TOKEN": "blah", }, ), patch.object(PersonalTokenManager, "is_valid_token", return_value=True): - auth = GitHubTokenAuthenticator(stream=mock_stream) token_managers = auth.prepare_tokens() @@ -372,7 +365,6 @@ def test_env_app_key_only(self, mock_stream): "tap_github.authenticator.generate_app_access_token", return_value=("installationtoken12345", MagicMock()), ): - auth = GitHubTokenAuthenticator(stream=mock_stream) token_managers = auth.prepare_tokens() @@ -396,7 +388,6 @@ def test_all_token_types(self, mock_stream): "tap_github.authenticator.generate_app_access_token", return_value=("installationtoken12345", MagicMock()), ): - stream = mock_stream stream.config.update( {"auth_token": "gt5", "additional_auth_tokens": ["gt7", "gt8", "gt9"]} @@ -429,7 +420,6 @@ def test_all_token_types_except_additional_auth_tokens(self, mock_stream): "tap_github.authenticator.generate_app_access_token", return_value=("installationtoken12345", MagicMock()), ): - stream = mock_stream stream.config.update( { @@ -460,7 +450,6 @@ def test_auth_token_and_additional_auth_tokens_deduped(self, mock_stream): "tap_github.authenticator.generate_app_access_token", return_value=("installationtoken12345", MagicMock()), ): - stream = mock_stream stream.config.update( { @@ -488,7 +477,6 @@ def test_auth_token_and_env_tokens_deduped(self, mock_stream): "tap_github.authenticator.generate_app_access_token", return_value=("installationtoken12345", MagicMock()), ): - stream = mock_stream stream.config.update({"auth_token": "gt1"}) auth = GitHubTokenAuthenticator(stream=stream) @@ -525,7 +513,6 @@ def test_exclude_generated_app_token_if_invalid(self, mock_stream): "tap_github.authenticator.generate_app_access_token", return_value=("installationtoken12345", MagicMock()), ): - auth = GitHubTokenAuthenticator(stream=mock_stream) token_managers = auth.prepare_tokens() @@ -538,13 +525,10 @@ def test_prepare_tokens_returns_empty_if_all_tokens_invalid(self, mock_stream): return_value={"GITHUB_TOKEN1": "gt1", "GITHUB_APP_PRIVATE_KEY": "123;;key"}, ), patch.object( PersonalTokenManager, "is_valid_token", return_value=False - ), patch.object( - AppTokenManager, "is_valid_token", return_value=False - ), patch( + ), patch.object(AppTokenManager, "is_valid_token", return_value=False), patch( "tap_github.authenticator.generate_app_access_token", return_value=("installationtoken12345", MagicMock()), ): - stream = mock_stream stream.config.update( {"auth_token": "gt5", "additional_auth_tokens": ["gt7", "gt8", "gt9"]} diff --git a/tap_github/tests/test_tap.py b/tap_github/tests/test_tap.py index f8fcb096..f9988ee1 100644 --- a/tap_github/tests/test_tap.py +++ b/tap_github/tests/test_tap.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import json import os import re @@ -58,7 +60,7 @@ def test_validate_repo_list_config(repo_list_config): def run_tap_with_config( - capsys, config_obj: dict, skip_stream: Optional[str], single_stream: Optional[str] + capsys, config_obj: dict, skip_stream: str | None, single_stream: str | None ) -> str: """ Run the tap with the given config and capture stdout, optionally diff --git a/tap_github/user_streams.py b/tap_github/user_streams.py index 9603df61..8ae3146d 100644 --- a/tap_github/user_streams.py +++ b/tap_github/user_streams.py @@ -1,5 +1,7 @@ """User Stream types classes for tap-github.""" +from __future__ import annotations + import re from typing import Any, Dict, Iterable, List, Optional @@ -25,7 +27,7 @@ def path(self) -> str: # type: ignore return "/user/{id}" @property - def partitions(self) -> Optional[List[Dict]]: + def partitions(self) -> list[dict] | None: """Return a list of partitions.""" if "user_usernames" in self.config: input_user_list = self.config["user_usernames"] @@ -48,13 +50,13 @@ def partitions(self) -> Optional[List[Dict]]: return [{"id": id} for id in self.config["user_ids"]] return None - def get_child_context(self, record: Dict, context: Optional[Dict]) -> dict: + def get_child_context(self, record: dict, context: dict | None) -> dict: return { "username": record["login"], "user_id": record["id"], } - def get_user_ids(self, user_list: List[str]) -> List[Dict[str, str]]: + def get_user_ids(self, user_list: list[str]) -> list[dict[str, str]]: """Enrich the list of userse with their numeric ID from github. This helps maintain a stable id for context and bookmarks. @@ -132,7 +134,7 @@ def query(self) -> str: self.logger.info(f"Running the tap on {len(users_with_ids)} users") return users_with_ids - def get_records(self, context: Optional[Dict]) -> Iterable[Dict[str, Any]]: + def get_records(self, context: dict | None) -> Iterable[dict[str, Any]]: """ Override the parent method to allow skipping API calls if the stream is deselected and skip_parent_streams is True in config. @@ -218,7 +220,7 @@ def http_headers(self) -> dict: headers["Accept"] = "application/vnd.github.v3.star+json" return headers - def post_process(self, row: dict, context: Optional[Dict] = None) -> dict: + def post_process(self, row: dict, context: dict | None = None) -> dict: """ Add a repo_id top-level field to be used as state replication key. """ diff --git a/tap_github/utils/filter_stdout.py b/tap_github/utils/filter_stdout.py index bfddbe9b..988e49b1 100644 --- a/tap_github/utils/filter_stdout.py +++ b/tap_github/utils/filter_stdout.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import contextlib import io import re @@ -8,7 +10,7 @@ class FilterStdOutput: """Filter out stdout/sterr given a regex pattern.""" - def __init__(self, stream: TextIO, re_pattern: Union[str, Pattern]): + def __init__(self, stream: TextIO, re_pattern: str | Pattern): self.stream = stream self.pattern = ( re.compile(re_pattern) if isinstance(re_pattern, str) else re_pattern