From 288dd8113669928a17c6520db48e86fe492cd36a Mon Sep 17 00:00:00 2001
From: Eric Boucher
Date: Thu, 19 May 2022 00:17:41 -0400
Subject: [PATCH] Override REST stream

Replace the REST-based StargazersStream with the GraphQL implementation
(previously StargazersGraphqlStream). The stream keeps the name
"stargazers" and the REST stream's primary keys and state partitioning
keys ("repo", "org"); post_process now also copies "repo" and "org" from
the parent context into each row.

StargazersGraphqlStream is removed from the stream registry in
tap_github/streams.py, and the "first 100 projects" warning is dropped
from the UserContributedToStream docstring.
---
Review note: the leftover debugging `print(context)` call was removed from
StargazersStream.post_process; the second hunk header and the diffstat
were adjusted to match. The blob ids on the `index` lines still refer to
the patch as originally generated.

 tap_github/repository_streams.py | 54 ++++----------------------------
 tap_github/streams.py            |  2 --
 tap_github/user_streams.py       |  2 +-
 3 files changed, 7 insertions(+), 51 deletions(-)

diff --git a/tap_github/repository_streams.py b/tap_github/repository_streams.py
index a870c048..ce9cae6a 100644
--- a/tap_github/repository_streams.py
+++ b/tap_github/repository_streams.py
@@ -1476,59 +1476,15 @@ def parse_response(self, response: requests.Response) -> Iterable[dict]:
     ).to_dict()
 
 
-class StargazersStream(GitHubRestStream):
-    """Defines 'Stargazers' stream. Warning: this stream does NOT track star deletions."""
-
-    name = "stargazers"
-    path = "/repos/{org}/{repo}/stargazers"
-    primary_keys = ["user_id", "repo", "org"]
-    parent_stream_type = RepositoryStream
-    state_partitioning_keys = ["repo", "org"]
-    replication_key = "starred_at"
-    # GitHub is missing the "since" parameter on this endpoint.
-    missing_since_parameter = True
-
-    @property
-    def http_headers(self) -> dict:
-        """Return the http headers needed.
-
-        Overridden to use an endpoint which includes starred_at property:
-        https://docs.github.com/en/rest/reference/activity#custom-media-types-for-starring
-        """
-        headers = super().http_headers
-        headers["Accept"] = "application/vnd.github.v3.star+json"
-        return headers
-
-    def post_process(self, row: dict, context: Optional[Dict] = None) -> dict:
-        """
-        Add a user_id top-level field to be used as state replication key.
-        """
-        row["user_id"] = row["user"]["id"]
-        if context is not None:
-            row["repo_id"] = context["repo_id"]
-        return row
-
-    schema = th.PropertiesList(
-        # Parent Keys
-        th.Property("repo", th.StringType),
-        th.Property("org", th.StringType),
-        th.Property("repo_id", th.IntegerType),
-        th.Property("user_id", th.IntegerType),
-        # Stargazer Info
-        th.Property("starred_at", th.DateTimeType),
-        th.Property("user", user_object),
-    ).to_dict()
-
-
-class StargazersGraphqlStream(GitHubGraphqlStream):
-    """Defines 'UserContributedToStream' stream. Warning: this stream 'only' gets the first 100 projects (by stars)."""
+class StargazersStream(GitHubGraphqlStream):
+    """Defines 'Stargazers' stream."""
 
     name = "stargazers"
     query_jsonpath = "$.data.repository.stargazers.edges.[*]"
-    primary_keys = ["user_id", "repo_id"]
+    primary_keys = ["user_id", "repo", "org"]
     replication_key = "starred_at"
     parent_stream_type = RepositoryStream
-    state_partitioning_keys = ["repo_id"]
+    state_partitioning_keys = ["repo", "org"]
     # The parent repository object changes if the number of stargazers changes.
     ignore_parent_replication_key = False
 
@@ -1538,7 +1494,9 @@ def post_process(self, row: dict, context: Optional[Dict] = None) -> dict:
         """
         row["user_id"] = row["user"]["id"]
         if context is not None:
             row["repo_id"] = context["repo_id"]
+            row["repo"] = context["repo"]
+            row["org"] = context["org"]
         return row
 
     def get_next_page_token(
diff --git a/tap_github/streams.py b/tap_github/streams.py
index 5a265468..20769cec 100644
--- a/tap_github/streams.py
+++ b/tap_github/streams.py
@@ -28,7 +28,6 @@
     RepositoryStream,
     ReviewCommentsStream,
     ReviewsStream,
-    StargazersGraphqlStream,
     StargazersStream,
     StatsContributorsStream,
     WorkflowRunJobsStream,
@@ -87,7 +86,6 @@ def __init__(self, valid_queries: Set[str], streams: List[Type[Stream]]):
             RepositoryStream,
             ReviewCommentsStream,
             ReviewsStream,
-            StargazersGraphqlStream,
             StargazersStream,
             StatsContributorsStream,
             WorkflowRunJobsStream,
diff --git a/tap_github/user_streams.py b/tap_github/user_streams.py
index 971bca19..f1daa5df 100644
--- a/tap_github/user_streams.py
+++ b/tap_github/user_streams.py
@@ -256,7 +256,7 @@ def post_process(self, row: dict, context: Optional[Dict] = None) -> dict:
 
 
 class UserContributedToStream(GitHubGraphqlStream):
-    """Defines 'UserContributedToStream' stream. Warning: this stream 'only' gets the first 100 projects (by stars)."""
+    """Defines 'UserContributedToStream' stream."""
 
     name = "user_contributed_to"
     query_jsonpath = "$.data.user.repositoriesContributedTo.nodes.[*]"