Skip to content

Commit

Permalink
Override REST stream
Browse files Browse the repository at this point in the history
  • Loading branch information
ericboucher committed May 19, 2022
1 parent b58691a commit 288dd81
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 51 deletions.
55 changes: 7 additions & 48 deletions tap_github/repository_streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -1476,59 +1476,15 @@ def parse_response(self, response: requests.Response) -> Iterable[dict]:
).to_dict()


class StargazersStream(GitHubRestStream):
    """Stream of the users who starred a repository ('stargazers').

    Warning: this stream does NOT track star deletions.
    """

    name = "stargazers"
    path = "/repos/{org}/{repo}/stargazers"
    parent_stream_type = RepositoryStream
    primary_keys = ["user_id", "repo", "org"]
    state_partitioning_keys = ["repo", "org"]
    replication_key = "starred_at"
    # GitHub is missing the "since" parameter on this endpoint.
    missing_since_parameter = True

    schema = th.PropertiesList(
        # Parent Keys
        th.Property("repo", th.StringType),
        th.Property("org", th.StringType),
        th.Property("repo_id", th.IntegerType),
        th.Property("user_id", th.IntegerType),
        # Stargazer Info
        th.Property("starred_at", th.DateTimeType),
        th.Property("user", user_object),
    ).to_dict()

    @property
    def http_headers(self) -> dict:
        """Return the parent's http headers with a star-specific Accept value.

        The Accept header is overridden to request the media type whose
        response includes the starred_at property:
        https://docs.github.com/en/rest/reference/activity#custom-media-types-for-starring
        """
        star_headers = super().http_headers
        star_headers["Accept"] = "application/vnd.github.v3.star+json"
        return star_headers

    def post_process(self, row: dict, context: Optional[Dict] = None) -> dict:
        """Flatten the user id and attach the parent repository id.

        Copies row["user"]["id"] to a top-level "user_id" field (listed in
        this stream's primary_keys). When a context is supplied, its
        "repo_id" is copied onto the row as well.

        Args:
            row: A single record; it is modified in place.
            context: Optional parent context providing "repo_id".

        Returns:
            The same row object, with the extra fields set.
        """
        row["user_id"] = row["user"]["id"]
        if context is None:
            return row
        row["repo_id"] = context["repo_id"]
        return row


class StargazersGraphqlStream(GitHubGraphqlStream):
"""Defines 'UserContributedToStream' stream. Warning: this stream 'only' gets the first 100 projects (by stars)."""
class StargazersStream(GitHubGraphqlStream):
"""Defines 'Stargazers' stream."""

name = "stargazers"
query_jsonpath = "$.data.repository.stargazers.edges.[*]"
primary_keys = ["user_id", "repo_id"]
primary_keys = ["user_id", "repo", "org"]
replication_key = "starred_at"
parent_stream_type = RepositoryStream
state_partitioning_keys = ["repo_id"]
state_partitioning_keys = ["repo", "org"]
# The parent repository object changes if the number of stargazers changes.
ignore_parent_replication_key = False

Expand All @@ -1538,7 +1494,10 @@ def post_process(self, row: dict, context: Optional[Dict] = None) -> dict:
"""
row["user_id"] = row["user"]["id"]
if context is not None:
print(context)
row["repo_id"] = context["repo_id"]
row["repo"] = context["repo"]
row["org"] = context["org"]
return row

def get_next_page_token(
Expand Down
2 changes: 0 additions & 2 deletions tap_github/streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
RepositoryStream,
ReviewCommentsStream,
ReviewsStream,
StargazersGraphqlStream,
StargazersStream,
StatsContributorsStream,
WorkflowRunJobsStream,
Expand Down Expand Up @@ -87,7 +86,6 @@ def __init__(self, valid_queries: Set[str], streams: List[Type[Stream]]):
RepositoryStream,
ReviewCommentsStream,
ReviewsStream,
StargazersGraphqlStream,
StargazersStream,
StatsContributorsStream,
WorkflowRunJobsStream,
Expand Down
2 changes: 1 addition & 1 deletion tap_github/user_streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ def post_process(self, row: dict, context: Optional[Dict] = None) -> dict:


class UserContributedToStream(GitHubGraphqlStream):
"""Defines 'UserContributedToStream' stream. Warning: this stream 'only' gets the first 100 projects (by stars)."""
"""Defines 'UserContributedToStream' stream."""

name = "user_contributed_to"
query_jsonpath = "$.data.user.repositoriesContributedTo.nodes.[*]"
Expand Down

0 comments on commit 288dd81

Please sign in to comment.