Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Re-submitting PR #141 (All repos for an organization) #146

Merged
merged 5 commits into from
Nov 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
'pylint==2.6.2',
'ipdb',
'nose',
'requests-mock==1.9.3'
]
},
entry_points='''
Expand Down
55 changes: 53 additions & 2 deletions tap_github/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,57 @@ def get_catalog():

return {'streams': streams}

def get_all_repos(organizations: list) -> list:
    """
    Collects the full names of every repository belonging to the given
    organizations, checking access to each repository along the way.

    Each entry of `organizations` is an 'org/*' style path; only the part
    before the first '/' is used as the organization name. Every page
    yielded by authed_get_all_pages() is decoded as JSON and each listed
    repository is passed to verify_repo_access() before its 'full_name'
    is added to the result.

    Docs: https://docs.github.com/en/rest/reference/repos#list-organization-repositories
    """
    org_repos_url = 'https://api.github.com/orgs/{}/repos?sort=created&direction=desc'
    repo_commits_url = 'https://api.github.com/repos/{}/commits'

    collected = []

    for wildcard_path in organizations:
        # 'org/*' -> 'org'
        org_name = wildcard_path.partition('/')[0]
        pages = authed_get_all_pages('get_all_repos', org_repos_url.format(org_name))

        for page in pages:
            for repo_info in page.json():
                full_name = repo_info.get('full_name')

                logger.info("Verifying access of repository: %s", full_name)
                # The commits endpoint is used as the access probe; the whole
                # repository record is handed over as the second argument.
                verify_repo_access(repo_commits_url.format(full_name), repo_info)

                collected.append(full_name)

    return collected

def _dedupe_keep_order(items) -> list:
    """Returns the first occurrence of every item, in the original order."""
    seen = set()
    unique = []
    for item in items:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    return unique


def extract_repos_from_config(config: dict) -> list:
    """
    Extracts all repositories from the config and calls get_all_repos()
    for organizations using the wildcard 'org/*' format.

    `config['repository']` is a space separated string of repository
    paths; empty entries produced by repeated spaces are ignored.

    Returns the explicitly listed repositories in the order they appear
    in the config, followed by the repositories get_all_repos() returned
    for the wildcard entries. Every repository appears only once.

    An entry without a '/' is never treated as a wildcard and is returned
    unchanged, so that it is reported by the later access verification
    rather than failing here with an IndexError.
    """
    repo_paths = _dedupe_keep_order(filter(None, config['repository'].split(' ')))

    orgs_with_all_repos = []
    explicit_repos = []
    for path in repo_paths:
        segments = path.split('/')
        if len(segments) > 1 and segments[1] == '*':
            orgs_with_all_repos.append(path)
        else:
            explicit_repos.append(path)

    if not orgs_with_all_repos:
        return explicit_repos

    # get all repositories for the wildcard organizations in the config
    all_repos = get_all_repos(orgs_with_all_repos)

    # A repository may be listed explicitly and also belong to a wildcard
    # organization; keep only its first occurrence.
    explicit_repos.extend(all_repos)
    return _dedupe_keep_order(explicit_repos)

def verify_repo_access(url_for_repo, repo):
try:
authed_get("verifying repository access", url_for_repo)
Expand All @@ -321,7 +372,7 @@ def verify_access_for_repo(config):
access_token = config['access_token']
session.headers.update({'authorization': 'token ' + access_token, 'per_page': '1', 'page': '1'})

repositories = list(filter(None, config['repository'].split(' ')))
repositories = extract_repos_from_config(config)

for repo in repositories:
logger.info("Verifying access of repository: %s", repo)
Expand Down Expand Up @@ -1043,7 +1094,7 @@ def do_sync(config, state, catalog):
selected_stream_ids = get_selected_streams(catalog)
validate_dependencies(selected_stream_ids)

repositories = list(filter(None, config['repository'].split(' ')))
repositories = extract_repos_from_config(config)

state = translate_state(state, catalog, repositories)
singer.write_state(state)
Expand Down
32 changes: 32 additions & 0 deletions tests/unittests/test_extract_repos_from_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import unittest
# `import unittest` alone does not load the `unittest.mock` submodule, so it
# is imported explicitly instead of relying on another module having done so.
from unittest import mock

import tap_github


@mock.patch('tap_github.get_all_repos')
class TestExtractReposFromConfig(unittest.TestCase):
    """
    Tests for tap_github.extract_repos_from_config() with get_all_repos()
    patched out, so no organization listing is requested.
    """

    def test_single_repo(self, mocked_get_all_repos):
        """A single explicit repository is returned as is."""
        config = {'repository': 'singer-io/test-repo'}
        expected_repositories = ['singer-io/test-repo']

        self.assertEqual(expected_repositories, tap_github.extract_repos_from_config(config))
        # no wildcard in the config, so no organization lookup is expected
        mocked_get_all_repos.assert_not_called()

    def test_multiple_repos(self, mocked_get_all_repos):
        """Several explicit repositories are returned in config order."""
        config = {'repository': 'singer-io/test-repo singer-io/tap-github'}
        expected_repositories = ['singer-io/test-repo', 'singer-io/tap-github']

        self.assertEqual(expected_repositories, tap_github.extract_repos_from_config(config))
        mocked_get_all_repos.assert_not_called()

    def test_org_all_repos(self, mocked_get_all_repos):
        """An 'org/*' entry is replaced by the repositories of that organization."""
        config = {'repository': 'singer-io/test-repo test-org/*'}
        expected_repositories = [
            'singer-io/test-repo',
            'test-org/repo1',
            'test-org/repo2',
            'test-org/repo3',
        ]
        mocked_get_all_repos.return_value = [
            'test-org/repo1',
            'test-org/repo2',
            'test-org/repo3',
        ]

        self.assertEqual(expected_repositories, tap_github.extract_repos_from_config(config))
        # only the wildcard entry is handed to get_all_repos()
        mocked_get_all_repos.assert_called_once_with(['test-org/*'])
70 changes: 70 additions & 0 deletions tests/unittests/test_get_all_repos.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import unittest
from itertools import cycle, permutations, chain
# `import unittest` alone does not load the `unittest.mock` submodule, so it
# is imported explicitly instead of relying on another module having done so.
from unittest import mock

import requests
import requests_mock
import simplejson as json

import tap_github


# Session whose 'mock://' URLs are answered by the requests_mock adapter;
# the tests use it to build the response objects handed to get_all_repos().
SESSION = requests.Session()
ADAPTER = requests_mock.Adapter()
SESSION.mount('mock://', ADAPTER)


# Decorators apply bottom-up: the authed_get_all_pages mock is the first
# extra argument of every test method, the verify_repo_access mock the second.
@mock.patch('tap_github.verify_repo_access')
@mock.patch('tap_github.authed_get_all_pages')
class TestGetAllRepos(unittest.TestCase):
    """
    Tests for tap_github.get_all_repos() with the paginated request helper
    and the repository access check patched out.
    """

    def test_single_organization(self, mocked_authed_get_all_pages, mocked_verify_repo_access):
        """All repositories of one organization are returned in page order."""
        orgs = ['test-org/*']
        repos = ['repo1', 'repo2', 'repo3']

        mocked_url = 'mock://github.com/orgs/test-org/repos'
        # 'test-org/*' + 'repo1' -> 'test-org/repo1'
        mocked_response_body = [
            {'full_name': ''.join(r).replace('*', '')} for r in zip(cycle(orgs), repos)
        ]
        mocked_response_text = json.dumps(mocked_response_body)
        ADAPTER.register_uri(
            'GET',
            mocked_url,
            text=mocked_response_text)
        mocked_response = SESSION.get(mocked_url)

        expected_repositories = [
            'test-org/repo1',
            'test-org/repo2',
            'test-org/repo3'
        ]
        # a single page of results for the organization
        mocked_authed_get_all_pages.return_value = [mocked_response]

        self.assertEqual(expected_repositories, tap_github.get_all_repos(orgs))
        # access is verified once per listed repository
        self.assertEqual(len(repos), mocked_verify_repo_access.call_count)

    def test_multiple_organizations(self, mocked_authed_get_all_pages, mocked_verify_repo_access):
        """Repositories of every requested organization end up in the result."""
        orgs = ['test-org/*', 'singer-io/*']
        repos = ['repo1', 'repo2', 'repo3']

        mocked_url = 'mock://github.com/orgs/test-org/repos'
        # Pairing both organizations with every ordered pair of repositories
        # yields each org/repo combination; the set() removes the repeats.
        orgs_repos_permutations = [list(zip(orgs, perm)) for perm in permutations(repos, len(orgs))]
        mocked_response_body = [
            {'full_name': ''.join(r).replace('*', '')} for r in set(chain(*orgs_repos_permutations))
        ]
        mocked_response_text = json.dumps(mocked_response_body)
        ADAPTER.register_uri(
            'GET',
            mocked_url,
            text=mocked_response_text)
        mocked_response = SESSION.get(mocked_url)

        expected_repositories = [
            'test-org/repo1',
            'test-org/repo2',
            'test-org/repo3',
            'singer-io/repo1',
            'singer-io/repo2',
            'singer-io/repo3'
        ]
        mocked_authed_get_all_pages.return_value = [mocked_response]

        # The same mocked page (holding the repositories of both
        # organizations) is returned for every organization, so the result
        # contains each name more than once and in no fixed order; compare
        # as sets.
        self.assertSetEqual(set(expected_repositories), set(tap_github.get_all_repos(orgs)))
        # one paginated listing is requested per organization
        self.assertEqual(len(orgs), mocked_authed_get_all_pages.call_count)