diff --git a/setup.py b/setup.py index f092de6e..640958d1 100644 --- a/setup.py +++ b/setup.py @@ -18,6 +18,7 @@ 'pylint==2.6.2', 'ipdb', 'nose', + 'requests-mock==1.9.3' ] }, entry_points=''' diff --git a/tap_github/__init__.py b/tap_github/__init__.py index 4f74e1a5..79308edd 100644 --- a/tap_github/__init__.py +++ b/tap_github/__init__.py @@ -308,6 +308,57 @@ def get_catalog(): return {'streams': streams} +def get_all_repos(organizations: list) -> list: + """ + Retrieves the full names of all repositories for the provided 'org/*' paths + and verifies basic access for each of them via verify_repo_access(). + + Docs: https://docs.github.com/en/rest/reference/repos#list-organization-repositories + """ + repos = [] + + for org_path in organizations: + org = org_path.split('/')[0] + for response in authed_get_all_pages( + 'get_all_repos', + 'https://api.github.com/orgs/{}/repos?sort=created&direction=desc'.format(org) + ): + org_repos = response.json() + + for repo in org_repos: + repo_full_name = repo.get('full_name') + + logger.info("Verifying access of repository: %s", repo_full_name) + verify_repo_access( + 'https://api.github.com/repos/{}/commits'.format(repo_full_name), + repo_full_name + ) + + repos.append(repo_full_name) + + return repos + +def extract_repos_from_config(config: dict ) -> list: + """ + Extracts all repositories from the config, expanding wildcard 'org/*' + entries via get_all_repos() and keeping the configured repository order. 
+ """ + repo_paths = list(filter(None, config['repository'].split(' '))) + # wildcard check uses a slice so paths without '/' cannot raise IndexError + orgs_with_all_repos = [x for x in repo_paths if x.split('/')[1:2] == ['*']] + + if orgs_with_all_repos: + # remove any wildcard "org/*" occurrences from `repo_paths`, keeping order + repo_paths = [x for x in repo_paths if x not in orgs_with_all_repos] + + # get all repositories for each wildcard org in the config + all_repos = get_all_repos(orgs_with_all_repos) + + # append the expanded org repositories after the explicit ones + repo_paths.extend(all_repos) + + return repo_paths + def verify_repo_access(url_for_repo, repo): try: authed_get("verifying repository access", url_for_repo) @@ -321,7 +372,7 @@ def verify_access_for_repo(config): access_token = config['access_token'] session.headers.update({'authorization': 'token ' + access_token, 'per_page': '1', 'page': '1'}) - repositories = list(filter(None, config['repository'].split(' '))) + repositories = extract_repos_from_config(config) for repo in repositories: logger.info("Verifying access of repository: %s", repo) @@ -1043,7 +1094,7 @@ def do_sync(config, state, catalog): selected_stream_ids = get_selected_streams(catalog) validate_dependencies(selected_stream_ids) - repositories = list(filter(None, config['repository'].split(' '))) + repositories = extract_repos_from_config(config) state = translate_state(state, catalog, repositories) singer.write_state(state) diff --git a/tests/unittests/test_extract_repos_from_config.py b/tests/unittests/test_extract_repos_from_config.py new file mode 100644 index 00000000..4a205696 --- /dev/null +++ b/tests/unittests/test_extract_repos_from_config.py @@ -0,0 +1,32 @@ +import unittest.mock +import tap_github + + +@unittest.mock.patch('tap_github.get_all_repos') +class TestExtractReposFromConfig(unittest.TestCase): + + def test_single_repo(self, mocked_get_all_repos): + config = {'repository': 'singer-io/test-repo'} + expected_repositories = ['singer-io/test-repo'] + self.assertEqual(expected_repositories, tap_github.extract_repos_from_config(config)) + + def 
test_multiple_repos(self, mocked_get_all_repos): + config = {'repository': 'singer-io/test-repo singer-io/tap-github'} + expected_repositories = ['singer-io/test-repo', 'singer-io/tap-github'] + self.assertEqual(expected_repositories, tap_github.extract_repos_from_config(config)) + + def test_org_all_repos(self, mocked_get_all_repos): + config = {'repository': 'singer-io/test-repo test-org/*'} + expected_repositories = [ + 'singer-io/test-repo', + 'test-org/repo1', + 'test-org/repo2', + 'test-org/repo3' + ] + mocked_get_all_repos.return_value = [ + 'test-org/repo1', + 'test-org/repo2', + 'test-org/repo3' + ] + + self.assertEqual(expected_repositories, tap_github.extract_repos_from_config(config)) diff --git a/tests/unittests/test_get_all_repos.py b/tests/unittests/test_get_all_repos.py new file mode 100644 index 00000000..8fdb892f --- /dev/null +++ b/tests/unittests/test_get_all_repos.py @@ -0,0 +1,70 @@ +import unittest.mock +import requests +import requests_mock +import simplejson as json + +import tap_github + +from itertools import cycle, permutations, chain + + +SESSION = requests.Session() +ADAPTER = requests_mock.Adapter() +SESSION.mount('mock://', ADAPTER) + + +@unittest.mock.patch('tap_github.verify_repo_access') +@unittest.mock.patch('tap_github.authed_get_all_pages') +class TestGetAllRepos(unittest.TestCase): + + def test_single_organization(self, mocked_authed_get_all_pages, mocked_verify_repo_access): + orgs = ['test-org/*'] + repos = ['repo1', 'repo2', 'repo3'] + + mocked_url = 'mock://github.com/orgs/test-org/repos' + mocked_response_body = [ + {'full_name': ''.join(r).replace('*', '')} for r in zip(cycle(orgs), repos) + ] + mocked_response_text = json.dumps(mocked_response_body) + ADAPTER.register_uri( + 'GET', + mocked_url, + text=mocked_response_text) + mocked_response = SESSION.get(mocked_url) + + expected_repositories = [ + 'test-org/repo1', + 'test-org/repo2', + 'test-org/repo3' + ] + mocked_authed_get_all_pages.return_value = [mocked_response] + + 
self.assertEqual(expected_repositories, tap_github.get_all_repos(orgs)) + + def test_multiple_organizations(self, mocked_authed_get_all_pages, mocked_verify_repo_access): + orgs = ['test-org/*', 'singer-io/*'] + repos = ['repo1', 'repo2', 'repo3'] + + mocked_url = 'mock://github.com/orgs/test-org/repos' + orgs_repos_permutations = [list(zip(orgs, perm)) for perm in permutations(repos, len(orgs))] + mocked_response_body = [ + {'full_name': ''.join(r).replace('*', '')} for r in set(chain(*orgs_repos_permutations)) + ] + mocked_response_text = json.dumps(mocked_response_body) + ADAPTER.register_uri( + 'GET', + mocked_url, + text=mocked_response_text) + mocked_response = SESSION.get(mocked_url) + + expected_repositories = [ + 'test-org/repo1', + 'test-org/repo2', + 'test-org/repo3', + 'singer-io/repo1', + 'singer-io/repo2', + 'singer-io/repo3' + ] + mocked_authed_get_all_pages.return_value = [mocked_response] + + self.assertSetEqual(set(expected_repositories), set(tap_github.get_all_repos(orgs)))