Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tdl 16280 implement request timeout #54

Merged
merged 13 commits into from
Dec 13, 2021
Merged
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,13 @@ The [**Google Sheets Setup & Authentication**](https://drive.google.com/open?id=
"refresh_token": "YOUR_REFRESH_TOKEN",
"spreadsheet_id": "YOUR_GOOGLE_SPREADSHEET_ID",
"start_date": "2019-01-01T00:00:00Z",
"user_agent": "tap-google-sheets <api_user_email@example.com>"
"user_agent": "tap-google-sheets <api_user_email@example.com>",
"request_timeout": 300
}
```

Optionally, also create a `state.json` file. `currently_syncing` is an optional attribute used for identifying the last object to be synced in case the job is interrupted mid-stream. The next run would begin where the last job left off.
Only the `performance_reports` uses a bookmark. The date-time bookmark is stored in a nested structure based on the endpoint, site, and sub_type.
Only the `performance_reports` uses a bookmark. The date-time bookmark is stored in a nested structure based on the endpoint, site, and sub_type.The `request_timeout` is an optional paramater to set timeout for requests. Default: 300 seconds

```json
{
Expand Down
4 changes: 3 additions & 1 deletion tap_google_sheets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ def main():
with GoogleClient(parsed_args.config['client_id'],
parsed_args.config['client_secret'],
parsed_args.config['refresh_token'],
parsed_args.config['user_agent']) as client:
parsed_args.config.get('request_timeout'),
parsed_args.config['user_agent']
) as client:

state = {}
if parsed_args.state:
Expand Down
27 changes: 23 additions & 4 deletions tap_google_sheets/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@
import singer
from singer import metrics
from singer import utils
from requests.exceptions import Timeout, ConnectionError

BASE_URL = 'https://www.googleapis.com'
GOOGLE_TOKEN_URI = 'https://oauth2.googleapis.com/token'
LOGGER = singer.get_logger()

REQUEST_TIMEOUT = 300

class Server5xxError(Exception):
pass
Expand Down Expand Up @@ -131,6 +132,7 @@ def __init__(self,
client_id,
client_secret,
refresh_token,
request_timeout=REQUEST_TIMEOUT,
user_agent=None):
self.__client_id = client_id
self.__client_secret = client_secret
Expand All @@ -140,6 +142,12 @@ def __init__(self,
self.__expires = None
self.__session = requests.Session()
self.base_url = None
# if request_timeout is other than 0,"0" or "" then use request_timeout
if request_timeout and float(request_timeout):
request_timeout = float(request_timeout)
else: # If value is 0,"0" or "" then set default to 300 seconds.
request_timeout = REQUEST_TIMEOUT
self.request_timeout = request_timeout


def __enter__(self):
dbshah1212 marked this conversation as resolved.
Show resolved Hide resolved
Expand All @@ -149,7 +157,11 @@ def __enter__(self):
def __exit__(self, exception_type, exception_value, traceback):
self.__session.close()


# backoff request for 5 times at an interval of 10 seconds in case of Timeout error
@backoff.on_exception(backoff.constant,
Timeout,
max_tries=5,
interval=10)
@backoff.on_exception(backoff.expo,
Server5xxError,
max_tries=5,
Expand All @@ -172,7 +184,8 @@ def get_access_token(self):
'client_id': self.__client_id,
'client_secret': self.__client_secret,
'refresh_token': self.__refresh_token,
})
},
timeout=self.request_timeout)

if response.status_code >= 500:
raise Server5xxError()
Expand All @@ -186,6 +199,11 @@ def get_access_token(self):
LOGGER.info('Authorized, token expires = {}'.format(self.__expires))


#backoff request for 5 times at an interval of 10 seconds when we get Timeout error
@backoff.on_exception(backoff.constant,
dbshah1212 marked this conversation as resolved.
Show resolved Hide resolved
(Timeout),
max_tries=5,
interval=10)
# Rate Limit: https://developers.google.com/sheets/api/limits
# 100 request per 100 seconds per User
@backoff.on_exception(backoff.expo,
Expand Down Expand Up @@ -221,7 +239,8 @@ def request(self, method, path=None, url=None, api=None, **kwargs):
kwargs['headers']['Content-Type'] = 'application/json'

with metrics.http_request_timer(endpoint) as timer:
response = self.__session.request(method, url, **kwargs)

response = self.__session.request(method, url, timeout=self.request_timeout, **kwargs)
timer.tags[metrics.Tag.http_status_code] = response.status_code

if response.status_code >= 500:
Expand Down
114 changes: 114 additions & 0 deletions tests/unittests/test_request_timeout.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
from tap_google_sheets.client import GoogleClient
import unittest
from unittest import mock
from unittest.case import TestCase
from requests.exceptions import Timeout
class TestBackoffError(unittest.TestCase):
'''
Test that backoff logic works properly.
'''
@mock.patch('tap_google_sheets.client.requests.Session.request')
@mock.patch('tap_google_sheets.client.GoogleClient.get_access_token')
def test_request_timeout_and_backoff(self, mock_get_token, mock_request):
"""
Check whether the request backoffs properly for request() for 5 times in case of Timeout error.
"""
mock_request.side_effect = Timeout
with self.assertRaises(Timeout):
client = GoogleClient("dummy_client_id", "dummy_client_secret", "dummy_refresh_token", 300)
client.request("GET")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the client instantiation should be moved outside of this with block. It is unclear what is being tested otherwise.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, removed it from the with

self.assertEquals(mock_request.call_count, 5)

@mock.patch('tap_google_sheets.client.requests.Session.post')
def test_get_access_token_timeout_and_backoff(self, mock_post):
"""
Check whether the request backoffs properly for get_access_token() for 5 times in case of Timeout error.
"""
mock_post.side_effect = Timeout
with self.assertRaises(Timeout):
client = GoogleClient("dummy_client_id", "dummy_client_secret", "dummy_refresh_token", 300)
client.get_access_token()
self.assertEquals(mock_post.call_count, 5)

class MockResponse():
'''
Mock response object for the requests call
'''
def __init__(self, resp, status_code, content=[""], headers=None, raise_error=False, text={}):
self.json_data = resp
self.status_code = status_code
self.content = content
self.headers = headers
self.raise_error = raise_error
self.text = text
self.reason = "error"

def prepare(self):
return (self.json_data, self.status_code, self.content, self.headers, self.raise_error)

def json(self, object_pairs_hook):
return self.text

class TestRequestTimeoutValue(unittest.TestCase):
'''
Test that request timeout parameter works properly in various cases
'''
@mock.patch('tap_google_sheets.client.requests.Session.request', return_value = MockResponse("", status_code=200))
@mock.patch('tap_google_sheets.client.GoogleClient.get_access_token')
def test_config_provided_request_timeout(self, mock_get, mock_request):
"""
Unit tests to ensure that request timeout is set based on config value
"""
config = { "refresh_token": "dummy_token", "client_id": "dummy_client_id", "client_secret": "dummy_client_secret", "user_agent": "dummy_ua", "request_timeout": 100}
client = GoogleClient(**config)
client.request("GET", "dummy_path")

mock_request.assert_called_with('GET', 'https://sheets.googleapis.com/v4/dummy_path', headers={'Authorization': 'Bearer None', 'User-Agent': 'dummy_ua'}, timeout=100.0)

@mock.patch('tap_google_sheets.client.requests.Session.request', return_value = MockResponse("", status_code=200))
@mock.patch('tap_google_sheets.client.GoogleClient.get_access_token')
def test_default_value_request_timeout(self, mock_get, mock_request):
"""
Unit tests to ensure that request timeout is set based default value
"""
config = { "refresh_token": "dummy_token", "client_id": "dummy_client_id", "client_secret": "dummy_client_secret", "user_agent": "dummy_ua"}
client = GoogleClient(**config)
client.request("GET", "dummy_path")

mock_request.assert_called_with('GET', 'https://sheets.googleapis.com/v4/dummy_path', headers={'Authorization': 'Bearer None', 'User-Agent': 'dummy_ua'}, timeout=300.0)

@mock.patch('tap_google_sheets.client.requests.Session.request', return_value = MockResponse("", status_code=200))
@mock.patch('tap_google_sheets.client.GoogleClient.get_access_token')
def test_config_provided_empty_request_timeout(self, mock_get, mock_request):
"""
Unit tests to ensure that request timeout is set based on default value if empty value is given in config
"""
config = { "refresh_token": "dummy_token", "client_id": "dummy_client_id", "client_secret": "dummy_client_secret", "user_agent": "dummy_ua", "request_timeout": ""}
client = GoogleClient(**config)
client.request("GET", "dummy_path")

mock_request.assert_called_with('GET', 'https://sheets.googleapis.com/v4/dummy_path', headers={'Authorization': 'Bearer None', 'User-Agent': 'dummy_ua'}, timeout=300)

@mock.patch('tap_google_sheets.client.requests.Session.request', return_value = MockResponse("", status_code=200))
@mock.patch('tap_google_sheets.client.GoogleClient.get_access_token')
def test_config_provided_string_request_timeout(self, mock_get, mock_request):
"""
Unit tests to ensure that request timeout is set based on config string value
"""
config = { "refresh_token": "dummy_token", "client_id": "dummy_client_id", "client_secret": "dummy_client_secret", "user_agent": "dummy_ua", "request_timeout": "100"}
client = GoogleClient(**config)
client.request("GET", "dummy_path")

mock_request.assert_called_with('GET', 'https://sheets.googleapis.com/v4/dummy_path', headers={'Authorization': 'Bearer None', 'User-Agent': 'dummy_ua'}, timeout=100.0)

@mock.patch('tap_google_sheets.client.requests.Session.request', return_value = MockResponse("", status_code=200))
@mock.patch('tap_google_sheets.client.GoogleClient.get_access_token')
def test_config_provided_float_request_timeout(self, mock_get, mock_request):
"""
Unit tests to ensure that request timeout is set based on config float value
"""
config = { "refresh_token": "dummy_token", "client_id": "dummy_client_id", "client_secret": "dummy_client_secret", "user_agent": "dummy_ua", "request_timeout": 100.8}
client = GoogleClient(**config)
client.request("GET", "dummy_path")

mock_request.assert_called_with('GET', 'https://sheets.googleapis.com/v4/dummy_path', headers={'Authorization': 'Bearer None', 'User-Agent': 'dummy_ua'}, timeout=100.8)