From 7ff188666b86a660c179a2b08c2cc9fb54c65d11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Rami=CC=81rez=20Mondrago=CC=81n?= Date: Fri, 17 Jun 2022 11:08:51 -0500 Subject: [PATCH 1/9] feat: Implement paginators --- .../sample_tap_gitlab/gitlab_rest_streams.py | 36 +- singer_sdk/pagination.py | 417 ++++++++++++++++++ singer_sdk/streams/rest.py | 117 ++--- tests/core/rest/test_pagination.py | 339 ++++++++++++++ tests/core/test_streams.py | 36 +- tox.ini | 2 +- 6 files changed, 852 insertions(+), 95 deletions(-) create mode 100644 singer_sdk/pagination.py create mode 100644 tests/core/rest/test_pagination.py diff --git a/samples/sample_tap_gitlab/gitlab_rest_streams.py b/samples/sample_tap_gitlab/gitlab_rest_streams.py index 425348e6e..5c0bd44c3 100644 --- a/samples/sample_tap_gitlab/gitlab_rest_streams.py +++ b/samples/sample_tap_gitlab/gitlab_rest_streams.py @@ -1,11 +1,12 @@ """Sample tap stream test for tap-gitlab.""" -from pathlib import Path -from typing import Any, Dict, List, Optional, cast +from __future__ import annotations -import requests +from pathlib import Path +from typing import Any, cast from singer_sdk.authenticators import SimpleAuthenticator +from singer_sdk.pagination import SimpleHeaderPaginator from singer_sdk.streams.rest import RESTStream from singer_sdk.typing import ( ArrayType, @@ -21,7 +22,7 @@ DEFAULT_URL_BASE = "https://gitlab.com/api/v4" -class GitlabStream(RESTStream): +class GitlabStream(RESTStream[str]): """Sample tap test for gitlab.""" _LOG_REQUEST_METRIC_URLS = True @@ -39,8 +40,8 @@ def authenticator(self) -> SimpleAuthenticator: ) def get_url_params( - self, context: Optional[dict], next_page_token: Optional[Any] - ) -> Dict[str, Any]: + self, context: dict | None, next_page_token: str | None + ) -> dict[str, Any]: """Return a dictionary of values to be used in URL parameterization.""" params: dict = {} if next_page_token: @@ -50,21 +51,20 @@ def get_url_params( params["order_by"] = self.replication_key return params - def get_next_page_token( - self, response: requests.Response, previous_token: Optional[Any] - ) -> Optional[Any]: - """Return token for identifying next page or None if not applicable.""" - next_page_token = response.headers.get("X-Next-Page", None) - if next_page_token: - self.logger.debug(f"Next page token retrieved: {next_page_token}") - return next_page_token + def get_new_paginator(self) -> SimpleHeaderPaginator: + """Return a new paginator for GitLab API endpoints. + + Returns: + A new paginator. + """ + return SimpleHeaderPaginator("X-Next-Page") class ProjectBasedStream(GitlabStream): """Base class for streams that are keys based on project ID.""" @property - def partitions(self) -> List[dict]: + def partitions(self) -> list[dict]: """Return a list of partition key dicts (if applicable), otherwise None.""" if "{project_id}" in self.path: return [ @@ -162,7 +162,7 @@ class EpicsStream(ProjectBasedStream): # schema_filepath = SCHEMAS_DIR / "epics.json" - def get_child_context(self, record: dict, context: Optional[dict]) -> dict: + def get_child_context(self, record: dict, context: dict | None) -> dict: """Perform post processing, including queuing up any child stream types.""" # Ensure child state record(s) are created return { @@ -183,8 +183,8 @@ class EpicIssuesStream(GitlabStream): parent_stream_type = EpicsStream # Stream should wait for parents to complete. def get_url_params( - self, context: Optional[dict], next_page_token: Optional[Any] - ) -> Dict[str, Any]: + self, context: dict | None, next_page_token: str | None + ) -> dict[str, Any]: """Return a dictionary of values to be used in parameterization.""" result = super().get_url_params(context, next_page_token) if not context or "epic_id" not in context: diff --git a/singer_sdk/pagination.py b/singer_sdk/pagination.py new file mode 100644 index 000000000..5ee106470 --- /dev/null +++ b/singer_sdk/pagination.py @@ -0,0 +1,417 @@ +"""Generic paginator classes.""" + +from __future__ import annotations + +import sys +from abc import ABCMeta, abstractmethod +from typing import Any, Generic, Iterable, Optional, TypeVar +from urllib.parse import ParseResult, urlparse + +from requests import Response + +from singer_sdk.helpers.jsonpath import extract_jsonpath + +if sys.version_info >= (3, 8): + from typing import Protocol +else: + from typing_extensions import Protocol + +T = TypeVar("T") +TPageToken = TypeVar("TPageToken") + + +def first(iterable: Iterable[T]) -> T: + """Return the first element of an iterable or raise an exception. + + Args: + iterable: An iterable. + + Returns: + The first element of the iterable. + + >>> first('ABC') + 'A' + """ + return next(iter(iterable)) + + +class BaseAPIPaginator(Generic[TPageToken], metaclass=ABCMeta): + """An API paginator object.""" + + def __init__( + self, + start_value: TPageToken, + *args: Any, + **kwargs: Any, + ) -> None: + """Create a new paginator. + + Args: + start_value: Initial value. + args: Paginator positional arguments. + kwargs: Paginator keyword arguments. + """ + self._value: TPageToken = start_value + self._page_count = 0 + self._finished = False + self._last_seen_record: dict | None = None + + @property + def current_value(self) -> TPageToken: + """Get the current pagination value. + + Returns: + Current page value. + """ + return self._value + + @property + def finished(self) -> bool: + """Get a flag that indicates if the last page of data has been reached. + + Returns: + True if there are no more pages. + """ + return self._finished + + @property + def count(self) -> int: + """Count the number of pages traversed so far. + + Returns: + Number of pages. + """ + return self._page_count + + def __str__(self) -> str: + """Stringify this object. + + Returns: + String representation. + """ + return f"{self.__class__.__name__}<{self.current_value}>" + + def __repr__(self) -> str: + """Stringify this object. + + Returns: + String representation. + """ + return str(self) + + def advance(self, response: Response) -> None: + """Get a new page value and advance the current one. + + Args: + response: API response object. + + Raises: + RuntimeError: If a loop in pagination is detected. That is, when two + consecutive pagination tokens are identical. + """ + self._page_count += 1 + + if not self.has_more(response): + self._finished = True + return + + new_value = self.get_next(response) + + if new_value and new_value == self._value: + raise RuntimeError( + f"Loop detected in pagination. " + f"Pagination token {new_value} is identical to prior token." + ) + + # Stop if new value None, empty string, 0, etc. + if not new_value: + self._finished = True + else: + self._value = new_value + + def has_more(self, response: Response) -> bool: + """Override this method to check if the endpoint has any pages left. + + Args: + response: API response object. + + Returns: + Boolean flag used to indicate if the endpoint has more pages. + """ + return True + + @abstractmethod + def get_next(self, response: Response) -> TPageToken | None: + """Get the next pagination token or index from the API response. + + Args: + response: API response object. + + Returns: + The next page token or index. Return `None` from this method to indicate + the end of pagination. + """ + ... + + +class SinglePagePaginator(BaseAPIPaginator[None]): + """A paginator that does works with single-page endpoints.""" + + def __init__(self) -> None: + """Create a new paginator.""" + super().__init__(None) + + def get_next(self, response: Response) -> None: + """Get the next pagination token or index from the API response. + + Args: + response: API response object. + + Returns: + The next page token or index. Return `None` from this method to indicate + the end of pagination. + """ + return None + + +class BaseHATEOASPaginator(BaseAPIPaginator[Optional[ParseResult]], metaclass=ABCMeta): + """Paginator class for APIs supporting HATEOAS links in their responses.""" + + def __init__(self, *args: Any, **kwargs: Any) -> None: + """Create a new paginator. + + Args: + args: Paginator positional arguments. + kwargs: Paginator keyword arguments. + """ + super().__init__(None, *args, **kwargs) + + @abstractmethod + def get_next_url(self, response: Response) -> str | None: + """Override this method to extract a HATEOAS link from the response. + + Args: + response: API response object. + """ + ... + + def get_next(self, response: Response) -> ParseResult | None: + """Get the next pagination token or index from the API response. + + Args: + response: API response object. + + Returns: + A parsed HATEOAS link if the response has one, otherwise `None`. + """ + next_url = self.get_next_url(response) + return urlparse(next_url) if next_url else None + + +class HeaderLinkPaginator(BaseHATEOASPaginator): + """Paginator class for APIs supporting HATEOAS links in their headers. + + Links: + - https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Link + - https://datatracker.ietf.org/doc/html/rfc8288#section-3 + """ + + def get_next_url(self, response: Response) -> str | None: + """Override this method to extract a HATEOAS link from the response. + + Args: + response: API response object. + + Returns: + A HATEOAS link parsed from the response headers. + """ + url: str | None = response.links.get("next", {}).get("url") + return url + + +class JSONPathPaginator(BaseAPIPaginator[Optional[str]]): + """Paginator class for APIs returning a pagination token in the response body.""" + + def __init__( + self, + jsonpath: str, + *args: Any, + **kwargs: Any, + ) -> None: + """Create a new paginator. + + Args: + jsonpath: A JSONPath expression. + args: Paginator positional arguments. + kwargs: Paginator keyword arguments. + """ + super().__init__(None) + self._jsonpath = jsonpath + + def get_next(self, response: Response) -> str | None: + """Get the next page token. + + Args: + response: API response object. + + Returns: + The next page token. + """ + all_matches = extract_jsonpath(self._jsonpath, response.json()) + return next(all_matches, None) + + +class SimpleHeaderPaginator(BaseAPIPaginator[Optional[str]]): + """Paginator class for APIs returning a pagination token in the response headers.""" + + def __init__( + self, + key: str, + *args: Any, + **kwargs: Any, + ) -> None: + """Create a new paginator. + + Args: + key: Header key that contains the next page token. + args: Paginator positional arguments. + kwargs: Paginator keyword arguments. + """ + super().__init__(None) + self._key = key + + def get_next(self, response: Response) -> str | None: + """Get the next page token. + + Args: + response: API response object. + + Returns: + The next page token. + """ + return response.headers.get(self._key, None) + + +class BasePageNumberPaginator(BaseAPIPaginator[int], metaclass=ABCMeta): + """Paginator class for APIs that use page number.""" + + @abstractmethod + def has_more(self, response: Response) -> bool: + """Override this method to check if the endpoint has any pages left. + + Args: + response: API response object. + + Returns: + Boolean flag used to indicate if the endpoint has more pages. + + """ + ... + + def get_next(self, response: Response) -> int | None: + """Get the next page number. + + Args: + response: API response object. + + Returns: + The next page number. + """ + return self._value + 1 + + +class BaseOffsetPaginator(BaseAPIPaginator[int], metaclass=ABCMeta): + """Paginator class for APIs that use page offset.""" + + def __init__( + self, + start_value: int, + page_size: int, + *args: Any, + **kwargs: Any, + ) -> None: + """Create a new paginator. + + Args: + start_value: Initial value. + page_size: Constant page size. + args: Paginator positional arguments. + kwargs: Paginator keyword arguments. + """ + super().__init__(start_value) + self._page_size = page_size + + @abstractmethod + def has_more(self, response: Response) -> bool: + """Override this method to check if the endpoint has any pages left. + + Args: + response: API response object. + + Returns: + Boolean flag used to indicate if the endpoint has more pages. + """ + ... + + def get_next(self, response: Response) -> int | None: + """Get the next page offset. + + Args: + response: API response object. + + Returns: + The next page offset. + """ + return self._value + self._page_size + + +class LegacyPaginatedStreamProtocol(Protocol[TPageToken]): + """Protocol for legacy paginated streams classes.""" + + def get_next_page_token( + self, + response: Response, + previous_token: TPageToken | None, + ) -> TPageToken | None: + """Get the next page token. + + Args: + response: API response object. + previous_token: Previous page token. + """ + ... + + +class LegacyStreamPaginator( + BaseAPIPaginator[Optional[TPageToken]], + Generic[TPageToken], +): + """Paginator that works with REST streams as they exist today.""" + + def __init__( + self, + stream: LegacyPaginatedStreamProtocol[TPageToken], + *args: Any, + **kwargs: Any, + ) -> None: + """Create a new paginator. + + Args: + stream: A RESTStream instance. + args: Paginator positional arguments. + kwargs: Paginator keyword arguments. + """ + super().__init__(None, *args, **kwargs) + self.stream = stream + + def get_next(self, response: Response) -> TPageToken | None: + """Get next page value by calling the stream method. + + Args: + response: API response object. + + Returns: + The next page token or index. Return `None` from this method to indicate + the end of pagination. + """ + return self.stream.get_next_page_token(response, self.current_value) diff --git a/singer_sdk/streams/rest.py b/singer_sdk/streams/rest.py index 752ff334c..a3975ebfa 100644 --- a/singer_sdk/streams/rest.py +++ b/singer_sdk/streams/rest.py @@ -1,21 +1,13 @@ """Abstract base class for API-type streams.""" +from __future__ import annotations + import abc import copy import logging from datetime import datetime -from typing import ( - Any, - Callable, - Dict, - Generator, - Generic, - Iterable, - List, - Optional, - TypeVar, - Union, -) +from typing import Any, Callable, Generator, Generic, Iterable, TypeVar +from warnings import warn import backoff import requests @@ -24,6 +16,12 @@ from singer_sdk.authenticators import APIAuthenticatorBase, SimpleAuthenticator from singer_sdk.exceptions import FatalAPIError, RetriableAPIError from singer_sdk.helpers.jsonpath import extract_jsonpath +from singer_sdk.pagination import ( + BaseAPIPaginator, + JSONPathPaginator, + LegacyStreamPaginator, + SimpleHeaderPaginator, +) from singer_sdk.plugin_base import PluginBase as TapBaseClass from singer_sdk.streams.core import Stream @@ -31,24 +29,25 @@ DEFAULT_REQUEST_TIMEOUT = 300 # 5 minutes _TToken = TypeVar("_TToken") +# _TPaginator = TypeVar("_TPaginator", bound=BaseAPIPaginator) class RESTStream(Stream, Generic[_TToken], metaclass=abc.ABCMeta): """Abstract base class for REST API streams.""" _page_size: int = DEFAULT_PAGE_SIZE - _requests_session: Optional[requests.Session] + _requests_session: requests.Session | None rest_method = "GET" #: JSONPath expression to extract records from the API response. records_jsonpath: str = "$[*]" #: Response code reference for rate limit retries - extra_retry_statuses: List[int] = [429] + extra_retry_statuses: list[int] = [429] #: Optional JSONPath expression to extract a pagination token from the API response. #: Example: `"$.next_page"` - next_page_token_jsonpath: Optional[str] = None + next_page_token_jsonpath: str | None = None # Private constants. May not be supported in future releases: _LOG_REQUEST_METRICS: bool = True @@ -64,9 +63,9 @@ def url_base(self) -> str: def __init__( self, tap: TapBaseClass, - name: Optional[str] = None, - schema: Optional[Union[Dict[str, Any], Schema]] = None, - path: Optional[str] = None, + name: str | None = None, + schema: dict[str, Any] | Schema | None = None, + path: str | None = None, ) -> None: """Initialize the REST stream. @@ -85,7 +84,7 @@ def __init__( self._next_page_token_compiled_jsonpath = None @staticmethod - def _url_encode(val: Union[str, datetime, bool, int, List[str]]) -> str: + def _url_encode(val: str | datetime | bool | int | list[str]) -> str: """Encode the val argument as url-compatible string. Args: @@ -100,7 +99,7 @@ def _url_encode(val: Union[str, datetime, bool, int, List[str]]) -> str: result = str(val) return result - def get_url(self, context: Optional[dict]) -> str: + def get_url(self, context: dict | None) -> str: """Get stream entity URL. Developers override this method to perform dynamic URL generation. @@ -223,7 +222,7 @@ def request_decorator(self, func: Callable) -> Callable: return decorator def _request( - self, prepared_request: requests.PreparedRequest, context: Optional[dict] + self, prepared_request: requests.PreparedRequest, context: dict | None ) -> requests.Response: """TODO. @@ -250,8 +249,8 @@ def _request( return response def get_url_params( - self, context: Optional[dict], next_page_token: Optional[_TToken] - ) -> Dict[str, Any]: + self, context: dict | None, next_page_token: _TToken | None + ) -> dict[str, Any]: """Return a dictionary of values to be used in URL parameterization. If paging is supported, developers may override with specific paging logic. @@ -267,7 +266,7 @@ def get_url_params( return {} def prepare_request( - self, context: Optional[dict], next_page_token: Optional[_TToken] + self, context: dict | None, next_page_token: _TToken | None ) -> requests.PreparedRequest: """Prepare a request object. @@ -306,7 +305,7 @@ def prepare_request( ) return request - def request_records(self, context: Optional[dict]) -> Iterable[dict]: + def request_records(self, context: dict | None) -> Iterable[dict]: """Request records from REST endpoint(s), returning response records. If pagination is detected, pages will be recursed automatically. @@ -316,38 +315,25 @@ def request_records(self, context: Optional[dict]) -> Iterable[dict]: Yields: An item for every record in the response. - - Raises: - RuntimeError: If a loop in pagination is detected. That is, when two - consecutive pagination tokens are identical. """ - next_page_token: Optional[_TToken] = None - finished = False + paginator = self.get_new_paginator() decorated_request = self.request_decorator(self._request) - while not finished: + while not paginator.finished: prepared_request = self.prepare_request( - context, next_page_token=next_page_token + context, + next_page_token=paginator.current_value, ) resp = decorated_request(prepared_request, context) yield from self.parse_response(resp) - previous_token = copy.deepcopy(next_page_token) - next_page_token = self.get_next_page_token( - response=resp, previous_token=previous_token - ) - if next_page_token and next_page_token == previous_token: - raise RuntimeError( - f"Loop detected in pagination. " - f"Pagination token {next_page_token} is identical to prior token." - ) - # Cycle until get_next_page_token() no longer returns a value - finished = not next_page_token + + paginator.advance(resp) # Overridable: def prepare_request_payload( - self, context: Optional[dict], next_page_token: Optional[_TToken] - ) -> Optional[dict]: + self, context: dict | None, next_page_token: _TToken | None + ) -> dict | None: """Prepare the data payload for the REST API request. By default, no payload will be sent (return None). @@ -366,33 +352,24 @@ def prepare_request_payload( """ return None - def get_next_page_token( - self, - response: requests.Response, - previous_token: Optional[_TToken], - ) -> Optional[_TToken]: - """Return token identifying next page or None if all records have been read. - - Args: - response: A raw `requests.Response`_ object. - previous_token: Previous pagination reference. + def get_new_paginator(self) -> BaseAPIPaginator: + """Get a fresh paginator for this API endpoint. Returns: - Reference value to retrieve next page. - - .. _requests.Response: - https://docs.python-requests.org/en/latest/api/#requests.Response + A paginator instance. """ - if self.next_page_token_jsonpath: - all_matches = extract_jsonpath( - self.next_page_token_jsonpath, response.json() + if hasattr(self, "get_next_page_token"): + warn( + "`RESTStream.get_next_page_token` is deprecated and will not be used " + + "in a future version of the Meltano SDK. " + + "Override `RESTStream.get_new_paginator` instead.", + DeprecationWarning, ) - first_match = next(iter(all_matches), None) - next_page_token = first_match + return LegacyStreamPaginator(self) # type: ignore + elif self.next_page_token_jsonpath: + return JSONPathPaginator(self.next_page_token_jsonpath) else: - next_page_token = response.headers.get("X-Next-Page", None) - - return next_page_token + return SimpleHeaderPaginator("X-Next-Page") @property def http_headers(self) -> dict: @@ -422,7 +399,7 @@ def timeout(self) -> int: # Records iterator - def get_records(self, context: Optional[dict]) -> Iterable[Dict[str, Any]]: + def get_records(self, context: dict | None) -> Iterable[dict[str, Any]]: """Return a generator of row-type dictionary objects. Each row emitted should be a dictionary of property names to their values. @@ -457,7 +434,7 @@ def parse_response(self, response: requests.Response) -> Iterable[dict]: # Abstract methods: @property - def authenticator(self) -> Optional[APIAuthenticatorBase]: + def authenticator(self) -> APIAuthenticatorBase | None: """Return or set the authenticator for managing HTTP auth headers. If an authenticator is not specified, REST-based taps will simply pass diff --git a/tests/core/rest/test_pagination.py b/tests/core/rest/test_pagination.py new file mode 100644 index 000000000..ca9e856fd --- /dev/null +++ b/tests/core/rest/test_pagination.py @@ -0,0 +1,339 @@ +"""Tests generic paginator classes.""" + +from __future__ import annotations + +import json + +import pytest +from requests import Response + +from singer_sdk.helpers.jsonpath import extract_jsonpath +from singer_sdk.pagination import ( + BaseAPIPaginator, + BaseHATEOASPaginator, + BaseOffsetPaginator, + BasePageNumberPaginator, + HeaderLinkPaginator, + JSONPathPaginator, + SimpleHeaderPaginator, + SinglePagePaginator, + first, +) + + +def test_paginator_base_missing_implementation(): + """Validate that `BaseAPIPaginator` implementation requires `get_next`.""" + + with pytest.raises( + TypeError, + match="Can't instantiate abstract class .* get_next", + ): + BaseAPIPaginator(0) + + +def test_single_page_paginator(): + """Validate single page paginator.""" + + response = Response() + paginator = SinglePagePaginator() + assert not paginator.finished + assert paginator.current_value is None + assert paginator.count == 0 + + paginator.advance(response) + assert paginator.finished + assert paginator.current_value is None + assert paginator.count == 1 + + +def test_paginator_page_number_missing_implementation(): + """Validate that `BasePageNumberPaginator` implementation requires `has_more`.""" + + with pytest.raises( + TypeError, + match="Can't instantiate abstract class .* has_more", + ): + BasePageNumberPaginator(1) + + +def test_paginator_offset_missing_implementation(): + """Validate that `BaseOffsetPaginator` implementation requires `has_more`.""" + + with pytest.raises( + TypeError, + match="Can't instantiate abstract class .* has_more", + ): + BaseOffsetPaginator(0, 100) + + +def test_paginator_hateoas_missing_implementation(): + """Validate that `BaseHATEOASPaginator` implementation requires `get_next_url`.""" + + with pytest.raises( + TypeError, + match="Can't instantiate abstract class .* get_next_url", + ): + BaseHATEOASPaginator() + + +def test_paginator_loop(): + """Validate paginator that uses the page number.""" + + response = Response() + paginator = JSONPathPaginator(jsonpath="$.nextPageToken") + assert not paginator.finished + assert paginator.current_value is None + assert paginator.count == 0 + + response._content = b'{"nextPageToken": "abc"}' + paginator.advance(response) + assert not paginator.finished + assert paginator.current_value == "abc" + assert paginator.count == 1 + + response._content = b'{"nextPageToken": "abc"}' + with pytest.raises(RuntimeError, match="Loop detected in pagination"): + paginator.advance(response) + + +def test_paginator_page_number(): + """Validate paginator that uses the page number.""" + + class _TestPageNumberPaginator(BasePageNumberPaginator): + def has_more(self, response: Response) -> bool: + return response.json()["hasMore"] + + has_more_response = b'{"hasMore": true}' + no_more_response = b'{"hasMore": false}' + + response = Response() + paginator = _TestPageNumberPaginator(0) + assert not paginator.finished + assert paginator.current_value == 0 + assert paginator.count == 0 + + response._content = has_more_response + paginator.advance(response) + assert not paginator.finished + assert paginator.current_value == 1 + assert paginator.count == 1 + + response._content = has_more_response + paginator.advance(response) + assert not paginator.finished + assert paginator.current_value == 2 + assert paginator.count == 2 + + response._content = no_more_response + paginator.advance(response) + assert paginator.finished + assert paginator.count == 3 + + +def test_paginator_offset(): + """Validate paginator that uses the page offset.""" + + class _TestOffsetPaginator(BaseOffsetPaginator): + def __init__( + self, + start_value: int, + page_size: int, + records_jsonpath: str, + ) -> None: + super().__init__(start_value, page_size) + self._records_jsonpath = records_jsonpath + + def has_more(self, response: Response) -> bool: + """Check if response has any records. + + Args: + response: API response object. + + Returns: + Boolean flag used to indicate if the endpoint has more pages. + """ + try: + first( + extract_jsonpath( + self._records_jsonpath, + response.json(), + ) + ) + except StopIteration: + return False + + return True + + response = Response() + paginator = _TestOffsetPaginator(0, 2, "$[*]") + assert not paginator.finished + assert paginator.current_value == 0 + assert paginator.count == 0 + + response._content = b'[{"id": 1}, {"id": 2}]' + paginator.advance(response) + assert not paginator.finished + assert paginator.current_value == 2 + assert paginator.count == 1 + + response._content = b'[{"id": 3}]' + paginator.advance(response) + assert not paginator.finished + assert paginator.current_value == 4 + assert paginator.count == 2 + + response._content = b"[]" + paginator.advance(response) + assert paginator.finished + assert paginator.count == 3 + + +def test_paginator_jsonpath(): + """Validate paginator that uses JSONPath.""" + + response = Response() + paginator = JSONPathPaginator(jsonpath="$.nextPageToken") + assert not paginator.finished + assert paginator.current_value is None + assert paginator.count == 0 + + response._content = b'{"nextPageToken": "abc"}' + paginator.advance(response) + assert not paginator.finished + assert paginator.current_value == "abc" + assert paginator.count == 1 + + response._content = b'{"nextPageToken": null}' + paginator.advance(response) + assert paginator.finished + assert paginator.count == 2 + + +def test_paginator_header(): + """Validate paginator that uses response headers.""" + + key = "X-Next-Page" + response = Response() + paginator = SimpleHeaderPaginator(key=key) + assert not paginator.finished + assert paginator.current_value is None + assert paginator.count == 0 + + response.headers[key] = "abc" + paginator.advance(response) + assert not paginator.finished + assert paginator.current_value == "abc" + assert paginator.count == 1 + + response.headers[key] = None + paginator.advance(response) + assert paginator.finished + assert paginator.count == 2 + + +def test_paginator_header_links(): + """Validate paginator that uses HATEOAS links.""" + + api_hostname = "my.api.test" + resource_path = "/path/to/resource" + + response = Response() + paginator = HeaderLinkPaginator() + assert not paginator.finished + assert paginator.current_value is None + assert paginator.count == 0 + + response.headers.update( + {"Link": f"; rel=next"}, + ) + paginator.advance(response) + assert not paginator.finished + assert paginator.current_value.hostname == api_hostname + assert paginator.current_value.path == resource_path + assert paginator.current_value.query == "page=2&limit=100" + assert paginator.count == 1 + + response.headers.update( + { + "Link": ( + f";rel=next," + f";rel=back" + ) + }, + ) + paginator.advance(response) + assert not paginator.finished + assert paginator.current_value.hostname == api_hostname + assert paginator.current_value.path == resource_path + assert paginator.current_value.query == "page=3&limit=100" + assert paginator.count == 2 + + response.headers.update( + {"Link": ";rel=back"}, + ) + paginator.advance(response) + assert paginator.finished + assert paginator.count == 3 + + +def test_paginator_custom_hateoas(): + """Validate paginator that uses HATEOAS links.""" + + class _CustomHATEOASPaginator(BaseHATEOASPaginator): + def get_next_url(self, response: Response) -> str | None: + """Get a parsed HATEOAS link for the next, if the response has one.""" + + try: + return first( + extract_jsonpath( + "$.links[?(@.rel=='next')].href", + response.json(), + ) + ) + except StopIteration: + return None + + resource_path = "/path/to/resource" + + response = Response() + paginator = _CustomHATEOASPaginator() + assert not paginator.finished + assert paginator.current_value is None + assert paginator.count == 0 + + response._content = json.dumps( + { + "links": [ + { + "rel": "next", + "href": f"{resource_path}?page=2&limit=100", + } + ] + } + ).encode() + paginator.advance(response) + assert not paginator.finished + assert paginator.current_value.path == resource_path + assert paginator.current_value.query == "page=2&limit=100" + assert paginator.count == 1 + + response._content = json.dumps( + { + "links": [ + { + "rel": "next", + "href": f"{resource_path}?page=3&limit=100", + } + ] + } + ).encode() + paginator.advance(response) + assert not paginator.finished + assert paginator.current_value.path == resource_path + assert paginator.current_value.query == "page=3&limit=100" + assert paginator.count == 2 + + response._content = json.dumps({"links": []}).encode() + paginator.advance(response) + assert paginator.finished + assert paginator.count == 3 diff --git a/tests/core/test_streams.py b/tests/core/test_streams.py index 8e1d44030..e63520301 100644 --- a/tests/core/test_streams.py +++ b/tests/core/test_streams.py @@ -1,13 +1,16 @@ """Stream tests.""" -from typing import Any, Dict, Iterable, List, Optional, cast +from __future__ import annotations + +from typing import Any, Iterable, cast import pendulum import pytest import requests from singer_sdk.helpers._classproperty import classproperty -from singer_sdk.helpers.jsonpath import _compile_jsonpath +from singer_sdk.helpers.jsonpath import _compile_jsonpath, extract_jsonpath +from singer_sdk.pagination import first from singer_sdk.streams.core import ( REPLICATION_FULL_TABLE, REPLICATION_INCREMENTAL, @@ -40,7 +43,7 @@ def __init__(self, tap: Tap): """Create a new stream.""" super().__init__(tap, schema=self.schema, name=self.name) - def get_records(self, context: Optional[dict]) -> Iterable[Dict[str, Any]]: + def get_records(self, context: dict | None) -> Iterable[dict[str, Any]]: """Generate records.""" yield {"id": 1, "value": "Egypt"} yield {"id": 2, "value": "Germany"} @@ -59,6 +62,24 @@ class RestTestStream(RESTStream): ).to_dict() replication_key = "updatedAt" + def get_next_page_token( + self, + response: requests.Response, + previous_token: str | None, + ) -> str | None: + if self.next_page_token_jsonpath: + all_matches = extract_jsonpath( + self.next_page_token_jsonpath, + response.json(), + ) + try: + return first(all_matches) + except StopIteration: + return None + + else: + return response.headers.get("X-Next-Page", None) + class GraphqlTestStream(GraphQLStream): """Test Graphql stream class.""" @@ -79,7 +100,7 @@ class SimpleTestTap(Tap): name = "test-tap" settings_jsonschema = PropertiesList(Property("start_date", DateTimeType)).to_dict() - def discover_streams(self) -> List[Stream]: + def discover_streams(self) -> list[Stream]: """List all streams.""" return [SimpleTestStream(self)] @@ -213,7 +234,7 @@ def test_stream_starting_timestamp(tap: SimpleTestTap, stream: SimpleTestStream) ], ) def test_jsonpath_rest_stream( - tap: SimpleTestTap, path: str, content: str, result: List[dict] + tap: SimpleTestTap, path: str, content: str, result: list[dict] ): """Validate records are extracted correctly from the API response.""" fake_response = requests.Response() @@ -344,7 +365,10 @@ def test_next_page_token_jsonpath( RestTestStream.next_page_token_jsonpath = path stream = RestTestStream(tap) - next_page = stream.get_next_page_token(fake_response, previous_token=None) + with pytest.warns(DeprecationWarning): + paginator = stream.get_new_paginator() + + next_page = paginator.get_next(fake_response) assert next_page == result diff --git a/tox.ini b/tox.ini index c8d7ee3ad..fb60961c2 100644 --- a/tox.ini +++ b/tox.ini @@ -57,7 +57,7 @@ max-line-length = 88 exclude = cookiecutter per-file-ignores = # Don't require docstrings or type annotations in tests - tests/*:D100,D102,D103,DAR,ANN + tests/*:D100,D102,D103,D202,DAR,ANN # Don't require docstrings conventions or type annotations in SDK samples samples/*:ANN,DAR # Don't require docstrings conventions or type annotations in private modules From 029410d608a89f7c349dd4af8f05d046d3ce9047 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Rami=CC=81rez=20Mondrago=CC=81n?= Date: Fri, 17 Jun 2022 11:41:36 -0500 Subject: [PATCH 2/9] docs: Add documentation for new implementation --- ...singer_sdk.pagination.BaseAPIPaginator.rst | 7 ++ ...er_sdk.pagination.BaseHATEOASPaginator.rst | 7 ++ ...ger_sdk.pagination.BaseOffsetPaginator.rst | 7 ++ ...sdk.pagination.BasePageNumberPaginator.rst | 7 ++ ...ger_sdk.pagination.HeaderLinkPaginator.rst | 7 ++ ...inger_sdk.pagination.JSONPathPaginator.rst | 7 ++ ...gination.LegacyPaginatedStreamProtocol.rst | 7 ++ ...r_sdk.pagination.LegacyStreamPaginator.rst | 7 ++ ...r_sdk.pagination.SimpleHeaderPaginator.rst | 7 ++ ...ger_sdk.pagination.SinglePagePaginator.rst | 7 ++ docs/porting.md | 68 ++++++++++++++++++- docs/reference.rst | 19 ++++++ 12 files changed, 156 insertions(+), 1 deletion(-) create mode 100644 docs/classes/singer_sdk.pagination.BaseAPIPaginator.rst create mode 100644 docs/classes/singer_sdk.pagination.BaseHATEOASPaginator.rst create mode 100644 docs/classes/singer_sdk.pagination.BaseOffsetPaginator.rst create mode 100644 docs/classes/singer_sdk.pagination.BasePageNumberPaginator.rst create mode 100644 docs/classes/singer_sdk.pagination.HeaderLinkPaginator.rst create mode 100644 docs/classes/singer_sdk.pagination.JSONPathPaginator.rst create mode 100644 docs/classes/singer_sdk.pagination.LegacyPaginatedStreamProtocol.rst create mode 100644 docs/classes/singer_sdk.pagination.LegacyStreamPaginator.rst create mode 100644 docs/classes/singer_sdk.pagination.SimpleHeaderPaginator.rst create mode 100644 docs/classes/singer_sdk.pagination.SinglePagePaginator.rst diff --git a/docs/classes/singer_sdk.pagination.BaseAPIPaginator.rst b/docs/classes/singer_sdk.pagination.BaseAPIPaginator.rst new file mode 100644 index 000000000..f44e8d617 --- /dev/null +++ b/docs/classes/singer_sdk.pagination.BaseAPIPaginator.rst @@ -0,0 +1,7 @@ +singer_sdk.pagination.BaseAPIPaginator +====================================== + +.. currentmodule:: singer_sdk.pagination + +.. autoclass:: BaseAPIPaginator + :members: \ No newline at end of file diff --git a/docs/classes/singer_sdk.pagination.BaseHATEOASPaginator.rst b/docs/classes/singer_sdk.pagination.BaseHATEOASPaginator.rst new file mode 100644 index 000000000..b5a6f2b5e --- /dev/null +++ b/docs/classes/singer_sdk.pagination.BaseHATEOASPaginator.rst @@ -0,0 +1,7 @@ +singer_sdk.pagination.BaseHATEOASPaginator +========================================== + +.. currentmodule:: singer_sdk.pagination + +.. autoclass:: BaseHATEOASPaginator + :members: \ No newline at end of file diff --git a/docs/classes/singer_sdk.pagination.BaseOffsetPaginator.rst b/docs/classes/singer_sdk.pagination.BaseOffsetPaginator.rst new file mode 100644 index 000000000..ea7370d5b --- /dev/null +++ b/docs/classes/singer_sdk.pagination.BaseOffsetPaginator.rst @@ -0,0 +1,7 @@ +singer_sdk.pagination.BaseOffsetPaginator +========================================= + +.. currentmodule:: singer_sdk.pagination + +.. autoclass:: BaseOffsetPaginator + :members: \ No newline at end of file diff --git a/docs/classes/singer_sdk.pagination.BasePageNumberPaginator.rst b/docs/classes/singer_sdk.pagination.BasePageNumberPaginator.rst new file mode 100644 index 000000000..d33515ec3 --- /dev/null +++ b/docs/classes/singer_sdk.pagination.BasePageNumberPaginator.rst @@ -0,0 +1,7 @@ +singer_sdk.pagination.BasePageNumberPaginator +============================================= + +.. currentmodule:: singer_sdk.pagination + +.. autoclass:: BasePageNumberPaginator + :members: \ No newline at end of file diff --git a/docs/classes/singer_sdk.pagination.HeaderLinkPaginator.rst b/docs/classes/singer_sdk.pagination.HeaderLinkPaginator.rst new file mode 100644 index 000000000..f651a4116 --- /dev/null +++ b/docs/classes/singer_sdk.pagination.HeaderLinkPaginator.rst @@ -0,0 +1,7 @@ +singer_sdk.pagination.HeaderLinkPaginator +========================================= + +.. currentmodule:: singer_sdk.pagination + +.. autoclass:: HeaderLinkPaginator + :members: \ No newline at end of file diff --git a/docs/classes/singer_sdk.pagination.JSONPathPaginator.rst b/docs/classes/singer_sdk.pagination.JSONPathPaginator.rst new file mode 100644 index 000000000..59be0a5bf --- /dev/null +++ b/docs/classes/singer_sdk.pagination.JSONPathPaginator.rst @@ -0,0 +1,7 @@ +singer_sdk.pagination.JSONPathPaginator +======================================= + +.. currentmodule:: singer_sdk.pagination + +.. autoclass:: JSONPathPaginator + :members: \ No newline at end of file diff --git a/docs/classes/singer_sdk.pagination.LegacyPaginatedStreamProtocol.rst b/docs/classes/singer_sdk.pagination.LegacyPaginatedStreamProtocol.rst new file mode 100644 index 000000000..7a3f529de --- /dev/null +++ b/docs/classes/singer_sdk.pagination.LegacyPaginatedStreamProtocol.rst @@ -0,0 +1,7 @@ +singer_sdk.pagination.LegacyPaginatedStreamProtocol +=================================================== + +.. currentmodule:: singer_sdk.pagination + +.. autoclass:: LegacyPaginatedStreamProtocol + :members: \ No newline at end of file diff --git a/docs/classes/singer_sdk.pagination.LegacyStreamPaginator.rst b/docs/classes/singer_sdk.pagination.LegacyStreamPaginator.rst new file mode 100644 index 000000000..9d96831cd --- /dev/null +++ b/docs/classes/singer_sdk.pagination.LegacyStreamPaginator.rst @@ -0,0 +1,7 @@ +singer_sdk.pagination.LegacyStreamPaginator +=========================================== + +.. currentmodule:: singer_sdk.pagination + +.. autoclass:: LegacyStreamPaginator + :members: \ No newline at end of file diff --git a/docs/classes/singer_sdk.pagination.SimpleHeaderPaginator.rst b/docs/classes/singer_sdk.pagination.SimpleHeaderPaginator.rst new file mode 100644 index 000000000..4fce626e3 --- /dev/null +++ b/docs/classes/singer_sdk.pagination.SimpleHeaderPaginator.rst @@ -0,0 +1,7 @@ +singer_sdk.pagination.SimpleHeaderPaginator +=========================================== + +.. currentmodule:: singer_sdk.pagination + +.. autoclass:: SimpleHeaderPaginator + :members: \ No newline at end of file diff --git a/docs/classes/singer_sdk.pagination.SinglePagePaginator.rst b/docs/classes/singer_sdk.pagination.SinglePagePaginator.rst new file mode 100644 index 000000000..dc15a5d74 --- /dev/null +++ b/docs/classes/singer_sdk.pagination.SinglePagePaginator.rst @@ -0,0 +1,7 @@ +singer_sdk.pagination.SinglePagePaginator +========================================= + +.. currentmodule:: singer_sdk.pagination + +.. autoclass:: SinglePagePaginator + :members: \ No newline at end of file diff --git a/docs/porting.md b/docs/porting.md index 91e27796f..1c884435b 100644 --- a/docs/porting.md +++ b/docs/porting.md @@ -103,10 +103,76 @@ _Important: If you've gotten this far, this is a good time to commit your code b Pagination is generally unique for almost every API. There's no single method that solves for very different API's approach to pagination. -Most likely you will use `get_next_page_token` to parse and return whatever the "next page" token is for your source, and you'll use `get_url_params` to define how to pass the "next page" token back to the API when asking for subsequent pages. +Most likely you will use [get_new_paginator](singer_sdk.RESTStream.get_new_paginator) to instantiate a [pagination class](./classes/singer_sdk.pagination.BaseAPIPaginator) for your source, and you'll use `get_url_params` to define how to pass the "next page" token back to the API when asking for subsequent pages. When you think you have it right, run `poetry run tap-mysource` again, and debug until you are confident the result is including multiple pages back from the API. +You can also add unit tests for your pagination implementation for additional confidence: + +```python +from singer_sdk.pagination import BaseHATEOASPaginator, first + + +class CustomHATEOASPaginator(BaseHATEOASPaginator): + def get_next_url(self, response: Response) -> str | None: + """Get a parsed HATEOAS link for the next, if the response has one.""" + + try: + return first( + extract_jsonpath("$.links[?(@.rel=='next')].href", response.json()) + ) + except StopIteration: + return None + + +def test_paginator_custom_hateoas(): + """Validate paginator that my custom paginator.""" + + resource_path = "/path/to/resource" + response = Response() + paginator = CustomHATEOASPaginator() + assert not paginator.finished + assert paginator.current_value is None + assert paginator.count == 0 + + response._content = json.dumps( + { + "links": [ + { + "rel": "next", + "href": f"{resource_path}?page=2&limit=100", + } + ] + } + ).encode() + paginator.advance(response) + assert not paginator.finished + assert paginator.current_value.path == resource_path + assert paginator.current_value.query == "page=2&limit=100" + assert paginator.count == 1 + + response._content = json.dumps( + { + "links": [ + { + "rel": "next", + "href": f"{resource_path}?page=3&limit=100", + } + ] + } + ).encode() + paginator.advance(response) + assert not paginator.finished + assert paginator.current_value.path == resource_path + assert paginator.current_value.query == "page=3&limit=100" + assert paginator.count == 2 + + response._content = json.dumps({"links": []}).encode() + paginator.advance(response) + assert paginator.finished + assert paginator.count == 3 +``` + Note: Depending on how well the API is designed, this could take 5 minutes or multiple hours. If you need help, sometimes [PostMan](https://postman.com) or [Thunder Client](https://marketplace.visualstudio.com/items?itemName=rangav.vscode-thunder-client) can be helpful in debugging the APIs specific quirks. ## Run pytest diff --git a/docs/reference.rst b/docs/reference.rst index 1641d357c..09e7ec51b 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -88,3 +88,22 @@ JSON Schema builder classes :template: module.rst typing + + +Pagination +---------- + +.. autosummary:: + :toctree: classes + :template: class.rst + + pagination.BaseAPIPaginator + pagination.SinglePagePaginator + pagination.BaseHATEOASPaginator + pagination.HeaderLinkPaginator + pagination.JSONPathPaginator + pagination.SimpleHeaderPaginator + pagination.BasePageNumberPaginator + pagination.BaseOffsetPaginator + pagination.LegacyPaginatedStreamProtocol + pagination.LegacyStreamPaginator From 0b191b2012a59554d79ed321b73a7f3e732d0e7b Mon Sep 17 00:00:00 2001 From: "Edgar R. M" Date: Fri, 17 Jun 2022 14:11:57 -0500 Subject: [PATCH 3/9] Update singer_sdk/pagination.py --- singer_sdk/pagination.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/singer_sdk/pagination.py b/singer_sdk/pagination.py index 5ee106470..3a7920a3e 100644 --- a/singer_sdk/pagination.py +++ b/singer_sdk/pagination.py @@ -175,7 +175,14 @@ def get_next(self, response: Response) -> None: class BaseHATEOASPaginator(BaseAPIPaginator[Optional[ParseResult]], metaclass=ABCMeta): - """Paginator class for APIs supporting HATEOAS links in their responses.""" + """Paginator class for APIs supporting HATEOAS links in their response bodies. + + HATEOAS stands for "Hypermedia as the Engine of Application State". See + https://en.wikipedia.org/wiki/HATEOAS. + + This paginator expects responses to have a key "next" with a value + like "https://api.com/link/to/next-item". + """ def __init__(self, *args: Any, **kwargs: Any) -> None: """Create a new paginator. From 259cf1af18c32d82e77335c24045110d68223e1e Mon Sep 17 00:00:00 2001 From: "Edgar R. M" Date: Fri, 17 Jun 2022 14:12:10 -0500 Subject: [PATCH 4/9] Update docs/porting.md Co-authored-by: Aaron ("AJ") Steers --- docs/porting.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/porting.md b/docs/porting.md index 1c884435b..856f18332 100644 --- a/docs/porting.md +++ b/docs/porting.md @@ -114,6 +114,12 @@ from singer_sdk.pagination import BaseHATEOASPaginator, first class CustomHATEOASPaginator(BaseHATEOASPaginator): + """Paginator for HATEOAS APIs - or "Hypermedia as the Engine of Application State". + + This paginator expects responses to have a key "next" with a value + like "https://api.com/link/to/next-item". + """" + def get_next_url(self, response: Response) -> str | None: """Get a parsed HATEOAS link for the next, if the response has one.""" From d1ce155eea52bf3525e127d0fc1fd1d37f73535f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Rami=CC=81rez=20Mondrago=CC=81n?= Date: Fri, 17 Jun 2022 14:20:04 -0500 Subject: [PATCH 5/9] Make linter happy --- singer_sdk/pagination.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/singer_sdk/pagination.py b/singer_sdk/pagination.py index 3a7920a3e..01ca30f87 100644 --- a/singer_sdk/pagination.py +++ b/singer_sdk/pagination.py @@ -176,10 +176,10 @@ def get_next(self, response: Response) -> None: class BaseHATEOASPaginator(BaseAPIPaginator[Optional[ParseResult]], metaclass=ABCMeta): """Paginator class for APIs supporting HATEOAS links in their response bodies. - + HATEOAS stands for "Hypermedia as the Engine of Application State". See https://en.wikipedia.org/wiki/HATEOAS. - + This paginator expects responses to have a key "next" with a value like "https://api.com/link/to/next-item". """ From 1a38579781f139af157d083dc0edb5cd65870b2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Rami=CC=81rez=20Mondrago=CC=81n?= Date: Fri, 17 Jun 2022 14:26:17 -0500 Subject: [PATCH 6/9] Make pre-commit happy --- docs/porting.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/porting.md b/docs/porting.md index 856f18332..05b88a582 100644 --- a/docs/porting.md +++ b/docs/porting.md @@ -114,11 +114,11 @@ from singer_sdk.pagination import BaseHATEOASPaginator, first class CustomHATEOASPaginator(BaseHATEOASPaginator): - """Paginator for HATEOAS APIs - or "Hypermedia as the Engine of Application State". - - This paginator expects responses to have a key "next" with a value - like "https://api.com/link/to/next-item". - """" + """Paginator for HATEOAS APIs - or "Hypermedia as the Engine of Application State". + + This paginator expects responses to have a key "next" with a value + like "https://api.com/link/to/next-item". + """" def get_next_url(self, response: Response) -> str | None: """Get a parsed HATEOAS link for the next, if the response has one.""" From 2c5c8427da9a2713688f0ef1bb5f52aea34e83bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Rami=CC=81rez=20Mondrago=CC=81n?= Date: Fri, 17 Jun 2022 15:50:31 -0500 Subject: [PATCH 7/9] Use args and kwargs for base class --- singer_sdk/pagination.py | 42 ++++++++++++++---------------- tests/core/rest/test_pagination.py | 5 +++- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/singer_sdk/pagination.py b/singer_sdk/pagination.py index 01ca30f87..f546bfeb2 100644 --- a/singer_sdk/pagination.py +++ b/singer_sdk/pagination.py @@ -38,18 +38,11 @@ def first(iterable: Iterable[T]) -> T: class BaseAPIPaginator(Generic[TPageToken], metaclass=ABCMeta): """An API paginator object.""" - def __init__( - self, - start_value: TPageToken, - *args: Any, - **kwargs: Any, - ) -> None: + def __init__(self, start_value: TPageToken) -> None: """Create a new paginator. Args: start_value: Initial value. - args: Paginator positional arguments. - kwargs: Paginator keyword arguments. """ self._value: TPageToken = start_value self._page_count = 0 @@ -157,9 +150,14 @@ def get_next(self, response: Response) -> TPageToken | None: class SinglePagePaginator(BaseAPIPaginator[None]): """A paginator that does works with single-page endpoints.""" - def __init__(self) -> None: - """Create a new paginator.""" - super().__init__(None) + def __init__(self, *args: Any, **kwargs: Any) -> None: + """Create a new paginator. + + Args: + args: Paginator positional arguments for base class. + kwargs: Paginator keyword arguments for base class. + """ + super().__init__(None, *args, **kwargs) def get_next(self, response: Response) -> None: """Get the next pagination token or index from the API response. @@ -188,8 +186,8 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: """Create a new paginator. Args: - args: Paginator positional arguments. - kwargs: Paginator keyword arguments. + args: Paginator positional arguments for base class. + kwargs: Paginator keyword arguments for base class. """ super().__init__(None, *args, **kwargs) @@ -249,10 +247,10 @@ def __init__( Args: jsonpath: A JSONPath expression. - args: Paginator positional arguments. - kwargs: Paginator keyword arguments. + args: Paginator positional arguments for base class. + kwargs: Paginator keyword arguments for base class. """ - super().__init__(None) + super().__init__(None, *args, **kwargs) self._jsonpath = jsonpath def get_next(self, response: Response) -> str | None: @@ -281,10 +279,10 @@ def __init__( Args: key: Header key that contains the next page token. - args: Paginator positional arguments. - kwargs: Paginator keyword arguments. + args: Paginator positional arguments for base class. + kwargs: Paginator keyword arguments for base class. """ - super().__init__(None) + super().__init__(None, *args, **kwargs) self._key = key def get_next(self, response: Response) -> str | None: @@ -345,7 +343,7 @@ def __init__( args: Paginator positional arguments. kwargs: Paginator keyword arguments. """ - super().__init__(start_value) + super().__init__(start_value, *args, **kwargs) self._page_size = page_size @abstractmethod @@ -405,8 +403,8 @@ def __init__( Args: stream: A RESTStream instance. - args: Paginator positional arguments. - kwargs: Paginator keyword arguments. + args: Paginator positional arguments for base class. + kwargs: Paginator keyword arguments for base class. """ super().__init__(None, *args, **kwargs) self.stream = stream diff --git a/tests/core/rest/test_pagination.py b/tests/core/rest/test_pagination.py index ca9e856fd..04f27169d 100644 --- a/tests/core/rest/test_pagination.py +++ b/tests/core/rest/test_pagination.py @@ -3,6 +3,7 @@ from __future__ import annotations import json +from typing import Any import pytest from requests import Response @@ -139,8 +140,10 @@ def __init__( start_value: int, page_size: int, records_jsonpath: str, + *args: Any, + **kwargs: Any, ) -> None: - super().__init__(start_value, page_size) + super().__init__(start_value, page_size, *args, **kwargs) self._records_jsonpath = records_jsonpath def has_more(self, response: Response) -> bool: From 1da104dbaf28d661233afd339993444c7a685d56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Rami=CC=81rez=20Mondrago=CC=81n?= Date: Fri, 17 Jun 2022 16:10:50 -0500 Subject: [PATCH 8/9] Full coverage for new code --- singer_sdk/pagination.py | 2 +- tests/core/rest/test_pagination.py | 12 ++++++++++++ tox.ini | 1 + 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/singer_sdk/pagination.py b/singer_sdk/pagination.py index f546bfeb2..8e294c3a5 100644 --- a/singer_sdk/pagination.py +++ b/singer_sdk/pagination.py @@ -384,7 +384,7 @@ def get_next_page_token( response: API response object. previous_token: Previous page token. """ - ... + ... # pragma: no cover class LegacyStreamPaginator( diff --git a/tests/core/rest/test_pagination.py b/tests/core/rest/test_pagination.py index 04f27169d..c61124610 100644 --- a/tests/core/rest/test_pagination.py +++ b/tests/core/rest/test_pagination.py @@ -77,6 +77,18 @@ def test_paginator_hateoas_missing_implementation(): BaseHATEOASPaginator() +def test_paginator_attributes(): + """Validate paginator that uses the page number.""" + + response = Response() + paginator = JSONPathPaginator(jsonpath="$.nextPageToken") + assert str(paginator) == "JSONPathPaginator" + + response._content = b'{"nextPageToken": "abc"}' + paginator.advance(response) + assert str(paginator) == "JSONPathPaginator" + + def test_paginator_loop(): """Validate paginator that uses the page number.""" diff --git a/tox.ini b/tox.ini index a586d3d9e..b78590114 100644 --- a/tox.ini +++ b/tox.ini @@ -86,4 +86,5 @@ exclude_lines = raise NotImplementedError if __name__ == .__main__.: class .*\bProtocol\): + @(abc\.)?abstractmethod fail_under = 85 From a34445bdaea358352ef4c761a0ef3a9da91cbd2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Rami=CC=81rez=20Mondrago=CC=81n?= Date: Wed, 29 Jun 2022 01:44:24 -0500 Subject: [PATCH 9/9] Remove commented type var --- singer_sdk/streams/rest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/singer_sdk/streams/rest.py b/singer_sdk/streams/rest.py index 98e042f1b..f9b216eed 100644 --- a/singer_sdk/streams/rest.py +++ b/singer_sdk/streams/rest.py @@ -29,7 +29,6 @@ DEFAULT_REQUEST_TIMEOUT = 300 # 5 minutes _TToken = TypeVar("_TToken") -# _TPaginator = TypeVar("_TPaginator", bound=BaseAPIPaginator) class RESTStream(Stream, Generic[_TToken], metaclass=abc.ABCMeta):