From ae4c8a952309059e5c83d40088ebc538e4b534c2 Mon Sep 17 00:00:00 2001 From: Mike Rossetti Date: Fri, 1 Sep 2023 17:28:06 -0400 Subject: [PATCH 01/16] Pull Statuses since datetime --- truthbrush/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/truthbrush/api.py b/truthbrush/api.py index b55dea7..1095c7a 100644 --- a/truthbrush/api.py +++ b/truthbrush/api.py @@ -279,7 +279,7 @@ def pull_statuses( most_recent_date = ( date_parse.parse(posts[-1]["created_at"]) .replace(tzinfo=timezone.utc) - .date() + # .date() # let's be more precise, using datetime instead of date, so we can still get statuses posted later in the same day as the latest previously collected status ) if created_after and most_recent_date < created_after: # Current and all future batches are too old From 64e838d5b518cd5ffc22c395d17ff2b17a076e0e Mon Sep 17 00:00:00 2001 From: Mike Rossetti Date: Fri, 1 Sep 2023 17:31:52 -0400 Subject: [PATCH 02/16] Update api.py --- truthbrush/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/truthbrush/api.py b/truthbrush/api.py index 1095c7a..ac1b91c 100644 --- a/truthbrush/api.py +++ b/truthbrush/api.py @@ -290,7 +290,7 @@ def pull_statuses( date_created = ( date_parse.parse(post["created_at"]) .replace(tzinfo=timezone.utc) - .date() + #.date() # let's be more precise, using datetime instead of date, so we can still get statuses posted later in the same day as the latest previously collected status ) if created_after and date_created < created_after: continue From 10ce1451ab9a39c728b69d91449fc8c285b98e24 Mon Sep 17 00:00:00 2001 From: Mike Rossetti Date: Fri, 1 Sep 2023 17:39:02 -0400 Subject: [PATCH 03/16] Update api.py this is still letting the latest status come through, so need to investigate --- truthbrush/api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/truthbrush/api.py b/truthbrush/api.py index ac1b91c..5f19516 100644 --- a/truthbrush/api.py +++ b/truthbrush/api.py @@ -276,6 +276,7 @@ def pull_statuses( posts = sorted(result, key=lambda k: k["id"]) params["max_id"] = posts[0]["id"] + breakpoint() most_recent_date = ( date_parse.parse(posts[-1]["created_at"]) .replace(tzinfo=timezone.utc) From 53584d0341fd6e5de9bd4142d705608ed212be0c Mon Sep 17 00:00:00 2001 From: Mike Rossetti Date: Fri, 1 Sep 2023 18:19:07 -0400 Subject: [PATCH 04/16] Update api.py --- truthbrush/api.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/truthbrush/api.py b/truthbrush/api.py index 5f19516..bea2fed 100644 --- a/truthbrush/api.py +++ b/truthbrush/api.py @@ -276,23 +276,15 @@ def pull_statuses( posts = sorted(result, key=lambda k: k["id"]) params["max_id"] = posts[0]["id"] - breakpoint() - most_recent_date = ( - date_parse.parse(posts[-1]["created_at"]) - .replace(tzinfo=timezone.utc) - # .date() # let's be more precise, using datetime instead of date, so we can still get statuses posted later in the same day as the latest previously collected status - ) + #breakpoint() + most_recent_date = date_parse.parse(posts[-1]["created_at"]).replace(microsecond=0, tzinfo=timezone.utc) if created_after and most_recent_date < created_after: # Current and all future batches are too old break for post in posts: post["_pulled"] = datetime.now().isoformat() - date_created = ( - date_parse.parse(post["created_at"]) - .replace(tzinfo=timezone.utc) - #.date() # let's be more precise, using datetime instead of date, so we can still get statuses posted later in the same day as the latest previously collected status - ) + date_created = date_parse.parse(post["created_at"]).replace(microsecond=0, tzinfo=timezone.utc) if created_after and date_created < created_after: continue From 6a80f466f6d3787fa9788412d7987bc647a13186 Mon Sep 17 00:00:00 2001 From: Mike Rossetti Date: Fri, 1 Sep 2023 18:23:56 -0400 Subject: [PATCH 05/16] Update api.py Break if we are looking at the same timestamp as the latest post (when they are equal) --- truthbrush/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/truthbrush/api.py b/truthbrush/api.py index bea2fed..eee3880 100644 --- a/truthbrush/api.py +++ b/truthbrush/api.py @@ -278,7 +278,7 @@ def pull_statuses( #breakpoint() most_recent_date = date_parse.parse(posts[-1]["created_at"]).replace(microsecond=0, tzinfo=timezone.utc) - if created_after and most_recent_date < created_after: + if created_after and most_recent_date <= created_after: # Current and all future batches are too old break From 5c98e5801ce596b07d3ae6ce886a372d1e76cd54 Mon Sep 17 00:00:00 2001 From: Mike Rossetti Date: Fri, 1 Sep 2023 18:33:05 -0400 Subject: [PATCH 06/16] Update api.py Update docstring --- truthbrush/api.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/truthbrush/api.py b/truthbrush/api.py index eee3880..ada39b5 100644 --- a/truthbrush/api.py +++ b/truthbrush/api.py @@ -242,9 +242,12 @@ def user_following( return def pull_statuses( - self, username: str, created_after: date, replies: bool + self, username: str, created_after: datetime, replies: bool ) -> List[dict]: - """Pull the given user's statuses. Returns an empty list if not found.""" + """Pull the given user's statuses. Returns an empty list if not found. + + Params: created_after : currently needs to be a timezone-aware datetime object + """ params = {} id = self.lookup(username)["id"] @@ -276,7 +279,6 @@ def pull_statuses( posts = sorted(result, key=lambda k: k["id"]) params["max_id"] = posts[0]["id"] - #breakpoint() most_recent_date = date_parse.parse(posts[-1]["created_at"]).replace(microsecond=0, tzinfo=timezone.utc) if created_after and most_recent_date <= created_after: # Current and all future batches are too old From 33f197e48427a9972461c08c71088938b859c8c4 Mon Sep 17 00:00:00 2001 From: MJ Rossetti Date: Fri, 1 Sep 2023 20:34:04 -0400 Subject: [PATCH 07/16] Simplify and test timeline pulling --- README.md | 17 ++++++++++++++++ test/test_api.py | 50 +++++++++++++++++++++++++++++++++++++++++++++ truthbrush/api.py | 52 +++++++++++++++++++++++++++++++---------------- truthbrush/cli.py | 8 +------- 4 files changed, 102 insertions(+), 25 deletions(-) create mode 100644 test/test_api.py diff --git a/README.md b/README.md index c36e86d..ab47eb9 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,23 @@ truthbrush user HANDLE Contributions are encouraged! For small bug fixes and minor improvements, feel free to just open a PR. For larger changes, please open an issue first so that other contributors can discuss your plan, avoid duplicated work, and ensure it aligns with the goals of the project. Be sure to also follow the [code of conduct](CODE_OF_CONDUCT.md). Thanks! +Development setup: + +```sh +conda create -n truthbrush-env python=3.9 +conda activate truthbrush-env + +conda install -c conda-forge poetry +poetry install +``` + +Running tests: + +```sh +pytest +``` + + ## Wishlist Support for the following capabilities is planned: diff --git a/test/test_api.py b/test/test_api.py new file mode 100644 index 0000000..fc24c44 --- /dev/null +++ b/test/test_api.py @@ -0,0 +1,50 @@ + +from datetime import datetime, timezone +from dateutil import parser as date_parse + +import pytest + +from truthbrush import Api + +@pytest.fixture(scope="module") +def api(): + return Api() + + +def test_lookup(api): + user = api.lookup(user_handle="realDonaldTrump") + assert list(user.keys()) == [ + 'id', 'username', 'acct', 'display_name', + 'locked', 'bot', 'discoverable', 'group', 'created_at', + 'note', 'url', 'avatar', 'avatar_static', 'header', 'header_static', + 'followers_count', 'following_count', 'statuses_count', 'last_status_at', + 'verified', 'location', 'website', + 'accepting_messages', 'chats_onboarded', 'feeds_onboarded', + 'show_nonmember_group_statuses', 'pleroma', 'emojis', 'fields' + ] + + +def test_pull_statuses(api): + username = "truthsocial" + + # it fetches a timeline of the user's posts: + full_timeline = list(api.pull_statuses(username=username, replies=False)) + latest = full_timeline[-1] + assert any(full_timeline) + print(len(full_timeline)) + assert list(latest.keys()) == ['id', 'created_at', + 'in_reply_to_id', 'quote_id', 'in_reply_to_account_id', + 'sensitive', 'spoiler_text', 'visibility', 'language', 'uri', 'url', + 'content', 'account', 'media_attachments', 'mentions', 'tags', 'card', + 'group', 'quote', 'in_reply_to', 'reblog', 'sponsored', + 'replies_count', 'reblogs_count', 'favourites_count', 'favourited', 'reblogged', + 'muted', 'pinned', 'bookmarked', 'poll', 'emojis', '_pulled' + ] + + # can use created_after param for filtering out posts: + # (this test assumes no posts generated between the previous pull and now) + latest_at = latest["created_at"] + latest_at = date_parse.parse(latest_at).replace(tzinfo=timezone.utc) + next_pull = list(api.pull_statuses(username=username, replies=False, created_after=latest_at)) + print(len(next_pull)) + assert not any(next_pull) diff --git a/truthbrush/api.py b/truthbrush/api.py index ada39b5..879a214 100644 --- a/truthbrush/api.py +++ b/truthbrush/api.py @@ -7,6 +7,10 @@ import json import logging import os +from dotenv import load_dotenv +from pprint import pprint + +load_dotenv() # take environment variables from .env. logging.basicConfig( level=( @@ -16,6 +20,7 @@ ) ) + BASE_URL = "https://truthsocial.com" API_BASE_URL = "https://truthsocial.com/api" USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36" @@ -24,15 +29,19 @@ CLIENT_ID = "9X1Fdd-pxNsAgEDNi_SfhJWi8T-vLuV2WVzKIbkTCw4" CLIENT_SECRET = "ozF8jzI4968oTKFkEnsBC-UbLPCdrSv0MkXGQu2o_-M" + proxies = {"http": os.getenv("http_proxy"), "https": os.getenv("https_proxy")} +TRUTHSOCIAL_USERNAME = os.getenv("TRUTHSOCIAL_USERNAME") +TRUTHSOCIAL_PASSWORD = os.getenv("TRUTHSOCIAL_PASSWORD") +TRUTHSOCIAL_TOKEN = os.getenv("TRUTHSOCIAL_TOKEN") class LoginErrorException(Exception): pass class Api: - def __init__(self, username: str = None, password: str = None, token: str = None): + def __init__(self, username: str = TRUTHSOCIAL_USERNAME, password: str = TRUTHSOCIAL_PASSWORD, token: str = TRUTHSOCIAL_TOKEN): self.ratelimit_max = 300 self.ratelimit_remaining = None self.ratelimit_reset = None @@ -242,31 +251,35 @@ def user_following( return def pull_statuses( - self, username: str, created_after: datetime, replies: bool + self, username: str, replies: bool, created_after: datetime=None, since_id=None ) -> List[dict]: - """Pull the given user's statuses. Returns an empty list if not found. + """Pull the given user's statuses. + + Returns a list of statuses (in ascending order), or an empty list if not found. Params: created_after : currently needs to be a timezone-aware datetime object """ params = {} - id = self.lookup(username)["id"] + user_id = self.lookup(username)["id"] + page_counter=1 while True: try: - url = f"/v1/accounts/{id}/statuses" + url = f"/v1/accounts/{user_id}/statuses" if not replies: url += "?exclude_replies=true" result = self._get(url, params=params) + page_counter+=1 except json.JSONDecodeError as e: - logger.error(f"Unable to pull user #{id}'s statuses': {e}") + logger.error(f"Unable to pull user #{user_id}'s statuses': {e}") break except Exception as e: - logger.error(f"Misc. error while pulling statuses for {id}: {e}") + logger.error(f"Misc. error while pulling statuses for {user_id}: {e}") break if "error" in result: logger.error( - f"API returned an error while pulling user #{id}'s statuses: {result}" + f"API returned an error while pulling user #{user_id}'s statuses: {result}" ) break @@ -276,22 +289,25 @@ def pull_statuses( if not isinstance(result, list): logger.error(f"Result is not a list (it's a {type(result)}): {result}") - posts = sorted(result, key=lambda k: k["id"]) - params["max_id"] = posts[0]["id"] - - most_recent_date = date_parse.parse(posts[-1]["created_at"]).replace(microsecond=0, tzinfo=timezone.utc) - if created_after and most_recent_date <= created_after: - # Current and all future batches are too old - break + posts = sorted(result, key=lambda k: k["id"], reverse=True) # latest first, earliest last + params["max_id"] = posts[-1]["id"] # max for next pull is the earliest + #print("----------") + #print("PAGE:", page_counter, "...") for post in posts: post["_pulled"] = datetime.now().isoformat() - date_created = date_parse.parse(post["created_at"]).replace(microsecond=0, tzinfo=timezone.utc) - if created_after and date_created < created_after: - continue + + #print(post["created_at"]) + post_at = date_parse.parse(post["created_at"]).replace(tzinfo=timezone.utc) + if created_after and post_at >= created_after: + break yield post + + + + def get_auth_id(self, username: str, password: str) -> str: """Logs in to Truth account and returns the session token""" url = BASE_URL + "/oauth/token" diff --git a/truthbrush/cli.py b/truthbrush/cli.py index 0292d0c..44d2abb 100644 --- a/truthbrush/cli.py +++ b/truthbrush/cli.py @@ -4,16 +4,10 @@ import os import click from datetime import date -from dotenv import load_dotenv from .api import Api -load_dotenv() # take environment variables from .env. -api = Api( - os.getenv("TRUTHSOCIAL_USERNAME"), - os.getenv("TRUTHSOCIAL_PASSWORD"), - os.getenv("TRUTHSOCIAL_TOKEN"), -) +api = Api() @click.group() From 738709d7e940dff5458e0388a0a4793c72f5bacc Mon Sep 17 00:00:00 2001 From: MJ Rossetti Date: Fri, 1 Sep 2023 21:14:45 -0400 Subject: [PATCH 08/16] WIP --- test/test_api.py | 32 +++++++++++++++++++++++--------- truthbrush/api.py | 26 +++++++++++++++++++------- 2 files changed, 42 insertions(+), 16 deletions(-) diff --git a/test/test_api.py b/test/test_api.py index fc24c44..c542e60 100644 --- a/test/test_api.py +++ b/test/test_api.py @@ -28,10 +28,15 @@ def test_pull_statuses(api): username = "truthsocial" # it fetches a timeline of the user's posts: - full_timeline = list(api.pull_statuses(username=username, replies=False)) - latest = full_timeline[-1] - assert any(full_timeline) - print(len(full_timeline)) + full_timeline = list(api.pull_statuses(username=username, replies=False, verbose=True)) + assert len(full_timeline) > 20 # more than one page + + latest = full_timeline[0] + latest_at = date_parse.parse(latest["created_at"]).replace(tzinfo=timezone.utc) + earliest = full_timeline[-1] + earliest_at = date_parse.parse(earliest["created_at"]).replace(tzinfo=timezone.utc) + assert earliest_at < latest_at + assert list(latest.keys()) == ['id', 'created_at', 'in_reply_to_id', 'quote_id', 'in_reply_to_account_id', 'sensitive', 'spoiler_text', 'visibility', 'language', 'uri', 'url', @@ -42,9 +47,18 @@ def test_pull_statuses(api): ] # can use created_after param for filtering out posts: - # (this test assumes no posts generated between the previous pull and now) - latest_at = latest["created_at"] - latest_at = date_parse.parse(latest_at).replace(tzinfo=timezone.utc) - next_pull = list(api.pull_statuses(username=username, replies=False, created_after=latest_at)) - print(len(next_pull)) + # (this test assumes no posts generated between the first pull and now) + next_pull = list(api.pull_statuses(username=username, replies=False, created_after=latest_at, verbose=True)) assert not any(next_pull) + + n_lag = 50 + recent = full_timeline[n_lag+1] + recent_at = date_parse.parse(recent["created_at"]).replace(tzinfo=timezone.utc) + partial_pull = list(api.pull_statuses(username=username, replies=False, created_after=recent_at, verbose=True)) + assert len(partial_pull) == len(full_timeline) - + 1 + + # can use id_after param for filtering out posts: + # (this test assumes no posts generated between the first pull and now) + #latest_id = latest["id"] + #next_pull = list(api.pull_statuses(username=username, replies=False, id_after=latest_id, verbose=True)) + #assert not any(next_pull) diff --git a/truthbrush/api.py b/truthbrush/api.py index 879a214..e861ed3 100644 --- a/truthbrush/api.py +++ b/truthbrush/api.py @@ -251,7 +251,8 @@ def user_following( return def pull_statuses( - self, username: str, replies: bool, created_after: datetime=None, since_id=None + self, username: str, replies: bool, + created_after: datetime=None, id_after=None, verbose=False ) -> List[dict]: """Pull the given user's statuses. @@ -262,7 +263,7 @@ def pull_statuses( params = {} user_id = self.lookup(username)["id"] - page_counter=1 + page_counter = 0 while True: try: url = f"/v1/accounts/{user_id}/statuses" @@ -292,15 +293,26 @@ def pull_statuses( posts = sorted(result, key=lambda k: k["id"], reverse=True) # latest first, earliest last params["max_id"] = posts[-1]["id"] # max for next pull is the earliest - #print("----------") - #print("PAGE:", page_counter, "...") + if verbose: + print("----------") + print("PAGE:", page_counter, "...") + pprint([post["created_at"] for post in posts]) + + earliest_at = date_parse.parse(posts[-1]["created_at"]).replace(tzinfo=timezone.utc) + if created_after and earliest_at <= created_after: + # Current and all future batches are too old + break + + # loop through posts in descending order, as long as posts are later than the specified date for post in posts: post["_pulled"] = datetime.now().isoformat() - #print(post["created_at"]) post_at = date_parse.parse(post["created_at"]).replace(tzinfo=timezone.utc) - if created_after and post_at >= created_after: - break + if created_after and post_at < created_after: + continue + + #if id_after and post["id"] < id_after: + # break yield post From e9005226cd910b07587028e54c404309d224fbe4 Mon Sep 17 00:00:00 2001 From: MJ Rossetti Date: Sat, 2 Sep 2023 15:02:37 -0400 Subject: [PATCH 09/16] Return only posts after the specified date --- test/test_api.py | 34 ++++++++++++++------------- truthbrush/api.py | 59 ++++++++++++++++++++++------------------------- 2 files changed, 46 insertions(+), 47 deletions(-) diff --git a/test/test_api.py b/test/test_api.py index c542e60..0d94d18 100644 --- a/test/test_api.py +++ b/test/test_api.py @@ -4,7 +4,7 @@ import pytest -from truthbrush import Api +from truthbrush.api import Api @pytest.fixture(scope="module") def api(): @@ -29,7 +29,7 @@ def test_pull_statuses(api): # it fetches a timeline of the user's posts: full_timeline = list(api.pull_statuses(username=username, replies=False, verbose=True)) - assert len(full_timeline) > 20 # more than one page + assert len(full_timeline) > 25 # more than one page latest = full_timeline[0] latest_at = date_parse.parse(latest["created_at"]).replace(tzinfo=timezone.utc) @@ -37,28 +37,30 @@ def test_pull_statuses(api): earliest_at = date_parse.parse(earliest["created_at"]).replace(tzinfo=timezone.utc) assert earliest_at < latest_at - assert list(latest.keys()) == ['id', 'created_at', - 'in_reply_to_id', 'quote_id', 'in_reply_to_account_id', - 'sensitive', 'spoiler_text', 'visibility', 'language', 'uri', 'url', - 'content', 'account', 'media_attachments', 'mentions', 'tags', 'card', - 'group', 'quote', 'in_reply_to', 'reblog', 'sponsored', - 'replies_count', 'reblogs_count', 'favourites_count', 'favourited', 'reblogged', - 'muted', 'pinned', 'bookmarked', 'poll', 'emojis', '_pulled' - ] - # can use created_after param for filtering out posts: # (this test assumes no posts generated between the first pull and now) next_pull = list(api.pull_statuses(username=username, replies=False, created_after=latest_at, verbose=True)) assert not any(next_pull) - n_lag = 50 - recent = full_timeline[n_lag+1] - recent_at = date_parse.parse(recent["created_at"]).replace(tzinfo=timezone.utc) - partial_pull = list(api.pull_statuses(username=username, replies=False, created_after=recent_at, verbose=True)) - assert len(partial_pull) == len(full_timeline) - + 1 + #n_lag = 50 + #recent = full_timeline[n_lag+1] + #recent_at = date_parse.parse(recent["created_at"]).replace(tzinfo=timezone.utc) + #partial_pull = list(api.pull_statuses(username=username, replies=False, created_after=recent_at, verbose=True)) + #assert len(partial_pull) == len(full_timeline) - + 1 # can use id_after param for filtering out posts: # (this test assumes no posts generated between the first pull and now) #latest_id = latest["id"] #next_pull = list(api.pull_statuses(username=username, replies=False, id_after=latest_id, verbose=True)) #assert not any(next_pull) + + + # contains status info + assert list(latest.keys()) == ['id', 'created_at', + 'in_reply_to_id', 'quote_id', 'in_reply_to_account_id', + 'sensitive', 'spoiler_text', 'visibility', 'language', 'uri', 'url', + 'content', 'account', 'media_attachments', 'mentions', 'tags', 'card', + 'group', 'quote', 'in_reply_to', 'reblog', 'sponsored', + 'replies_count', 'reblogs_count', 'favourites_count', 'favourited', 'reblogged', + 'muted', 'pinned', 'bookmarked', 'poll', 'emojis', '_pulled' + ] diff --git a/truthbrush/api.py b/truthbrush/api.py index e861ed3..52e5358 100644 --- a/truthbrush/api.py +++ b/truthbrush/api.py @@ -8,7 +8,6 @@ import logging import os from dotenv import load_dotenv -from pprint import pprint load_dotenv() # take environment variables from .env. @@ -20,7 +19,6 @@ ) ) - BASE_URL = "https://truthsocial.com" API_BASE_URL = "https://truthsocial.com/api" USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36" @@ -29,7 +27,6 @@ CLIENT_ID = "9X1Fdd-pxNsAgEDNi_SfhJWi8T-vLuV2WVzKIbkTCw4" CLIENT_SECRET = "ozF8jzI4968oTKFkEnsBC-UbLPCdrSv0MkXGQu2o_-M" - proxies = {"http": os.getenv("http_proxy"), "https": os.getenv("https_proxy")} TRUTHSOCIAL_USERNAME = os.getenv("TRUTHSOCIAL_USERNAME") @@ -41,7 +38,7 @@ class LoginErrorException(Exception): class Api: - def __init__(self, username: str = TRUTHSOCIAL_USERNAME, password: str = TRUTHSOCIAL_PASSWORD, token: str = TRUTHSOCIAL_TOKEN): + def __init__(self, username=TRUTHSOCIAL_USERNAME, password=TRUTHSOCIAL_PASSWORD, token=TRUTHSOCIAL_TOKEN): self.ratelimit_max = 300 self.ratelimit_remaining = None self.ratelimit_reset = None @@ -250,27 +247,36 @@ def user_following( if maximum is not None and n_output >= maximum: return + #@classmethod / @staticmethod + #def as_datetime(self, dt): + # return date_parse.parse(dt).replace(tzinfo=timezone.utc) + def pull_statuses( - self, username: str, replies: bool, - created_after: datetime=None, id_after=None, verbose=False + self, username: str, replies=False, created_after: datetime=None, verbose=False ) -> List[dict]: """Pull the given user's statuses. - Returns a list of statuses (in ascending order), or an empty list if not found. + Params: + created_after : timezone aware datetime object - Params: created_after : currently needs to be a timezone-aware datetime object + Returns a list of posts in reverse chronological order, + or an empty list if not found. """ params = {} user_id = self.lookup(username)["id"] page_counter = 0 - while True: + keep_going = True + while keep_going: try: url = f"/v1/accounts/{user_id}/statuses" if not replies: url += "?exclude_replies=true" + if verbose: + print("--------------------------") + print(url, params) result = self._get(url, params=params) - page_counter+=1 + page_counter +=1 except json.JSONDecodeError as e: logger.error(f"Unable to pull user #{user_id}'s statuses': {e}") break @@ -279,9 +285,7 @@ def pull_statuses( break if "error" in result: - logger.error( - f"API returned an error while pulling user #{user_id}'s statuses: {result}" - ) + logger.error(f"API returned an error while pulling user #{user_id}'s statuses: {result}") break if len(result) == 0: @@ -290,36 +294,29 @@ def pull_statuses( if not isinstance(result, list): logger.error(f"Result is not a list (it's a {type(result)}): {result}") - posts = sorted(result, key=lambda k: k["id"], reverse=True) # latest first, earliest last - params["max_id"] = posts[-1]["id"] # max for next pull is the earliest + posts = sorted(result, key=lambda k: k["id"], reverse=True) # reverse chronological order (recent first, older last) + params["max_id"] = posts[-1]["id"] # when pulling the next page, get posts before this (the oldest) if verbose: - print("----------") - print("PAGE:", page_counter, "...") - pprint([post["created_at"] for post in posts]) + print("PAGE", page_counter) - earliest_at = date_parse.parse(posts[-1]["created_at"]).replace(tzinfo=timezone.utc) - if created_after and earliest_at <= created_after: - # Current and all future batches are too old - break - - # loop through posts in descending order, as long as posts are later than the specified date for post in posts: post["_pulled"] = datetime.now().isoformat() + # only keep posts created after the specified date + # exclude posts created before the specified date + # since the page is listed in reverse chronology, we don't need any remaining posts on this page either post_at = date_parse.parse(post["created_at"]).replace(tzinfo=timezone.utc) - if created_after and post_at < created_after: - continue + if created_after and post_at <= created_after: + keep_going = False # stop the loop, request no more pages + break # do not yeild this post or remaining (older) posts on this page - #if id_after and post["id"] < id_after: - # break + if verbose: + print(post["id"], post["created_at"]) yield post - - - def get_auth_id(self, username: str, password: str) -> str: """Logs in to Truth account and returns the session token""" url = BASE_URL + "/oauth/token" From 04c82cc321d44a80db33a1f89ad60646f1dbe50d Mon Sep 17 00:00:00 2001 From: MJ Rossetti Date: Sat, 2 Sep 2023 15:11:38 -0400 Subject: [PATCH 10/16] Test subsequent timeline pulls --- test/test_api.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/test/test_api.py b/test/test_api.py index 0d94d18..1a18b0a 100644 --- a/test/test_api.py +++ b/test/test_api.py @@ -6,11 +6,16 @@ from truthbrush.api import Api + @pytest.fixture(scope="module") def api(): return Api() +def as_datetime(date_str): + return date_parse.parse(date_str).replace(tzinfo=timezone.utc) + + def test_lookup(api): user = api.lookup(user_handle="realDonaldTrump") assert list(user.keys()) == [ @@ -32,21 +37,23 @@ def test_pull_statuses(api): assert len(full_timeline) > 25 # more than one page latest = full_timeline[0] - latest_at = date_parse.parse(latest["created_at"]).replace(tzinfo=timezone.utc) + latest_at = as_datetime(latest["created_at"]) earliest = full_timeline[-1] - earliest_at = date_parse.parse(earliest["created_at"]).replace(tzinfo=timezone.utc) + earliest_at = as_datetime(earliest["created_at"]) assert earliest_at < latest_at # can use created_after param for filtering out posts: # (this test assumes no posts generated between the first pull and now) + next_pull = list(api.pull_statuses(username=username, replies=False, created_after=latest_at, verbose=True)) assert not any(next_pull) - #n_lag = 50 - #recent = full_timeline[n_lag+1] - #recent_at = date_parse.parse(recent["created_at"]).replace(tzinfo=timezone.utc) - #partial_pull = list(api.pull_statuses(username=username, replies=False, created_after=recent_at, verbose=True)) - #assert len(partial_pull) == len(full_timeline) - + 1 + n_posts = 50 + recent = full_timeline[n_posts] + recent_at = as_datetime(recent["created_at"]) + partial_pull = list(api.pull_statuses(username=username, replies=False, created_after=recent_at, verbose=True)) + assert len(partial_pull) == n_posts + assert recent["id"] not in [post["id"] for post in partial_pull] # can use id_after param for filtering out posts: # (this test assumes no posts generated between the first pull and now) From fd5b212367327abbaf14f0f1668e546b52406006 Mon Sep 17 00:00:00 2001 From: MJ Rossetti Date: Sat, 2 Sep 2023 15:31:18 -0400 Subject: [PATCH 11/16] Filter posts based on id --- test/test_api.py | 36 ++++++++++++++++++++++++------------ truthbrush/api.py | 6 +++--- 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/test/test_api.py b/test/test_api.py index 1a18b0a..d98fc74 100644 --- a/test/test_api.py +++ b/test/test_api.py @@ -27,41 +27,52 @@ def test_lookup(api): 'accepting_messages', 'chats_onboarded', 'feeds_onboarded', 'show_nonmember_group_statuses', 'pleroma', 'emojis', 'fields' ] + assert isinstance(user["id"], str) def test_pull_statuses(api): username = "truthsocial" + # COMPLETE PULLS + # it fetches a timeline of the user's posts: full_timeline = list(api.pull_statuses(username=username, replies=False, verbose=True)) assert len(full_timeline) > 25 # more than one page - latest = full_timeline[0] - latest_at = as_datetime(latest["created_at"]) - earliest = full_timeline[-1] - earliest_at = as_datetime(earliest["created_at"]) + # the posts are in reverse chronological order: + latest, earliest = full_timeline[0], full_timeline[-1] + latest_at, earliest_at = as_datetime(latest["created_at"]), as_datetime(earliest["created_at"]) assert earliest_at < latest_at - # can use created_after param for filtering out posts: - # (this test assumes no posts generated between the first pull and now) + # EMPTY PULLS + # can use created_after param for filtering out posts: next_pull = list(api.pull_statuses(username=username, replies=False, created_after=latest_at, verbose=True)) assert not any(next_pull) - n_posts = 50 + # can use since_id param for filtering out posts: + next_pull = list(api.pull_statuses(username=username, replies=False, since_id=latest["id"], verbose=True)) + assert not any(next_pull) + + # PARTIAL PULLS + + n_posts = 50 # two and a half pages worth, to verify everything is ok recent = full_timeline[n_posts] recent_at = as_datetime(recent["created_at"]) + + # can use created_after param for filtering out posts: partial_pull = list(api.pull_statuses(username=username, replies=False, created_after=recent_at, verbose=True)) assert len(partial_pull) == n_posts assert recent["id"] not in [post["id"] for post in partial_pull] - # can use id_after param for filtering out posts: - # (this test assumes no posts generated between the first pull and now) - #latest_id = latest["id"] - #next_pull = list(api.pull_statuses(username=username, replies=False, id_after=latest_id, verbose=True)) - #assert not any(next_pull) + # can use since_id param for filtering out posts: + partial_pull = list(api.pull_statuses(username=username, replies=False, since_id=recent["id"], verbose=True)) + assert len(partial_pull) == n_posts + assert recent["id"] not in [post["id"] for post in partial_pull] + + # POST INFO # contains status info assert list(latest.keys()) == ['id', 'created_at', 'in_reply_to_id', 'quote_id', 'in_reply_to_account_id', @@ -71,3 +82,4 @@ def test_pull_statuses(api): 'replies_count', 'reblogs_count', 'favourites_count', 'favourited', 'reblogged', 'muted', 'pinned', 'bookmarked', 'poll', 'emojis', '_pulled' ] + assert isinstance(latest["id"], str) diff --git a/truthbrush/api.py b/truthbrush/api.py index 52e5358..57a40da 100644 --- a/truthbrush/api.py +++ b/truthbrush/api.py @@ -252,12 +252,13 @@ def user_following( # return date_parse.parse(dt).replace(tzinfo=timezone.utc) def pull_statuses( - self, username: str, replies=False, created_after: datetime=None, verbose=False + self, username: str, replies=False, verbose=False, created_after: datetime=None, since_id=None, ) -> List[dict]: """Pull the given user's statuses. Params: created_after : timezone aware datetime object + since_id : number or string Returns a list of posts in reverse chronological order, or an empty list if not found. @@ -307,7 +308,7 @@ def pull_statuses( # exclude posts created before the specified date # since the page is listed in reverse chronology, we don't need any remaining posts on this page either post_at = date_parse.parse(post["created_at"]).replace(tzinfo=timezone.utc) - if created_after and post_at <= created_after: + if (created_after and post_at <= created_after) or (since_id and post["id"] <= since_id): keep_going = False # stop the loop, request no more pages break # do not yeild this post or remaining (older) posts on this page @@ -316,7 +317,6 @@ def pull_statuses( yield post - def get_auth_id(self, username: str, password: str) -> str: """Logs in to Truth account and returns the session token""" url = BASE_URL + "/oauth/token" From c8dd21b7117a2d5bb1412cc8c66fbbb95ca5f8f7 Mon Sep 17 00:00:00 2001 From: MJ Rossetti Date: Sat, 2 Sep 2023 15:39:39 -0400 Subject: [PATCH 12/16] Code review --- test/test_api.py | 1 + truthbrush/api.py | 4 ---- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/test/test_api.py b/test/test_api.py index d98fc74..e3eaa23 100644 --- a/test/test_api.py +++ b/test/test_api.py @@ -13,6 +13,7 @@ def api(): def as_datetime(date_str): + """Datetime formatter function. Ensures timezone is UTC. Consider moving to Api class.""" return date_parse.parse(date_str).replace(tzinfo=timezone.utc) diff --git a/truthbrush/api.py b/truthbrush/api.py index 57a40da..aaaad06 100644 --- a/truthbrush/api.py +++ b/truthbrush/api.py @@ -247,10 +247,6 @@ def user_following( if maximum is not None and n_output >= maximum: return - #@classmethod / @staticmethod - #def as_datetime(self, dt): - # return date_parse.parse(dt).replace(tzinfo=timezone.utc) - def pull_statuses( self, username: str, replies=False, verbose=False, created_after: datetime=None, since_id=None, ) -> List[dict]: From a0cc39d0e1c2813bf086d93f0c27d99c67ef2178 Mon Sep 17 00:00:00 2001 From: "R. Miles McCain" Date: Sat, 30 Sep 2023 20:00:16 -0700 Subject: [PATCH 13/16] Fix formatting --- test/test_api.py | 116 ++++++++++++++++++++++++++++++++--------- truthbrush/__init__.py | 2 +- truthbrush/api.py | 53 +++++++++++++------ 3 files changed, 129 insertions(+), 42 deletions(-) diff --git a/test/test_api.py b/test/test_api.py index e3eaa23..c05d0c1 100644 --- a/test/test_api.py +++ b/test/test_api.py @@ -1,4 +1,3 @@ - from datetime import datetime, timezone from dateutil import parser as date_parse @@ -20,13 +19,35 @@ def as_datetime(date_str): def test_lookup(api): user = api.lookup(user_handle="realDonaldTrump") assert list(user.keys()) == [ - 'id', 'username', 'acct', 'display_name', - 'locked', 'bot', 'discoverable', 'group', 'created_at', - 'note', 'url', 'avatar', 'avatar_static', 'header', 'header_static', - 'followers_count', 'following_count', 'statuses_count', 'last_status_at', - 'verified', 'location', 'website', - 'accepting_messages', 'chats_onboarded', 'feeds_onboarded', - 'show_nonmember_group_statuses', 'pleroma', 'emojis', 'fields' + "id", + "username", + "acct", + "display_name", + "locked", + "bot", + "discoverable", + "group", + "created_at", + "note", + "url", + "avatar", + "avatar_static", + "header", + "header_static", + "followers_count", + "following_count", + "statuses_count", + "last_status_at", + "verified", + "location", + "website", + "accepting_messages", + "chats_onboarded", + "feeds_onboarded", + "show_nonmember_group_statuses", + "pleroma", + "emojis", + "fields", ] assert isinstance(user["id"], str) @@ -37,50 +58,95 @@ def test_pull_statuses(api): # COMPLETE PULLS # it fetches a timeline of the user's posts: - full_timeline = list(api.pull_statuses(username=username, replies=False, verbose=True)) - assert len(full_timeline) > 25 # more than one page + full_timeline = list( + api.pull_statuses(username=username, replies=False, verbose=True) + ) + assert len(full_timeline) > 25 # more than one page # the posts are in reverse chronological order: latest, earliest = full_timeline[0], full_timeline[-1] - latest_at, earliest_at = as_datetime(latest["created_at"]), as_datetime(earliest["created_at"]) + latest_at, earliest_at = as_datetime(latest["created_at"]), as_datetime( + earliest["created_at"] + ) assert earliest_at < latest_at # EMPTY PULLS # can use created_after param for filtering out posts: - next_pull = list(api.pull_statuses(username=username, replies=False, created_after=latest_at, verbose=True)) + next_pull = list( + api.pull_statuses( + username=username, replies=False, created_after=latest_at, verbose=True + ) + ) assert not any(next_pull) # can use since_id param for filtering out posts: - next_pull = list(api.pull_statuses(username=username, replies=False, since_id=latest["id"], verbose=True)) + next_pull = list( + api.pull_statuses( + username=username, replies=False, since_id=latest["id"], verbose=True + ) + ) assert not any(next_pull) # PARTIAL PULLS - n_posts = 50 # two and a half pages worth, to verify everything is ok + n_posts = 50 # two and a half pages worth, to verify everything is ok recent = full_timeline[n_posts] recent_at = as_datetime(recent["created_at"]) # can use created_after param for filtering out posts: - partial_pull = list(api.pull_statuses(username=username, replies=False, created_after=recent_at, verbose=True)) + partial_pull = list( + api.pull_statuses( + username=username, replies=False, created_after=recent_at, verbose=True + ) + ) assert len(partial_pull) == n_posts assert recent["id"] not in [post["id"] for post in partial_pull] # can use since_id param for filtering out posts: - partial_pull = list(api.pull_statuses(username=username, replies=False, since_id=recent["id"], verbose=True)) + partial_pull = list( + api.pull_statuses( + username=username, replies=False, since_id=recent["id"], verbose=True + ) + ) assert len(partial_pull) == n_posts assert recent["id"] not in [post["id"] for post in partial_pull] - - # POST INFO # contains status info - assert list(latest.keys()) == ['id', 'created_at', - 'in_reply_to_id', 'quote_id', 'in_reply_to_account_id', - 'sensitive', 'spoiler_text', 'visibility', 'language', 'uri', 'url', - 'content', 'account', 'media_attachments', 'mentions', 'tags', 'card', - 'group', 'quote', 'in_reply_to', 'reblog', 'sponsored', - 'replies_count', 'reblogs_count', 'favourites_count', 'favourited', 'reblogged', - 'muted', 'pinned', 'bookmarked', 'poll', 'emojis', '_pulled' + assert list(latest.keys()) == [ + "id", + "created_at", + "in_reply_to_id", + "quote_id", + "in_reply_to_account_id", + "sensitive", + "spoiler_text", + "visibility", + "language", + "uri", + "url", + "content", + "account", + "media_attachments", + "mentions", + "tags", + "card", + "group", + "quote", + "in_reply_to", + "reblog", + "sponsored", + "replies_count", + "reblogs_count", + "favourites_count", + "favourited", + "reblogged", + "muted", + "pinned", + "bookmarked", + "poll", + "emojis", + "_pulled", ] assert isinstance(latest["id"], str) diff --git a/truthbrush/__init__.py b/truthbrush/__init__.py index deec674..227cbab 100644 --- a/truthbrush/__init__.py +++ b/truthbrush/__init__.py @@ -1 +1 @@ -from truthbrush.api import Api \ No newline at end of file +from truthbrush.api import Api diff --git a/truthbrush/api.py b/truthbrush/api.py index aaaad06..a8d80e2 100644 --- a/truthbrush/api.py +++ b/truthbrush/api.py @@ -9,7 +9,7 @@ import os from dotenv import load_dotenv -load_dotenv() # take environment variables from .env. +load_dotenv() # take environment variables from .env. logging.basicConfig( level=( @@ -33,12 +33,18 @@ TRUTHSOCIAL_PASSWORD = os.getenv("TRUTHSOCIAL_PASSWORD") TRUTHSOCIAL_TOKEN = os.getenv("TRUTHSOCIAL_TOKEN") + class LoginErrorException(Exception): pass class Api: - def __init__(self, username=TRUTHSOCIAL_USERNAME, password=TRUTHSOCIAL_PASSWORD, token=TRUTHSOCIAL_TOKEN): + def __init__( + self, + username=TRUTHSOCIAL_USERNAME, + password=TRUTHSOCIAL_PASSWORD, + token=TRUTHSOCIAL_TOKEN, + ): self.ratelimit_max = 300 self.ratelimit_remaining = None self.ratelimit_reset = None @@ -248,16 +254,21 @@ def user_following( return def pull_statuses( - self, username: str, replies=False, verbose=False, created_after: datetime=None, since_id=None, + self, + username: str, + replies=False, + verbose=False, + created_after: datetime = None, + since_id=None, ) -> List[dict]: """Pull the given user's statuses. - Params: - created_after : timezone aware datetime object - since_id : number or string + Params: + created_after : timezone aware datetime object + since_id : number or string - Returns a list of posts in reverse chronological order, - or an empty list if not found. + Returns a list of posts in reverse chronological order, + or an empty list if not found. """ params = {} @@ -273,7 +284,7 @@ def pull_statuses( print("--------------------------") print(url, params) result = self._get(url, params=params) - page_counter +=1 + page_counter += 1 except json.JSONDecodeError as e: logger.error(f"Unable to pull user #{user_id}'s statuses': {e}") break @@ -282,7 +293,9 @@ def pull_statuses( break if "error" in result: - logger.error(f"API returned an error while pulling user #{user_id}'s statuses: {result}") + logger.error( + f"API returned an error while pulling user #{user_id}'s statuses: {result}" + ) break if len(result) == 0: @@ -291,8 +304,12 @@ def pull_statuses( if not isinstance(result, list): logger.error(f"Result is not a list (it's a {type(result)}): {result}") - posts = sorted(result, key=lambda k: k["id"], reverse=True) # reverse chronological order (recent first, older last) - params["max_id"] = posts[-1]["id"] # when pulling the next page, get posts before this (the oldest) + posts = sorted( + result, key=lambda k: k["id"], reverse=True + ) # reverse chronological order (recent first, older last) + params["max_id"] = posts[-1][ + "id" + ] # when pulling the next page, get posts before this (the oldest) if verbose: print("PAGE", page_counter) @@ -303,10 +320,14 @@ def pull_statuses( # only keep posts created after the specified date # exclude posts created before the specified date # since the page is listed in reverse chronology, we don't need any remaining posts on this page either - post_at = date_parse.parse(post["created_at"]).replace(tzinfo=timezone.utc) - if (created_after and post_at <= created_after) or (since_id and post["id"] <= since_id): - keep_going = False # stop the loop, request no more pages - break # do not yeild this post or remaining (older) posts on this page + post_at = date_parse.parse(post["created_at"]).replace( + tzinfo=timezone.utc + ) + if (created_after and post_at <= created_after) or ( + since_id and post["id"] <= since_id + ): + keep_going = False # stop the loop, request no more pages + break # do not yeild this post or remaining (older) posts on this page if verbose: print(post["id"], post["created_at"]) From a7f623cb252e6057e4dec79fb04699916b448278 Mon Sep 17 00:00:00 2001 From: "R. Miles McCain" Date: Sat, 30 Sep 2023 20:02:11 -0700 Subject: [PATCH 14/16] Readme clarifications --- README.md | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index ab47eb9..e150e73 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,8 @@ After installation, you will need to set your Truth Social username and password `export TRUTHSOCIAL_PASSWORD=bar` +You may also set these variables in a `.env` file in the directory from which you are running Truthbrush. + ## CLI Usage ```text @@ -80,7 +82,21 @@ truthbrush user HANDLE Contributions are encouraged! For small bug fixes and minor improvements, feel free to just open a PR. For larger changes, please open an issue first so that other contributors can discuss your plan, avoid duplicated work, and ensure it aligns with the goals of the project. Be sure to also follow the [code of conduct](CODE_OF_CONDUCT.md). Thanks! -Development setup: +Development setup (ensure you have [Poetry](https://python-poetry.org/) installed): + +```sh +poetry install +poetry shell +truthbrush --help # will use your local copy of truthbrush +``` + +To run the tests: + +```sh +pytest +``` + +If you prefer not to install Poetry in your root environment, you can also use Conda: ```sh conda create -n truthbrush-env python=3.9 @@ -90,13 +106,12 @@ conda install -c conda-forge poetry poetry install ``` -Running tests: - +Please format your code with `black`: + ```sh -pytest +black . ``` - ## Wishlist Support for the following capabilities is planned: From 299cc5838b8fe9830bb008f51796f673b87bc117 Mon Sep 17 00:00:00 2001 From: "R. Miles McCain" Date: Sat, 30 Sep 2023 20:12:45 -0700 Subject: [PATCH 15/16] Fix the CLI --- truthbrush/cli.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/truthbrush/cli.py b/truthbrush/cli.py index 44d2abb..dfcbed5 100644 --- a/truthbrush/cli.py +++ b/truthbrush/cli.py @@ -4,6 +4,7 @@ import os import click from datetime import date +import datetime from .api import Api @@ -111,10 +112,17 @@ def ads(): @click.option( "--created-after", default=None, - help="Only pull posts created on or after the specified date, e.g. 2021-10-02 (defaults to none).", - type=date.fromisoformat, + help="Only pull posts created on or after the specified datetime, e.g. 2021-10-02 or 2011-11-04T00:05:23+04:00 (defaults to none). If a timezone is not specified, UTC is assumed.", + type=datetime.datetime.fromisoformat, ) def statuses(username: str, replies: bool = False, created_after: date = None): """Pull a user's statuses""" - for page in api.pull_statuses(username, created_after, replies): + + # Assume UTC if no timezone is specified + if created_after and created_after.tzinfo is None: + created_after = created_after.replace(tzinfo=datetime.timezone.utc) + + for page in api.pull_statuses( + username, created_after=created_after, replies=replies + ): print(json.dumps(page)) From b77393229aafe069216eebc6dd5ff69d1fcc844d Mon Sep 17 00:00:00 2001 From: MJ Rossetti Date: Sun, 1 Oct 2023 21:21:21 -0400 Subject: [PATCH 16/16] Convert print statements to logging --- README.md | 16 ++++++++++------ truthbrush/api.py | 8 ++++---- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index e150e73..c4db29c 100644 --- a/README.md +++ b/README.md @@ -90,11 +90,6 @@ poetry shell truthbrush --help # will use your local copy of truthbrush ``` -To run the tests: - -```sh -pytest -``` If you prefer not to install Poetry in your root environment, you can also use Conda: @@ -106,8 +101,17 @@ conda install -c conda-forge poetry poetry install ``` +To run the tests: + +```sh +pytest + +# optionally run tests with verbose logging outputs: +pytest --log-cli-level=DEBUG -s +``` + Please format your code with `black`: - + ```sh black . ``` diff --git a/truthbrush/api.py b/truthbrush/api.py index a8d80e2..c3142a6 100644 --- a/truthbrush/api.py +++ b/truthbrush/api.py @@ -281,8 +281,8 @@ def pull_statuses( if not replies: url += "?exclude_replies=true" if verbose: - print("--------------------------") - print(url, params) + logger.debug("--------------------------") + logger.debug(f"{url} {params}") result = self._get(url, params=params) page_counter += 1 except json.JSONDecodeError as e: @@ -312,7 +312,7 @@ def pull_statuses( ] # when pulling the next page, get posts before this (the oldest) if verbose: - print("PAGE", page_counter) + logger.debug(f"PAGE: {page_counter}") for post in posts: post["_pulled"] = datetime.now().isoformat() @@ -330,7 +330,7 @@ def pull_statuses( break # do not yeild this post or remaining (older) posts on this page if verbose: - print(post["id"], post["created_at"]) + logger.debug(f"{post['id']} {post['created_at']}") yield post