diff --git a/README.md b/README.md index c36e86d..c4db29c 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,8 @@ After installation, you will need to set your Truth Social username and password `export TRUTHSOCIAL_PASSWORD=bar` +You may also set these variables in a `.env` file in the directory from which you are running Truthbrush. + ## CLI Usage ```text @@ -80,6 +82,40 @@ truthbrush user HANDLE Contributions are encouraged! For small bug fixes and minor improvements, feel free to just open a PR. For larger changes, please open an issue first so that other contributors can discuss your plan, avoid duplicated work, and ensure it aligns with the goals of the project. Be sure to also follow the [code of conduct](CODE_OF_CONDUCT.md). Thanks! +Development setup (ensure you have [Poetry](https://python-poetry.org/) installed): + +```sh +poetry install +poetry shell +truthbrush --help # will use your local copy of truthbrush +``` + + +If you prefer not to install Poetry in your root environment, you can also use Conda: + +```sh +conda create -n truthbrush-env python=3.9 +conda activate truthbrush-env + +conda install -c conda-forge poetry +poetry install +``` + +To run the tests: + +```sh +pytest + +# optionally run tests with verbose logging outputs: +pytest --log-cli-level=DEBUG -s +``` + +Please format your code with `black`: + +```sh +black . +``` + ## Wishlist Support for the following capabilities is planned: diff --git a/test/test_api.py b/test/test_api.py new file mode 100644 index 0000000..c05d0c1 --- /dev/null +++ b/test/test_api.py @@ -0,0 +1,152 @@ +from datetime import datetime, timezone +from dateutil import parser as date_parse + +import pytest + +from truthbrush.api import Api + + +@pytest.fixture(scope="module") +def api(): + return Api() + + +def as_datetime(date_str): + """Datetime formatter function. Ensures timezone is UTC. 
Consider moving to Api class.""" + return date_parse.parse(date_str).replace(tzinfo=timezone.utc) + + +def test_lookup(api): + user = api.lookup(user_handle="realDonaldTrump") + assert list(user.keys()) == [ + "id", + "username", + "acct", + "display_name", + "locked", + "bot", + "discoverable", + "group", + "created_at", + "note", + "url", + "avatar", + "avatar_static", + "header", + "header_static", + "followers_count", + "following_count", + "statuses_count", + "last_status_at", + "verified", + "location", + "website", + "accepting_messages", + "chats_onboarded", + "feeds_onboarded", + "show_nonmember_group_statuses", + "pleroma", + "emojis", + "fields", + ] + assert isinstance(user["id"], str) + + +def test_pull_statuses(api): + username = "truthsocial" + + # COMPLETE PULLS + + # it fetches a timeline of the user's posts: + full_timeline = list( + api.pull_statuses(username=username, replies=False, verbose=True) + ) + assert len(full_timeline) > 25 # more than one page + + # the posts are in reverse chronological order: + latest, earliest = full_timeline[0], full_timeline[-1] + latest_at, earliest_at = as_datetime(latest["created_at"]), as_datetime( + earliest["created_at"] + ) + assert earliest_at < latest_at + + # EMPTY PULLS + + # can use created_after param for filtering out posts: + next_pull = list( + api.pull_statuses( + username=username, replies=False, created_after=latest_at, verbose=True + ) + ) + assert not any(next_pull) + + # can use since_id param for filtering out posts: + next_pull = list( + api.pull_statuses( + username=username, replies=False, since_id=latest["id"], verbose=True + ) + ) + assert not any(next_pull) + + # PARTIAL PULLS + + n_posts = 50 # two and a half pages worth, to verify everything is ok + recent = full_timeline[n_posts] + recent_at = as_datetime(recent["created_at"]) + + # can use created_after param for filtering out posts: + partial_pull = list( + api.pull_statuses( + username=username, replies=False, 
created_after=recent_at, verbose=True + ) + ) + assert len(partial_pull) == n_posts + assert recent["id"] not in [post["id"] for post in partial_pull] + + # can use since_id param for filtering out posts: + partial_pull = list( + api.pull_statuses( + username=username, replies=False, since_id=recent["id"], verbose=True + ) + ) + assert len(partial_pull) == n_posts + assert recent["id"] not in [post["id"] for post in partial_pull] + + # POST INFO + # contains status info + assert list(latest.keys()) == [ + "id", + "created_at", + "in_reply_to_id", + "quote_id", + "in_reply_to_account_id", + "sensitive", + "spoiler_text", + "visibility", + "language", + "uri", + "url", + "content", + "account", + "media_attachments", + "mentions", + "tags", + "card", + "group", + "quote", + "in_reply_to", + "reblog", + "sponsored", + "replies_count", + "reblogs_count", + "favourites_count", + "favourited", + "reblogged", + "muted", + "pinned", + "bookmarked", + "poll", + "emojis", + "_pulled", + ] + assert isinstance(latest["id"], str) diff --git a/truthbrush/__init__.py b/truthbrush/__init__.py index deec674..227cbab 100644 --- a/truthbrush/__init__.py +++ b/truthbrush/__init__.py @@ -1 +1 @@ -from truthbrush.api import Api \ No newline at end of file +from truthbrush.api import Api diff --git a/truthbrush/api.py b/truthbrush/api.py index b55dea7..c3142a6 100644 --- a/truthbrush/api.py +++ b/truthbrush/api.py @@ -7,6 +7,9 @@ import json import logging import os +from dotenv import load_dotenv + +load_dotenv() # take environment variables from .env. 
logging.basicConfig( level=( @@ -26,13 +29,22 @@ proxies = {"http": os.getenv("http_proxy"), "https": os.getenv("https_proxy")} +TRUTHSOCIAL_USERNAME = os.getenv("TRUTHSOCIAL_USERNAME") +TRUTHSOCIAL_PASSWORD = os.getenv("TRUTHSOCIAL_PASSWORD") +TRUTHSOCIAL_TOKEN = os.getenv("TRUTHSOCIAL_TOKEN") + class LoginErrorException(Exception): pass class Api: - def __init__(self, username: str = None, password: str = None, token: str = None): + def __init__( + self, + username=TRUTHSOCIAL_USERNAME, + password=TRUTHSOCIAL_PASSWORD, + token=TRUTHSOCIAL_TOKEN, + ): self.ratelimit_max = 300 self.ratelimit_remaining = None self.ratelimit_reset = None @@ -242,28 +254,47 @@ def user_following( return def pull_statuses( - self, username: str, created_after: date, replies: bool + self, + username: str, + replies=False, + verbose=False, + created_after: datetime = None, + since_id=None, ) -> List[dict]: - """Pull the given user's statuses. Returns an empty list if not found.""" + """Pull the given user's statuses. + + Params: + created_after : timezone aware datetime object + since_id : number or string + + Returns a list of posts in reverse chronological order, + or an empty list if not found. + """ params = {} - id = self.lookup(username)["id"] - while True: + user_id = self.lookup(username)["id"] + page_counter = 0 + keep_going = True + while keep_going: try: - url = f"/v1/accounts/{id}/statuses" + url = f"/v1/accounts/{user_id}/statuses" if not replies: url += "?exclude_replies=true" + if verbose: + logger.debug("--------------------------") + logger.debug(f"{url} {params}") result = self._get(url, params=params) + page_counter += 1 except json.JSONDecodeError as e: - logger.error(f"Unable to pull user #{id}'s statuses': {e}") + logger.error(f"Unable to pull user #{user_id}'s statuses': {e}") break except Exception as e: - logger.error(f"Misc. error while pulling statuses for {id}: {e}") + logger.error(f"Misc. 
error while pulling statuses for {user_id}: {e}") break if "error" in result: logger.error( - f"API returned an error while pulling user #{id}'s statuses: {result}" + f"API returned an error while pulling user #{user_id}'s statuses: {result}" ) break @@ -273,27 +304,33 @@ def pull_statuses( if not isinstance(result, list): logger.error(f"Result is not a list (it's a {type(result)}): {result}") - posts = sorted(result, key=lambda k: k["id"]) - params["max_id"] = posts[0]["id"] + posts = sorted( + result, key=lambda k: k["id"], reverse=True + ) # reverse chronological order (recent first, older last) + params["max_id"] = posts[-1][ + "id" + ] # when pulling the next page, get posts before this (the oldest) - most_recent_date = ( - date_parse.parse(posts[-1]["created_at"]) - .replace(tzinfo=timezone.utc) - .date() - ) - if created_after and most_recent_date < created_after: - # Current and all future batches are too old - break + if verbose: + logger.debug(f"PAGE: {page_counter}") for post in posts: post["_pulled"] = datetime.now().isoformat() - date_created = ( - date_parse.parse(post["created_at"]) - .replace(tzinfo=timezone.utc) - .date() + + # only keep posts created after the specified date + # exclude posts created before the specified date + # since the page is listed in reverse chronology, we don't need any remaining posts on this page either + post_at = date_parse.parse(post["created_at"]).replace( + tzinfo=timezone.utc ) - if created_after and date_created < created_after: - continue + if (created_after and post_at <= created_after) or ( + since_id and post["id"] <= since_id + ): + keep_going = False # stop the loop, request no more pages + break # do not yield this post or remaining (older) posts on this page + + if verbose: + logger.debug(f"{post['id']} {post['created_at']}") yield post diff --git a/truthbrush/cli.py b/truthbrush/cli.py index 0292d0c..dfcbed5 100644 --- a/truthbrush/cli.py +++ b/truthbrush/cli.py @@ -4,16 +4,11 @@ import os import click 
from datetime import date -from dotenv import load_dotenv +import datetime from .api import Api -load_dotenv() # take environment variables from .env. -api = Api( - os.getenv("TRUTHSOCIAL_USERNAME"), - os.getenv("TRUTHSOCIAL_PASSWORD"), - os.getenv("TRUTHSOCIAL_TOKEN"), -) +api = Api() @click.group() @@ -117,10 +112,17 @@ def ads(): @click.option( "--created-after", default=None, - help="Only pull posts created on or after the specified date, e.g. 2021-10-02 (defaults to none).", - type=date.fromisoformat, + help="Only pull posts created on or after the specified datetime, e.g. 2021-10-02 or 2011-11-04T00:05:23+04:00 (defaults to none). If a timezone is not specified, UTC is assumed.", + type=datetime.datetime.fromisoformat, ) def statuses(username: str, replies: bool = False, created_after: date = None): """Pull a user's statuses""" - for page in api.pull_statuses(username, created_after, replies): + + # Assume UTC if no timezone is specified + if created_after and created_after.tzinfo is None: + created_after = created_after.replace(tzinfo=datetime.timezone.utc) + + for page in api.pull_statuses( + username, created_after=created_after, replies=replies + ): print(json.dumps(page))