Skip to content

Commit

Permalink
Merge pull request #22 from s2t2/timelines-2
Browse files Browse the repository at this point in the history
Pull Statuses (created after datetime)
  • Loading branch information
milesmcc authored Oct 2, 2023
2 parents f7b5952 + b773932 commit 1639259
Show file tree
Hide file tree
Showing 5 changed files with 263 additions and 36 deletions.
36 changes: 36 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ After installation, you will need to set your Truth Social username and password

`export TRUTHSOCIAL_PASSWORD=bar`

You may also set these variables in a `.env` file in the directory from which you are running Truthbrush.

## CLI Usage

```text
Expand Down Expand Up @@ -80,6 +82,40 @@ truthbrush user HANDLE

Contributions are encouraged! For small bug fixes and minor improvements, feel free to just open a PR. For larger changes, please open an issue first so that other contributors can discuss your plan, avoid duplicated work, and ensure it aligns with the goals of the project. Be sure to also follow the [code of conduct](CODE_OF_CONDUCT.md). Thanks!

Development setup (ensure you have [Poetry](https://python-poetry.org/) installed):

```sh
poetry install
poetry shell
truthbrush --help # will use your local copy of truthbrush
```


If you prefer not to install Poetry in your root environment, you can also use Conda:

```sh
conda create -n truthbrush-env python=3.9
conda activate truthbrush-env

conda install -c conda-forge poetry
poetry install
```

To run the tests:

```sh
pytest

# optionally run tests with verbose logging output:
pytest --log-cli-level=DEBUG -s
```

Please format your code with `black`:

```sh
black .
```

## Wishlist

Support for the following capabilities is planned:
Expand Down
152 changes: 152 additions & 0 deletions test/test_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
from datetime import datetime, timezone
from dateutil import parser as date_parse

import pytest

from truthbrush.api import Api


@pytest.fixture(scope="module")
def api():
    """Provide one ``Api`` client, built with its default arguments, shared by all tests in this module."""
    client = Api()
    return client


def as_datetime(date_str):
    """Parse a date string and return it as a datetime labelled UTC.

    The UTC timezone is attached with ``replace``, so any offset present in
    the input is overwritten rather than converted.
    Consider moving to Api class.
    """
    parsed = date_parse.parse(date_str)
    return parsed.replace(tzinfo=timezone.utc)


def test_lookup(api):
    """Looking up a known handle returns the expected account fields, in order."""
    expected_keys = [
        "id",
        "username",
        "acct",
        "display_name",
        "locked",
        "bot",
        "discoverable",
        "group",
        "created_at",
        "note",
        "url",
        "avatar",
        "avatar_static",
        "header",
        "header_static",
        "followers_count",
        "following_count",
        "statuses_count",
        "last_status_at",
        "verified",
        "location",
        "website",
        "accepting_messages",
        "chats_onboarded",
        "feeds_onboarded",
        "show_nonmember_group_statuses",
        "pleroma",
        "emojis",
        "fields",
    ]

    user = api.lookup(user_handle="realDonaldTrump")

    assert list(user.keys()) == expected_keys
    assert isinstance(user["id"], str)


def test_pull_statuses(api):
    """Exercise ``pull_statuses``: full pulls, empty pulls, partial pulls, post shape."""
    username = "truthsocial"

    def pull(**filters):
        # Every pull in this test excludes replies and logs verbosely;
        # only the cut-off filter (created_after / since_id) varies.
        return list(
            api.pull_statuses(
                username=username, replies=False, verbose=True, **filters
            )
        )

    expected_post_keys = [
        "id",
        "created_at",
        "in_reply_to_id",
        "quote_id",
        "in_reply_to_account_id",
        "sensitive",
        "spoiler_text",
        "visibility",
        "language",
        "uri",
        "url",
        "content",
        "account",
        "media_attachments",
        "mentions",
        "tags",
        "card",
        "group",
        "quote",
        "in_reply_to",
        "reblog",
        "sponsored",
        "replies_count",
        "reblogs_count",
        "favourites_count",
        "favourited",
        "reblogged",
        "muted",
        "pinned",
        "bookmarked",
        "poll",
        "emojis",
        "_pulled",
    ]

    # COMPLETE PULLS

    # it fetches a timeline of the user's posts:
    full_timeline = pull()
    assert len(full_timeline) > 25  # more than one page

    # the posts are in reverse chronological order:
    latest = full_timeline[0]
    earliest = full_timeline[-1]
    latest_at = as_datetime(latest["created_at"])
    earliest_at = as_datetime(earliest["created_at"])
    assert earliest_at < latest_at

    # EMPTY PULLS

    # can use either the created_after or the since_id param for filtering
    # out posts; cutting off at the latest post leaves nothing to return:
    for cutoff in ({"created_after": latest_at}, {"since_id": latest["id"]}):
        next_pull = pull(**cutoff)
        assert not any(next_pull)

    # PARTIAL PULLS

    n_posts = 50  # two and a half pages worth, to verify everything is ok
    recent = full_timeline[n_posts]
    recent_at = as_datetime(recent["created_at"])

    # can use either the created_after or the since_id param for filtering
    # out posts; the cut-off post itself must not be included:
    for cutoff in ({"created_after": recent_at}, {"since_id": recent["id"]}):
        partial_pull = pull(**cutoff)
        assert len(partial_pull) == n_posts
        pulled_ids = [post["id"] for post in partial_pull]
        assert recent["id"] not in pulled_ids

    # POST INFO
    # contains status info
    assert list(latest.keys()) == expected_post_keys
    assert isinstance(latest["id"], str)
2 changes: 1 addition & 1 deletion truthbrush/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from truthbrush.api import Api
from truthbrush.api import Api
87 changes: 62 additions & 25 deletions truthbrush/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
import json
import logging
import os
from dotenv import load_dotenv

load_dotenv() # take environment variables from .env.

logging.basicConfig(
level=(
Expand All @@ -26,13 +29,22 @@

proxies = {"http": os.getenv("http_proxy"), "https": os.getenv("https_proxy")}

# Credentials are read from the environment once, at import time (after the
# load_dotenv() call above); each is None when its variable is not set.
# They are used as the default arguments of Api.__init__.
TRUTHSOCIAL_USERNAME = os.getenv("TRUTHSOCIAL_USERNAME")
TRUTHSOCIAL_PASSWORD = os.getenv("TRUTHSOCIAL_PASSWORD")
TRUTHSOCIAL_TOKEN = os.getenv("TRUTHSOCIAL_TOKEN")


class LoginErrorException(Exception):
    """Exception type for login errors.

    NOTE(review): presumably raised when authentication fails; the raise
    sites are not visible in this view, so confirm against the Api class.
    """


class Api:
def __init__(self, username: str = None, password: str = None, token: str = None):
def __init__(
self,
username=TRUTHSOCIAL_USERNAME,
password=TRUTHSOCIAL_PASSWORD,
token=TRUTHSOCIAL_TOKEN,
):
self.ratelimit_max = 300
self.ratelimit_remaining = None
self.ratelimit_reset = None
Expand Down Expand Up @@ -242,28 +254,47 @@ def user_following(
return

def pull_statuses(
self, username: str, created_after: date, replies: bool
self,
username: str,
replies=False,
verbose=False,
created_after: datetime = None,
since_id=None,
) -> List[dict]:
"""Pull the given user's statuses. Returns an empty list if not found."""
"""Pull the given user's statuses.
Params:
created_after : timezone aware datetime object
since_id : number or string
Returns a list of posts in reverse chronological order,
or an empty list if not found.
"""

params = {}
id = self.lookup(username)["id"]
while True:
user_id = self.lookup(username)["id"]
page_counter = 0
keep_going = True
while keep_going:
try:
url = f"/v1/accounts/{id}/statuses"
url = f"/v1/accounts/{user_id}/statuses"
if not replies:
url += "?exclude_replies=true"
if verbose:
logger.debug("--------------------------")
logger.debug(f"{url} {params}")
result = self._get(url, params=params)
page_counter += 1
except json.JSONDecodeError as e:
logger.error(f"Unable to pull user #{id}'s statuses': {e}")
logger.error(f"Unable to pull user #{user_id}'s statuses': {e}")
break
except Exception as e:
logger.error(f"Misc. error while pulling statuses for {id}: {e}")
logger.error(f"Misc. error while pulling statuses for {user_id}: {e}")
break

if "error" in result:
logger.error(
f"API returned an error while pulling user #{id}'s statuses: {result}"
f"API returned an error while pulling user #{user_id}'s statuses: {result}"
)
break

Expand All @@ -273,27 +304,33 @@ def pull_statuses(
if not isinstance(result, list):
logger.error(f"Result is not a list (it's a {type(result)}): {result}")

posts = sorted(result, key=lambda k: k["id"])
params["max_id"] = posts[0]["id"]
posts = sorted(
result, key=lambda k: k["id"], reverse=True
) # reverse chronological order (recent first, older last)
params["max_id"] = posts[-1][
"id"
] # when pulling the next page, get posts before this (the oldest)

most_recent_date = (
date_parse.parse(posts[-1]["created_at"])
.replace(tzinfo=timezone.utc)
.date()
)
if created_after and most_recent_date < created_after:
# Current and all future batches are too old
break
if verbose:
logger.debug(f"PAGE: {page_counter}")

for post in posts:
post["_pulled"] = datetime.now().isoformat()
date_created = (
date_parse.parse(post["created_at"])
.replace(tzinfo=timezone.utc)
.date()

# only keep posts created after the specified date
# exclude posts created before the specified date
# since the page is listed in reverse chronology, we don't need any remaining posts on this page either
post_at = date_parse.parse(post["created_at"]).replace(
tzinfo=timezone.utc
)
if created_after and date_created < created_after:
continue
if (created_after and post_at <= created_after) or (
since_id and post["id"] <= since_id
):
keep_going = False # stop the loop, request no more pages
break # do not yield this post or remaining (older) posts on this page

if verbose:
logger.debug(f"{post['id']} {post['created_at']}")

yield post

Expand Down
22 changes: 12 additions & 10 deletions truthbrush/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,11 @@
import os
import click
from datetime import date
from dotenv import load_dotenv
import datetime

from .api import Api

load_dotenv() # take environment variables from .env.
api = Api(
os.getenv("TRUTHSOCIAL_USERNAME"),
os.getenv("TRUTHSOCIAL_PASSWORD"),
os.getenv("TRUTHSOCIAL_TOKEN"),
)
api = Api()


@click.group()
Expand Down Expand Up @@ -117,10 +112,17 @@ def ads():
@click.option(
"--created-after",
default=None,
help="Only pull posts created on or after the specified date, e.g. 2021-10-02 (defaults to none).",
type=date.fromisoformat,
help="Only pull posts created on or after the specified datetime, e.g. 2021-10-02 or 2011-11-04T00:05:23+04:00 (defaults to none). If a timezone is not specified, UTC is assumed.",
type=datetime.datetime.fromisoformat,
)
def statuses(username: str, replies: bool = False, created_after: date = None):
"""Pull a user's statuses"""
for page in api.pull_statuses(username, created_after, replies):

# Assume UTC if no timezone is specified
if created_after and created_after.tzinfo is None:
created_after = created_after.replace(tzinfo=datetime.timezone.utc)

for page in api.pull_statuses(
username, created_after=created_after, replies=replies
):
print(json.dumps(page))

0 comments on commit 1639259

Please sign in to comment.