Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into lemmy-perf-fix-2
Browse files Browse the repository at this point in the history
  • Loading branch information
phiresky committed Jul 5, 2023
2 parents 920cab6 + 2158621 commit b9669f3
Show file tree
Hide file tree
Showing 12 changed files with 390 additions and 108 deletions.
295 changes: 288 additions & 7 deletions Cargo.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions crates/apub/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ once_cell = { workspace = true }
html2md = "0.2.14"
serde_with = { workspace = true }
enum_delegate = "0.2.0"
moka = { version = "0.11", features = ["future"] }

[dev-dependencies]
serial_test = { workspace = true }
Expand Down
70 changes: 45 additions & 25 deletions crates/apub/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::fetcher::post_or_comment::PostOrComment;
use activitypub_federation::config::{Data, UrlVerifier};
use async_trait::async_trait;
use futures::future::join3;
use lemmy_api_common::context::LemmyContext;
use lemmy_db_schema::{
source::{
Expand All @@ -11,9 +12,11 @@ use lemmy_db_schema::{
traits::Crud,
utils::DbPool,
};
use lemmy_utils::{error::LemmyError, settings::structs::Settings};
use lemmy_utils::error::{LemmyError, LemmyResult};
use moka::future::Cache;
use once_cell::sync::Lazy;
use serde::Serialize;
use std::{sync::Arc, time::Duration};
use url::Url;

pub mod activities;
Expand All @@ -27,6 +30,11 @@ pub mod objects;
pub mod protocol;

pub const FEDERATION_HTTP_FETCH_LIMIT: u32 = 50;
/// All incoming and outgoing federation actions read the blocklist/allowlist and slur filters
/// multiple times. This causes a huge number of database reads if we hit the db directly. So we
/// cache these values for a short time, which will already make a huge difference and ensures that
/// changes take effect quickly.
const BLOCKLIST_CACHE_DURATION: Duration = Duration::from_secs(60);

static CONTEXT: Lazy<Vec<serde_json::Value>> = Lazy::new(|| {
serde_json::from_str(include_str!("../assets/lemmy/context.json")).expect("parse context")
Expand All @@ -38,7 +46,7 @@ pub struct VerifyUrlData(pub DbPool);
#[async_trait]
impl UrlVerifier for VerifyUrlData {
async fn verify(&self, url: &Url) -> Result<(), &'static str> {
let local_site_data = fetch_local_site_data(&self.0)
let local_site_data = local_site_data_cached(&self.0)
.await
.expect("read local site data");
check_apub_id_valid(url, &local_site_data)?;
Expand All @@ -53,9 +61,6 @@ impl UrlVerifier for VerifyUrlData {
/// - the correct scheme (either http or https)
/// - URL being in the allowlist (if it is active)
/// - URL not being in the blocklist (if it is active)
///
/// `use_strict_allowlist` should be true only when parsing a remote community, or when parsing a
/// post/comment in a local community.
#[tracing::instrument(skip(local_site_data))]
fn check_apub_id_valid(apub_id: &Url, local_site_data: &LocalSiteData) -> Result<(), &'static str> {
let domain = apub_id.domain().expect("apud id has domain").to_string();
Expand Down Expand Up @@ -97,36 +102,50 @@ pub(crate) struct LocalSiteData {
blocked_instances: Vec<Instance>,
}

pub(crate) async fn fetch_local_site_data(
pool: &DbPool,
) -> Result<LocalSiteData, diesel::result::Error> {
// LocalSite may be missing
let local_site = LocalSite::read(pool).await.ok();
let allowed_instances = Instance::allowlist(pool).await?;
let blocked_instances = Instance::blocklist(pool).await?;

Ok(LocalSiteData {
local_site,
allowed_instances,
blocked_instances,
})
pub(crate) async fn local_site_data_cached(pool: &DbPool) -> LemmyResult<Arc<LocalSiteData>> {
static CACHE: Lazy<Cache<(), Arc<LocalSiteData>>> = Lazy::new(|| {
Cache::builder()
.max_capacity(1)
.time_to_live(BLOCKLIST_CACHE_DURATION)
.build()
});
Ok(
CACHE
.try_get_with((), async {
let (local_site, allowed_instances, blocked_instances) = join3(
LocalSite::read(pool),
Instance::allowlist(pool),
Instance::blocklist(pool),
)
.await;

Ok::<_, diesel::result::Error>(Arc::new(LocalSiteData {
// LocalSite may be missing
local_site: local_site.ok(),
allowed_instances: allowed_instances?,
blocked_instances: blocked_instances?,
}))
})
.await?,
)
}

#[tracing::instrument(skip(settings, local_site_data))]
pub(crate) fn check_apub_id_valid_with_strictness(
pub(crate) async fn check_apub_id_valid_with_strictness(
apub_id: &Url,
is_strict: bool,
local_site_data: &LocalSiteData,
settings: &Settings,
context: &LemmyContext,
) -> Result<(), LemmyError> {
let domain = apub_id.domain().expect("apud id has domain").to_string();
let local_instance = settings
let local_instance = context
.settings()
.get_hostname_without_port()
.expect("local hostname is valid");
if domain == local_instance {
return Ok(());
}
check_apub_id_valid(apub_id, local_site_data).map_err(LemmyError::from_message)?;

let local_site_data = local_site_data_cached(context.pool()).await?;
check_apub_id_valid(apub_id, &local_site_data).map_err(LemmyError::from_message)?;

// Only check allowlist if this is a community, and there are instances in the allowlist
if is_strict && !local_site_data.allowed_instances.is_empty() {
Expand All @@ -137,7 +156,8 @@ pub(crate) fn check_apub_id_valid_with_strictness(
.iter()
.map(|i| i.domain.clone())
.collect::<Vec<String>>();
let local_instance = settings
let local_instance = context
.settings()
.get_hostname_without_port()
.expect("local hostname is valid");
allowed_and_local.push(local_instance);
Expand Down
9 changes: 1 addition & 8 deletions crates/apub/src/objects/comment.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use crate::{
activities::{verify_is_public, verify_person_in_community},
check_apub_id_valid_with_strictness,
fetch_local_site_data,
mentions::collect_non_local_mentions,
objects::{read_from_string_or_source, verify_is_remote_object},
protocol::{
Expand Down Expand Up @@ -132,14 +131,8 @@ impl Object for ApubComment {
verify_domains_match(note.attributed_to.inner(), note.id.inner())?;
verify_is_public(&note.to, &note.cc)?;
let community = note.community(context).await?;
let local_site_data = fetch_local_site_data(context.pool()).await?;

check_apub_id_valid_with_strictness(
note.id.inner(),
community.local,
&local_site_data,
context.settings(),
)?;
check_apub_id_valid_with_strictness(note.id.inner(), community.local, context).await?;
verify_is_remote_object(note.id.inner(), context.settings())?;
verify_person_in_community(&note.attributed_to, &community, context).await?;
let (post, _) = note.get_parents(context).await?;
Expand Down
24 changes: 7 additions & 17 deletions crates/apub/src/objects/community.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
check_apub_id_valid_with_strictness,
fetch_local_site_data,
check_apub_id_valid,
local_site_data_cached,
objects::instance::fetch_instance_actor_for_object,
protocol::{
objects::{group::Group, Endpoints, LanguageTag},
Expand All @@ -14,7 +14,6 @@ use activitypub_federation::{
traits::{Actor, Object},
};
use chrono::NaiveDateTime;
use itertools::Itertools;
use lemmy_api_common::{
context::LemmyContext,
utils::{generate_featured_url, generate_moderators_url, generate_outbox_url},
Expand Down Expand Up @@ -187,24 +186,15 @@ impl ApubCommunity {
) -> Result<Vec<Url>, LemmyError> {
let id = self.id;

let local_site_data = fetch_local_site_data(context.pool()).await?;
let follows = CommunityFollowerView::for_community(context.pool(), id).await?;
let local_site_data = local_site_data_cached(context.pool()).await?;
let follows = CommunityFollowerView::get_community_follower_inboxes(context.pool(), id).await?;

let inboxes: Vec<Url> = follows
.into_iter()
.filter(|f| !f.follower.local)
.map(|f| {
f.follower
.shared_inbox_url
.unwrap_or(f.follower.inbox_url)
.into()
})
.unique()
.map(Into::into)
.filter(|inbox: &Url| inbox.host_str() != Some(&context.settings().hostname))
// Don't send to blocked instances
.filter(|inbox| {
check_apub_id_valid_with_strictness(inbox, false, &local_site_data, context.settings())
.is_ok()
})
.filter(|inbox| check_apub_id_valid(inbox, &local_site_data).is_ok())
.collect();

Ok(inboxes)
Expand Down
9 changes: 4 additions & 5 deletions crates/apub/src/objects/instance.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
check_apub_id_valid_with_strictness,
fetch_local_site_data,
local_site_data_cached,
objects::read_from_string_or_source_opt,
protocol::{
objects::{instance::Instance, LanguageTag},
Expand Down Expand Up @@ -113,15 +113,14 @@ impl Object for ApubSite {
expected_domain: &Url,
data: &Data<Self::DataType>,
) -> Result<(), LemmyError> {
let local_site_data = fetch_local_site_data(data.pool()).await?;

check_apub_id_valid_with_strictness(apub.id.inner(), true, &local_site_data, data.settings())?;
check_apub_id_valid_with_strictness(apub.id.inner(), true, data).await?;
verify_domains_match(expected_domain, apub.id.inner())?;

let local_site_data = local_site_data_cached(data.pool()).await?;
let slur_regex = &local_site_opt_to_slur_regex(&local_site_data.local_site);

check_slurs(&apub.name, slur_regex)?;
check_slurs_opt(&apub.summary, slur_regex)?;

Ok(())
}

Expand Down
12 changes: 3 additions & 9 deletions crates/apub/src/objects/person.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
check_apub_id_valid_with_strictness,
fetch_local_site_data,
local_site_data_cached,
objects::{instance::fetch_instance_actor_for_object, read_from_string_or_source_opt},
protocol::{
objects::{
Expand Down Expand Up @@ -118,19 +118,13 @@ impl Object for ApubPerson {
expected_domain: &Url,
context: &Data<Self::DataType>,
) -> Result<(), LemmyError> {
let local_site_data = fetch_local_site_data(context.pool()).await?;
let local_site_data = local_site_data_cached(context.pool()).await?;
let slur_regex = &local_site_opt_to_slur_regex(&local_site_data.local_site);

check_slurs(&person.preferred_username, slur_regex)?;
check_slurs_opt(&person.name, slur_regex)?;

verify_domains_match(person.id.inner(), expected_domain)?;
check_apub_id_valid_with_strictness(
person.id.inner(),
false,
&local_site_data,
context.settings(),
)?;
check_apub_id_valid_with_strictness(person.id.inner(), false, context).await?;

let bio = read_from_string_or_source_opt(&person.summary, &None, &person.source);
check_slurs_opt(&bio, slur_regex)?;
Expand Down
12 changes: 3 additions & 9 deletions crates/apub/src/objects/post.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::{
activities::{verify_is_public, verify_person_in_community},
check_apub_id_valid_with_strictness,
fetch_local_site_data,
local_site_data_cached,
objects::{read_from_string_or_source_opt, verify_is_remote_object},
protocol::{
objects::{
Expand Down Expand Up @@ -143,17 +143,11 @@ impl Object for ApubPost {
verify_is_remote_object(page.id.inner(), context.settings())?;
};

let local_site_data = fetch_local_site_data(context.pool()).await?;

let community = page.community(context).await?;
check_apub_id_valid_with_strictness(
page.id.inner(),
community.local,
&local_site_data,
context.settings(),
)?;
check_apub_id_valid_with_strictness(page.id.inner(), community.local, context).await?;
verify_person_in_community(&page.creator()?, &community, context).await?;

let local_site_data = local_site_data_cached(context.pool()).await?;
let slur_regex = &local_site_opt_to_slur_regex(&local_site_data.local_site);
check_slurs_opt(&page.name, slur_regex)?;

Expand Down
10 changes: 1 addition & 9 deletions crates/apub/src/objects/private_message.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use crate::{
check_apub_id_valid_with_strictness,
fetch_local_site_data,
objects::read_from_string_or_source,
protocol::{
objects::chat_message::{ChatMessage, ChatMessageType},
Expand Down Expand Up @@ -102,14 +101,7 @@ impl Object for ApubPrivateMessage {
verify_domains_match(note.id.inner(), expected_domain)?;
verify_domains_match(note.attributed_to.inner(), note.id.inner())?;

let local_site_data = fetch_local_site_data(context.pool()).await?;

check_apub_id_valid_with_strictness(
note.id.inner(),
false,
&local_site_data,
context.settings(),
)?;
check_apub_id_valid_with_strictness(note.id.inner(), false, context).await?;
let person = note.attributed_to.dereference(context).await?;
if person.banned {
return Err(LemmyError::from_message("Person is banned from site"));
Expand Down
4 changes: 2 additions & 2 deletions crates/apub/src/protocol/collections/group_followers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,12 @@ impl GroupFollowers {
) -> Result<GroupFollowers, LemmyError> {
let community_id = community.id;
let community_followers =
CommunityFollowerView::for_community(context.pool(), community_id).await?;
CommunityFollowerView::count_community_followers(context.pool(), community_id).await?;

Ok(GroupFollowers {
id: generate_followers_url(&community.actor_id)?.into(),
r#type: CollectionType::Collection,
total_items: community_followers.len() as i32,
total_items: community_followers as i32,
items: vec![],
})
}
Expand Down
12 changes: 3 additions & 9 deletions crates/apub/src/protocol/objects/group.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use crate::{
community_moderators::ApubCommunityModerators,
community_outbox::ApubCommunityOutbox,
},
fetch_local_site_data,
local_site_data_cached,
objects::{community::ApubCommunity, read_from_string_or_source_opt},
protocol::{
objects::{Endpoints, LanguageTag},
Expand Down Expand Up @@ -80,16 +80,10 @@ impl Group {
expected_domain: &Url,
context: &LemmyContext,
) -> Result<(), LemmyError> {
let local_site_data = fetch_local_site_data(context.pool()).await?;

check_apub_id_valid_with_strictness(
self.id.inner(),
true,
&local_site_data,
context.settings(),
)?;
check_apub_id_valid_with_strictness(self.id.inner(), true, context).await?;
verify_domains_match(expected_domain, self.id.inner())?;

let local_site_data = local_site_data_cached(context.pool()).await?;
let slur_regex = &local_site_opt_to_slur_regex(&local_site_data.local_site);

check_slurs(&self.preferred_username, slur_regex)?;
Expand Down
Loading

0 comments on commit b9669f3

Please sign in to comment.