Skip to content

Commit a1e8ae9

Browse files
committed
feat: [torrust#469] import torrent statistics in batches
Instead of importing all torrents statistics from the tracker every hour, this change imports 50 torrents every 100 milliseconds. Batches only include torrents that have not been updated in the last hour (or whatever is the `torrent_info_update_interval` value in the configuration). This change avoid loading the whole set of torrents in memory every time the importation starts. In the future, It could also allow to handle the nunber of request per second to the tracker (statically, from config value or dinamically, depending on the tracler load). Althought it saves memory, it runs more SQL queries to get the list of torrents pending to update.
1 parent ff9f5ff commit a1e8ae9

File tree

6 files changed

+156
-21
lines changed

6 files changed

+156
-21
lines changed

src/console/cronjobs/tracker_statistics_importer.rs

+36-12
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,14 @@ use axum::extract::State;
1717
use axum::routing::{get, post};
1818
use axum::{Json, Router};
1919
use chrono::{DateTime, Utc};
20-
use log::{error, info};
20+
use log::{debug, error, info};
2121
use serde_json::{json, Value};
22+
use text_colorizer::Colorize;
2223
use tokio::net::TcpListener;
2324
use tokio::task::JoinHandle;
2425

2526
use crate::tracker::statistics_importer::StatisticsImporter;
27+
use crate::utils::clock::seconds_ago_utc;
2628

2729
const IMPORTER_API_IP: &str = "127.0.0.1";
2830

@@ -41,7 +43,7 @@ struct ImporterState {
4143
#[must_use]
4244
pub fn start(
4345
importer_port: u16,
44-
torrent_info_update_interval: u64,
46+
torrent_stats_update_interval: u64,
4547
tracker_statistics_importer: &Arc<StatisticsImporter>,
4648
) -> JoinHandle<()> {
4749
let weak_tracker_statistics_importer = Arc::downgrade(tracker_statistics_importer);
@@ -54,7 +56,7 @@ pub fn start(
5456
let _importer_api_handle = tokio::spawn(async move {
5557
let import_state = Arc::new(ImporterState {
5658
last_heartbeat: Arc::new(Mutex::new(Utc::now())),
57-
torrent_info_update_interval,
59+
torrent_info_update_interval: torrent_stats_update_interval,
5860
});
5961

6062
let app = Router::new()
@@ -81,25 +83,47 @@ pub fn start(
8183

8284
info!("Tracker statistics importer cronjob starting ...");
8385

84-
let interval = std::time::Duration::from_secs(torrent_info_update_interval);
85-
let mut interval = tokio::time::interval(interval);
86+
let execution_interval_in_milliseconds = 100;
87+
let execution_interval_duration = std::time::Duration::from_millis(execution_interval_in_milliseconds);
88+
let mut execution_interval = tokio::time::interval(execution_interval_duration);
8689

87-
interval.tick().await; // first tick is immediate...
90+
execution_interval.tick().await; // first tick is immediate...
8891

89-
loop {
90-
interval.tick().await;
91-
92-
info!("Running tracker statistics importer ...");
92+
info!("Running tracker statistics importer every {execution_interval_in_milliseconds} milliseconds ...");
9393

94+
loop {
9495
if let Err(e) = send_heartbeat(importer_port).await {
9596
error!("Failed to send heartbeat from importer cronjob: {}", e);
9697
}
9798

98-
if let Some(tracker) = weak_tracker_statistics_importer.upgrade() {
99-
drop(tracker.import_all_torrents_statistics().await);
99+
if let Some(statistics_importer) = weak_tracker_statistics_importer.upgrade() {
100+
let one_interval_ago = seconds_ago_utc(
101+
torrent_stats_update_interval
102+
.try_into()
103+
.expect("update interval should be a positive integer"),
104+
);
105+
let limit = 50;
106+
107+
debug!(
108+
"Importing torrents statistics not updated since {} limited to a maximum of {} torrents ...",
109+
one_interval_ago.to_string().yellow(),
110+
limit.to_string().yellow()
111+
);
112+
113+
match statistics_importer
114+
.import_torrents_statistics_not_updated_since(one_interval_ago, limit)
115+
.await
116+
{
117+
Ok(()) => {}
118+
Err(e) => error!("Failed to import statistics: {:?}", e),
119+
}
120+
121+
drop(statistics_importer);
100122
} else {
101123
break;
102124
}
125+
126+
execution_interval.tick().await;
103127
}
104128
})
105129
}

src/databases/database.rs

+8-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use async_trait::async_trait;
2-
use chrono::NaiveDateTime;
2+
use chrono::{DateTime, NaiveDateTime, Utc};
33
use serde::{Deserialize, Serialize};
44

55
use crate::databases::mysql::Mysql;
@@ -292,6 +292,13 @@ pub trait Database: Sync + Send {
292292
/// Get all torrents as `Vec<TorrentCompact>`.
293293
async fn get_all_torrents_compact(&self) -> Result<Vec<TorrentCompact>, Error>;
294294

295+
/// Get torrents whose stats have not been imported from the tracker at least since a given datetime.
296+
async fn get_torrents_with_stats_not_updated_since(
297+
&self,
298+
datetime: DateTime<Utc>,
299+
limit: i64,
300+
) -> Result<Vec<TorrentCompact>, Error>;
301+
295302
/// Update a torrent's title with `torrent_id` and `title`.
296303
async fn update_torrent_title(&self, torrent_id: i64, title: &str) -> Result<(), Error>;
297304

src/databases/mysql.rs

+23-2
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use std::str::FromStr;
22
use std::time::Duration;
33

44
use async_trait::async_trait;
5-
use chrono::NaiveDateTime;
5+
use chrono::{DateTime, NaiveDateTime, Utc};
66
use sqlx::mysql::{MySqlConnectOptions, MySqlPoolOptions};
77
use sqlx::{query, query_as, Acquire, ConnectOptions, MySqlPool};
88

@@ -20,7 +20,7 @@ use crate::models::torrent_tag::{TagId, TorrentTag};
2020
use crate::models::tracker_key::TrackerKey;
2121
use crate::models::user::{User, UserAuthentication, UserCompact, UserId, UserProfile};
2222
use crate::services::torrent::{CanonicalInfoHashGroup, DbTorrentInfoHash};
23-
use crate::utils::clock::{self, datetime_now};
23+
use crate::utils::clock::{self, datetime_now, DATETIME_FORMAT};
2424
use crate::utils::hex::from_bytes;
2525

2626
pub struct Mysql {
@@ -884,6 +884,27 @@ impl Database for Mysql {
884884
.map_err(|_| database::Error::Error)
885885
}
886886

887+
async fn get_torrents_with_stats_not_updated_since(
888+
&self,
889+
datetime: DateTime<Utc>,
890+
limit: i64,
891+
) -> Result<Vec<TorrentCompact>, database::Error> {
892+
query_as::<_, TorrentCompact>(
893+
"SELECT tt.torrent_id, tt.info_hash
894+
FROM torrust_torrents tt
895+
LEFT JOIN torrust_torrent_tracker_stats tts ON tt.torrent_id = tts.torrent_id
896+
WHERE tts.updated_at < ? OR tts.updated_at IS NULL
897+
ORDER BY tts.updated_at ASC
898+
LIMIT ?
899+
",
900+
)
901+
.bind(datetime.format(DATETIME_FORMAT).to_string())
902+
.bind(limit)
903+
.fetch_all(&self.pool)
904+
.await
905+
.map_err(|_| database::Error::Error)
906+
}
907+
887908
async fn update_torrent_title(&self, torrent_id: i64, title: &str) -> Result<(), database::Error> {
888909
query("UPDATE torrust_torrent_info SET title = ? WHERE torrent_id = ?")
889910
.bind(title)

src/databases/sqlite.rs

+23-2
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use std::str::FromStr;
22
use std::time::Duration;
33

44
use async_trait::async_trait;
5-
use chrono::NaiveDateTime;
5+
use chrono::{DateTime, NaiveDateTime, Utc};
66
use sqlx::sqlite::{SqliteConnectOptions, SqlitePoolOptions};
77
use sqlx::{query, query_as, Acquire, ConnectOptions, SqlitePool};
88

@@ -20,7 +20,7 @@ use crate::models::torrent_tag::{TagId, TorrentTag};
2020
use crate::models::tracker_key::TrackerKey;
2121
use crate::models::user::{User, UserAuthentication, UserCompact, UserId, UserProfile};
2222
use crate::services::torrent::{CanonicalInfoHashGroup, DbTorrentInfoHash};
23-
use crate::utils::clock::{self, datetime_now};
23+
use crate::utils::clock::{self, datetime_now, DATETIME_FORMAT};
2424
use crate::utils::hex::from_bytes;
2525

2626
pub struct Sqlite {
@@ -876,6 +876,27 @@ impl Database for Sqlite {
876876
.map_err(|_| database::Error::Error)
877877
}
878878

879+
async fn get_torrents_with_stats_not_updated_since(
880+
&self,
881+
datetime: DateTime<Utc>,
882+
limit: i64,
883+
) -> Result<Vec<TorrentCompact>, database::Error> {
884+
query_as::<_, TorrentCompact>(
885+
"SELECT tt.torrent_id, tt.info_hash
886+
FROM torrust_torrents tt
887+
LEFT JOIN torrust_torrent_tracker_stats tts ON tt.torrent_id = tts.torrent_id
888+
WHERE tts.updated_at < ? OR tts.updated_at IS NULL
889+
ORDER BY tts.updated_at ASC
890+
LIMIT ?
891+
",
892+
)
893+
.bind(datetime.format(DATETIME_FORMAT).to_string())
894+
.bind(limit)
895+
.fetch_all(&self.pool)
896+
.await
897+
.map_err(|_| database::Error::Error)
898+
}
899+
879900
async fn update_torrent_title(&self, torrent_id: i64, title: &str) -> Result<(), database::Error> {
880901
query("UPDATE torrust_torrent_info SET title = $1 WHERE torrent_id = $2")
881902
.bind(title)

src/tracker/statistics_importer.rs

+56-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
use std::sync::Arc;
22
use std::time::Instant;
33

4-
use log::{error, info};
4+
use chrono::{DateTime, Utc};
5+
use log::{debug, error, info};
56
use text_colorizer::Colorize;
67

78
use super::service::{Service, TorrentInfo, TrackerAPIError};
@@ -36,13 +37,17 @@ impl StatisticsImporter {
3637
pub async fn import_all_torrents_statistics(&self) -> Result<(), database::Error> {
3738
let torrents = self.database.get_all_torrents_compact().await?;
3839

40+
if torrents.is_empty() {
41+
return Ok(());
42+
}
43+
3944
info!(target: LOG_TARGET, "Importing {} torrents statistics from tracker {} ...", torrents.len().to_string().yellow(), self.tracker_url.yellow());
4045

4146
// Start the timer before the loop
4247
let start_time = Instant::now();
4348

4449
for torrent in torrents {
45-
info!(target: LOG_TARGET, "Importing torrent #{} ...", torrent.torrent_id.to_string().yellow());
50+
info!(target: LOG_TARGET, "Importing torrent #{} statistics ...", torrent.torrent_id.to_string().yellow());
4651

4752
let ret = self.import_torrent_statistics(torrent.torrent_id, &torrent.info_hash).await;
4853

@@ -64,6 +69,55 @@ impl StatisticsImporter {
6469
Ok(())
6570
}
6671

72+
/// Import torrents statistics not updated recently..
73+
///
74+
/// # Errors
75+
///
76+
/// Will return an error if the database query failed.
77+
pub async fn import_torrents_statistics_not_updated_since(
78+
&self,
79+
datetime: DateTime<Utc>,
80+
limit: i64,
81+
) -> Result<(), database::Error> {
82+
debug!(target: LOG_TARGET, "Importing torrents statistics not updated since {} limited to a maximum of {} torrents ...", datetime.to_string().yellow(), limit.to_string().yellow());
83+
84+
let torrents = self
85+
.database
86+
.get_torrents_with_stats_not_updated_since(datetime, limit)
87+
.await?;
88+
89+
if torrents.is_empty() {
90+
return Ok(());
91+
}
92+
93+
info!(target: LOG_TARGET, "Importing {} torrents statistics from tracker {} ...", torrents.len().to_string().yellow(), self.tracker_url.yellow());
94+
95+
// Start the timer before the loop
96+
let start_time = Instant::now();
97+
98+
for torrent in torrents {
99+
info!(target: LOG_TARGET, "Importing torrent #{} statistics ...", torrent.torrent_id.to_string().yellow());
100+
101+
let ret = self.import_torrent_statistics(torrent.torrent_id, &torrent.info_hash).await;
102+
103+
if let Some(err) = ret.err() {
104+
if err != TrackerAPIError::TorrentNotFound {
105+
let message = format!(
106+
"Error updating torrent tracker stats for torrent. Torrent: id {}; infohash {}. Error: {:?}",
107+
torrent.torrent_id, torrent.info_hash, err
108+
);
109+
error!(target: LOG_TARGET, "{}", message);
110+
}
111+
}
112+
}
113+
114+
let elapsed_time = start_time.elapsed();
115+
116+
info!(target: LOG_TARGET, "Statistics import completed in {:.2?}", elapsed_time);
117+
118+
Ok(())
119+
}
120+
67121
/// Import torrent statistics from tracker and update them in database.
68122
///
69123
/// # Errors

src/utils/clock.rs

+10-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
use chrono::Utc;
1+
use chrono::{DateTime, Duration, Utc};
2+
3+
pub const DATETIME_FORMAT: &str = "%Y-%m-%d %H:%M:%S";
24

35
/// Returns the current timestamp in seconds.
46
///
@@ -11,10 +13,16 @@ pub fn now() -> u64 {
1113
u64::try_from(chrono::prelude::Utc::now().timestamp()).expect("timestamp should be positive")
1214
}
1315

16+
/// Returns the datetime some seconds ago.
17+
#[must_use]
18+
pub fn seconds_ago_utc(seconds: i64) -> DateTime<chrono::Utc> {
19+
Utc::now() - Duration::seconds(seconds)
20+
}
21+
1422
/// Returns the current time in database format.
1523
///
1624
/// For example: `2024-03-12 15:56:24`.
1725
#[must_use]
1826
pub fn datetime_now() -> String {
19-
Utc::now().format("%Y-%m-%d %H:%M:%S").to_string()
27+
Utc::now().format(DATETIME_FORMAT).to_string()
2028
}

0 commit comments

Comments
 (0)