Skip to content

Commit

Permalink
feat: [#261] store original infohashes
Browse files Browse the repository at this point in the history
When you upload a torrent, the infohash might change if the `info` dictionary contains non-standard fields because we remove them. That leads to a different infohash. We store the
 original infohash in a new table so that we can know if the torrent was previously uploaded.

If we did not store the original infohash, we could still reject uploads that produce the same canonical infohash. However, there would be no way for the user to ask whether a torrent exists with a given original infohash; they would only be able to interact with the API using the canonical infohash.

Sometimes it's useful to use the original infohash, for instance, if you are importing torrents from an external source and you want to check if the original torrent (with the original infohash) was already uploaded.
  • Loading branch information
josecelano committed Sep 6, 2023
1 parent b3fe7f9 commit 9aa52a7
Show file tree
Hide file tree
Showing 13 changed files with 381 additions and 36 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
-- Migration: move the original infohash out of `torrust_torrents` and into a
-- dedicated table. Every row of `torrust_torrent_info_hashes` maps one
-- infohash to the canonical infohash of the torrent it belongs to.

-- Step 1: Create the table that holds all infohashes.
-- Deleting a torrent removes all its infohash rows (ON DELETE CASCADE).
CREATE TABLE torrust_torrent_info_hashes (
    info_hash CHAR(40) NOT NULL,
    canonical_info_hash CHAR(40) NOT NULL,
    original_is_known BOOLEAN NOT NULL,
    PRIMARY KEY (info_hash),
    FOREIGN KEY (canonical_info_hash) REFERENCES torrust_torrents (info_hash) ON DELETE CASCADE
);

-- Step 2: Create one record per torrent that maps the canonical infohash to
-- itself. `original_is_known` is FALSE when `original_info_hash` is NULL
-- (the torrent was uploaded before the original infohash was stored, so we
-- do not know whether it was the same) and TRUE otherwise.
INSERT INTO torrust_torrent_info_hashes (info_hash, canonical_info_hash, original_is_known)
SELECT t.info_hash, t.info_hash, (t.original_info_hash IS NOT NULL)
FROM torrust_torrents AS t;

-- Step 3: For torrents whose original infohash differs from the canonical
-- one, create a second record with the original infohash pointing to the
-- canonical infohash.
INSERT INTO torrust_torrent_info_hashes (info_hash, canonical_info_hash, original_is_known)
SELECT t.original_info_hash, t.info_hash, TRUE
FROM torrust_torrents AS t
WHERE t.original_info_hash IS NOT NULL
  AND t.original_info_hash <> t.info_hash;

-- Step 4: Delete the `torrust_torrents::original_info_hash` column, which is
-- now redundant.
ALTER TABLE torrust_torrents DROP COLUMN original_info_hash;

Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
-- Migration: move the original infohash out of `torrust_torrents` and into a
-- dedicated table. Every row of `torrust_torrent_info_hashes` maps one
-- infohash to the canonical infohash of the torrent it belongs to.

-- Step 1: Create the table that holds all infohashes.
-- Deleting a torrent removes all its infohash rows (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS torrust_torrent_info_hashes (
    info_hash TEXT NOT NULL,
    canonical_info_hash TEXT NOT NULL,
    original_is_known BOOLEAN NOT NULL,
    PRIMARY KEY (info_hash),
    FOREIGN KEY (canonical_info_hash) REFERENCES torrust_torrents (info_hash) ON DELETE CASCADE
);

-- Step 2: Create one record per torrent that maps the canonical infohash to
-- itself. `original_is_known` is FALSE when `original_info_hash` is NULL
-- (the torrent was uploaded before the original infohash was stored, so we
-- do not know whether it was the same) and TRUE otherwise.
INSERT INTO torrust_torrent_info_hashes (info_hash, canonical_info_hash, original_is_known)
SELECT t.info_hash, t.info_hash, (t.original_info_hash IS NOT NULL)
FROM torrust_torrents AS t;

-- Step 3: For torrents whose original infohash differs from the canonical
-- one, create a second record with the original infohash pointing to the
-- canonical infohash.
INSERT INTO torrust_torrent_info_hashes (info_hash, canonical_info_hash, original_is_known)
SELECT t.original_info_hash, t.info_hash, TRUE
FROM torrust_torrents AS t
WHERE t.original_info_hash IS NOT NULL
  AND t.original_info_hash <> t.info_hash;

-- Step 4: Delete the `torrust_torrents::original_info_hash` column, which is
-- now redundant.
-- SQLite 2021-03-12 (3.35.0) supports DROP COLUMN
-- https://www.sqlite.org/lang_altertable.html#alter_table_drop_column
ALTER TABLE torrust_torrents DROP COLUMN original_info_hash;

7 changes: 5 additions & 2 deletions src/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ use crate::services::authentication::{DbUserAuthenticationRepository, JsonWebTok
use crate::services::category::{self, DbCategoryRepository};
use crate::services::tag::{self, DbTagRepository};
use crate::services::torrent::{
DbTorrentAnnounceUrlRepository, DbTorrentFileRepository, DbTorrentInfoRepository, DbTorrentListingGenerator,
DbTorrentRepository, DbTorrentTagRepository,
DbTorrentAnnounceUrlRepository, DbTorrentFileRepository, DbTorrentInfoHashRepository, DbTorrentInfoRepository,
DbTorrentListingGenerator, DbTorrentRepository, DbTorrentTagRepository,
};
use crate::services::user::{self, DbBannedUserList, DbUserProfileRepository, DbUserRepository};
use crate::services::{proxy, settings, torrent};
Expand Down Expand Up @@ -68,6 +68,7 @@ pub async fn run(configuration: Configuration, api_version: &Version) -> Running
let user_authentication_repository = Arc::new(DbUserAuthenticationRepository::new(database.clone()));
let user_profile_repository = Arc::new(DbUserProfileRepository::new(database.clone()));
let torrent_repository = Arc::new(DbTorrentRepository::new(database.clone()));
let torrent_info_hash_repository = Arc::new(DbTorrentInfoHashRepository::new(database.clone()));
let torrent_info_repository = Arc::new(DbTorrentInfoRepository::new(database.clone()));
let torrent_file_repository = Arc::new(DbTorrentFileRepository::new(database.clone()));
let torrent_announce_url_repository = Arc::new(DbTorrentAnnounceUrlRepository::new(database.clone()));
Expand All @@ -92,6 +93,7 @@ pub async fn run(configuration: Configuration, api_version: &Version) -> Running
user_repository.clone(),
category_repository.clone(),
torrent_repository.clone(),
torrent_info_hash_repository.clone(),
torrent_info_repository.clone(),
torrent_file_repository.clone(),
torrent_announce_url_repository.clone(),
Expand Down Expand Up @@ -135,6 +137,7 @@ pub async fn run(configuration: Configuration, api_version: &Version) -> Running
user_authentication_repository,
user_profile_repository,
torrent_repository,
torrent_info_hash_repository,
torrent_info_repository,
torrent_file_repository,
torrent_announce_url_repository,
Expand Down
7 changes: 5 additions & 2 deletions src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ use crate::services::authentication::{DbUserAuthenticationRepository, JsonWebTok
use crate::services::category::{self, DbCategoryRepository};
use crate::services::tag::{self, DbTagRepository};
use crate::services::torrent::{
DbTorrentAnnounceUrlRepository, DbTorrentFileRepository, DbTorrentInfoRepository, DbTorrentListingGenerator,
DbTorrentRepository, DbTorrentTagRepository,
DbTorrentAnnounceUrlRepository, DbTorrentFileRepository, DbTorrentInfoHashRepository, DbTorrentInfoRepository,
DbTorrentListingGenerator, DbTorrentRepository, DbTorrentTagRepository,
};
use crate::services::user::{self, DbBannedUserList, DbUserProfileRepository, DbUserRepository};
use crate::services::{proxy, settings, torrent};
Expand All @@ -34,6 +34,7 @@ pub struct AppData {
pub user_authentication_repository: Arc<DbUserAuthenticationRepository>,
pub user_profile_repository: Arc<DbUserProfileRepository>,
pub torrent_repository: Arc<DbTorrentRepository>,
pub torrent_info_hash_repository: Arc<DbTorrentInfoHashRepository>,
pub torrent_info_repository: Arc<DbTorrentInfoRepository>,
pub torrent_file_repository: Arc<DbTorrentFileRepository>,
pub torrent_announce_url_repository: Arc<DbTorrentAnnounceUrlRepository>,
Expand Down Expand Up @@ -69,6 +70,7 @@ impl AppData {
user_authentication_repository: Arc<DbUserAuthenticationRepository>,
user_profile_repository: Arc<DbUserProfileRepository>,
torrent_repository: Arc<DbTorrentRepository>,
torrent_info_hash_repository: Arc<DbTorrentInfoHashRepository>,
torrent_info_repository: Arc<DbTorrentInfoRepository>,
torrent_file_repository: Arc<DbTorrentFileRepository>,
torrent_announce_url_repository: Arc<DbTorrentAnnounceUrlRepository>,
Expand Down Expand Up @@ -101,6 +103,7 @@ impl AppData {
user_authentication_repository,
user_profile_repository,
torrent_repository,
torrent_info_hash_repository,
torrent_info_repository,
torrent_file_repository,
torrent_announce_url_repository,
Expand Down
13 changes: 13 additions & 0 deletions src/databases/database.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use crate::models::torrent_file::{DbTorrentInfo, Torrent, TorrentFile};
use crate::models::torrent_tag::{TagId, TorrentTag};
use crate::models::tracker_key::TrackerKey;
use crate::models::user::{User, UserAuthentication, UserCompact, UserId, UserProfile};
use crate::services::torrent::OriginalInfoHashes;

/// Database tables to be truncated when upgrading from v1.0.0 to v2.0.0.
/// They must be in the correct order to avoid foreign key errors.
Expand Down Expand Up @@ -87,6 +88,7 @@ pub enum Error {
TorrentNotFound,
TorrentAlreadyExists, // when uploading an already uploaded info_hash
TorrentTitleAlreadyExists,
TorrentInfoHashNotFound,
}

/// Get the Driver of the Database from the Connection String
Expand Down Expand Up @@ -229,6 +231,17 @@ pub trait Database: Sync + Send {
))
}

/// Returns the list of original infohashes for a canonical infohash.
///
/// When you upload a torrent the infohash might change because the Index
/// removes the non-standard fields in the `info` dictionary. That makes the
/// infohash change. The canonical infohash is the resulting infohash.
/// This function returns the original infohashes of a canonical infohash.
/// The relationship is 1 canonical infohash -> N original infohashes.
async fn get_torrent_original_info_hashes(&self, canonical: &InfoHash) -> Result<OriginalInfoHashes, Error>;

/// Inserts a new infohash record that links the `original` infohash to the
/// `canonical` infohash of a torrent.
///
/// The relationship is 1 canonical infohash -> N original infohashes, so
/// this can be called several times for the same `canonical` value.
async fn insert_torrent_info_hash(&self, original: &InfoHash, canonical: &InfoHash) -> Result<(), Error>;

/// Get torrent's info as `DbTorrentInfo` from `torrent_id`.
async fn get_torrent_info_from_id(&self, torrent_id: i64) -> Result<DbTorrentInfo, Error>;

Expand Down
64 changes: 60 additions & 4 deletions src/databases/mysql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use crate::models::torrent_file::{DbTorrentAnnounceUrl, DbTorrentFile, DbTorrent
use crate::models::torrent_tag::{TagId, TorrentTag};
use crate::models::tracker_key::TrackerKey;
use crate::models::user::{User, UserAuthentication, UserCompact, UserId, UserProfile};
use crate::services::torrent::{DbTorrentInfoHash, OriginalInfoHashes};
use crate::utils::clock;
use crate::utils::hex::from_bytes;

Expand Down Expand Up @@ -425,7 +426,8 @@ impl Database for Mysql {
title: &str,
description: &str,
) -> Result<i64, database::Error> {
let info_hash = torrent.info_hash();
let info_hash = torrent.info_hash_hex();
let canonical_info_hash = torrent.canonical_info_hash();

// open pool connection
let mut conn = self.pool.acquire().await.map_err(|_| database::Error::Error)?;
Expand All @@ -444,7 +446,7 @@ impl Database for Mysql {
let private = torrent.info.private.unwrap_or(0);

// add torrent
let torrent_id = query("INSERT INTO torrust_torrents (uploader_id, category_id, info_hash, size, name, pieces, piece_length, private, root_hash, `source`, original_info_hash, date_uploaded) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, UTC_TIMESTAMP())")
let torrent_id = query("INSERT INTO torrust_torrents (uploader_id, category_id, info_hash, size, name, pieces, piece_length, private, root_hash, `source`, date_uploaded) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, UTC_TIMESTAMP())")
.bind(uploader_id)
.bind(category_id)
.bind(info_hash.to_lowercase())
Expand All @@ -455,8 +457,7 @@ impl Database for Mysql {
.bind(private)
.bind(root_hash)
.bind(torrent.info.source.clone())
.bind(original_info_hash.to_hex_string())
.execute(&self.pool)
.execute(&mut tx)
.await
.map(|v| i64::try_from(v.last_insert_id()).expect("last ID is larger than i64"))
.map_err(|e| match e {
Expand All @@ -472,6 +473,27 @@ impl Database for Mysql {
_ => database::Error::Error
})?;

// add torrent canonical infohash

let insert_info_hash_result =
query("INSERT INTO torrust_torrent_info_hashes (info_hash, canonical_info_hash, original_is_known) VALUES (?, ?, ?)")
.bind(original_info_hash.to_hex_string())
.bind(canonical_info_hash.to_hex_string())
.bind(true)
.execute(&mut tx)
.await
.map(|_| ())
.map_err(|err| {
log::error!("DB error: {:?}", err);
database::Error::Error
});

// rollback transaction on error
if let Err(e) = insert_info_hash_result {
drop(tx.rollback().await);
return Err(e);
}

let insert_torrent_files_result = if let Some(length) = torrent.info.length {
query("INSERT INTO torrust_torrent_files (md5sum, torrent_id, length) VALUES (?, ?, ?)")
.bind(torrent.info.md5sum.clone())
Expand Down Expand Up @@ -573,6 +595,40 @@ impl Database for Mysql {
}
}

/// Loads every infohash stored in `torrust_torrent_info_hashes` whose
/// `canonical_info_hash` equals `canonical`.
///
/// # Errors
///
/// Returns `database::Error::ErrorWithText` carrying the driver's message
/// if the query fails.
///
/// # Panics
///
/// Panics if a stored `info_hash` value cannot be parsed as an `InfoHash`.
async fn get_torrent_original_info_hashes(&self, canonical: &InfoHash) -> Result<OriginalInfoHashes, database::Error> {
    let rows = query_as::<_, DbTorrentInfoHash>(
        "SELECT info_hash, canonical_info_hash, original_is_known FROM torrust_torrent_info_hashes WHERE canonical_info_hash = ?",
    )
    .bind(canonical.to_hex_string())
    .fetch_all(&self.pool)
    .await
    .map_err(|err| database::Error::ErrorWithText(err.to_string()))?;

    // Values in this table are written by the application, so a row that
    // does not parse is treated as a broken invariant rather than an error.
    let mut original_info_hashes: Vec<InfoHash> = Vec::with_capacity(rows.len());
    for row in rows {
        match InfoHash::from_str(&row.info_hash) {
            Ok(parsed) => original_info_hashes.push(parsed),
            Err(_) => panic!("Invalid info-hash in database: {}", row.info_hash),
        }
    }

    Ok(OriginalInfoHashes {
        canonical_info_hash: *canonical,
        original_info_hashes,
    })
}

async fn insert_torrent_info_hash(&self, info_hash: &InfoHash, canonical: &InfoHash) -> Result<(), database::Error> {
query("INSERT INTO torrust_torrent_info_hashes (info_hash, canonical_info_hash, original_is_known) VALUES (?, ?, ?)")
.bind(info_hash.to_hex_string())
.bind(canonical.to_hex_string())
.bind(true)
.execute(&self.pool)
.await
.map(|_| ())
.map_err(|err| database::Error::ErrorWithText(err.to_string()))
}

async fn get_torrent_info_from_id(&self, torrent_id: i64) -> Result<DbTorrentInfo, database::Error> {
query_as::<_, DbTorrentInfo>(
"SELECT torrent_id, info_hash, name, pieces, piece_length, private, root_hash FROM torrust_torrents WHERE torrent_id = ?",
Expand Down
Loading

0 comments on commit 9aa52a7

Please sign in to comment.