From 066b50b721f4a20e7e3be78fd2a757e9203fda23 Mon Sep 17 00:00:00 2001 From: CircuitSacul Date: Tue, 27 Jun 2023 21:33:07 -0400 Subject: [PATCH] Embedder improvements (#318) * fix fxtweet embeds * add embed videos as uploadable attachments * fix the fxtweet fix * reformat and comment embed parser * preserve attachment names from URLs * linked images/videos now appear in attachments list * remove singular image urls from message content * only remove content if the whole message is a URL --- src/core/embedder/attachment_handle.rs | 8 ++- src/core/embedder/builder.rs | 40 +++++++++-- src/core/embedder/image_only_embed.rs | 6 +- src/core/embedder/parser.rs | 96 ++++++++++++++++++++------ 4 files changed, 115 insertions(+), 35 deletions(-) diff --git a/src/core/embedder/attachment_handle.rs b/src/core/embedder/attachment_handle.rs index 96067c9e..d6457a84 100644 --- a/src/core/embedder/attachment_handle.rs +++ b/src/core/embedder/attachment_handle.rs @@ -12,6 +12,8 @@ use crate::{ errors::{StarboardError, StarboardResult}, }; +use super::parser::AttachmentListItem; + pub struct AttachmentHandle { pub filename: String, pub content_type: Option, @@ -86,11 +88,11 @@ impl AttachmentHandle { }) } - pub fn url_list_item(&self) -> String { + pub fn attachment_list_item(&self) -> AttachmentListItem { if self.filename.len() > 32 { - format!("[{}...]({})", &self.filename[..29], self.url) + AttachmentListItem::new(self.filename[..29].to_string(), self.url.clone()) } else { - format!("[{}]({})", self.filename, self.url) + AttachmentListItem::new(self.filename.clone(), self.url.clone()) } } diff --git a/src/core/embedder/builder.rs b/src/core/embedder/builder.rs index a451ea3c..69f05027 100644 --- a/src/core/embedder/builder.rs +++ b/src/core/embedder/builder.rs @@ -1,5 +1,7 @@ use std::fmt::Write; +use lazy_static::lazy_static; +use regex::Regex; use twilight_model::{ channel::message::{ component::{ActionRow, Button, ButtonStyle}, @@ -28,6 +30,14 @@ use crate::{ use super::{parser::ParsedMessage, AttachmentHandle, Embedder}; +lazy_static! { + static ref URL_REGEX: Regex = Regex::new(concat!( + r"^https?://(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]", + r"{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&/=]*)$" + )) + .unwrap(); +} + pub struct FullBuiltStarboardEmbed { pub top_content: String, pub embeds: Vec, @@ -260,8 +270,26 @@ impl BuiltStarboardEmbed { description.push('\n'); } - if !orig.content.is_empty() { - description.push_str(&orig.content); + 'out: { + if !orig.content.is_empty() { + if URL_REGEX.is_match(&orig.content) { + let url = orig.content.split('?').next().unwrap_or(&orig.content); + + let mut found = false; + for item in &parsed.urls.embedded { + if item.url == url { + found = true; + break; + } + } + + if found { + break 'out; + } + } + + description.push_str(&orig.content); + } } let mut has_description; @@ -278,10 +306,10 @@ impl BuiltStarboardEmbed { } // attachments list - let mut urls = Vec::<&str>::new(); - urls.extend(parsed.urls.uploaded.iter().map(|url| url.as_str())); + let mut urls = Vec::new(); + urls.extend(parsed.urls.uploaded.iter().map(|url| url.to_md())); if !handle.config.resolved.extra_embeds || is_reply && parsed.urls.embedded.len() > 1 { - urls.extend(parsed.urls.embedded.iter().map(|url| url.as_str())); + urls.extend(parsed.urls.embedded.iter().map(|url| url.to_md())); } if (handle.config.resolved.attachments_list || is_reply) && !urls.is_empty() { @@ -293,7 +321,7 @@ impl BuiltStarboardEmbed { break; } - field.push_str(next); + field.push_str(&next); field.push('\n'); } diff --git a/src/core/embedder/image_only_embed.rs b/src/core/embedder/image_only_embed.rs index af561c7f..ec8acb99 100644 --- a/src/core/embedder/image_only_embed.rs +++ b/src/core/embedder/image_only_embed.rs @@ -53,9 +53,9 @@ pub fn maybe_get_attachment_handle(embed: &Embed) -> Option { }; let name = { - let ext = url.split('.').last(); - match ext { - Some(ext) => format!("attachment.{ext}"), + let name = url.split('/').last(); + match name { + Some(name) => name.to_string(), None => "attachment".to_string(), } }; diff --git a/src/core/embedder/parser.rs b/src/core/embedder/parser.rs index d0a70d5e..b513282d 100644 --- a/src/core/embedder/parser.rs +++ b/src/core/embedder/parser.rs @@ -18,10 +18,25 @@ pub type PrimaryImage = ImageSource; pub type Embeds = Vec; pub type UploadAttachments = Vec; +pub struct AttachmentListItem { + pub name: String, + pub url: String, +} + +impl AttachmentListItem { + pub fn new(name: String, url: String) -> Self { + Self { name, url } + } + + pub fn to_md(&self) -> String { + format!("[{}]({})", self.name, self.url) + } +} + #[derive(Default)] pub struct AttachmentUrls { - pub embedded: Vec, - pub uploaded: Vec, + pub embedded: Vec, + pub uploaded: Vec, } pub struct ParsedMessage { @@ -66,21 +81,22 @@ impl ParsedMessage { if primary_image.is_none() { if let Some(image) = handle.embedable_image() { - urls.embedded.push(handle.url_list_item()); + urls.embedded.push(handle.attachment_list_item()); primary_image.replace(image); continue; } } else if let Some(embed) = handle.as_embed() { - urls.embedded.push(handle.url_list_item()); + urls.embedded.push(handle.attachment_list_item()); embeds.push(embed); continue; } - urls.uploaded.push(handle.url_list_item()); + urls.uploaded.push(handle.attachment_list_item()); upload_attachments.push(handle); } for embed in &orig.embeds { + // handle imgur if let Some(provider) = &embed.provider { if matches!(provider.name.as_deref(), Some("Imgur")) { let ret = modify_imgur_embed(embed.clone()); @@ -94,6 +110,7 @@ impl ParsedMessage { } } + // handle embeds that are purely attachments if let Some(attachment) = maybe_get_attachment_handle(embed) { if let Some(image) = attachment.embedable_image() { if primary_image.is_none() && embeds.is_empty() { @@ -101,32 +118,65 @@ impl ParsedMessage { } else { embeds.push(attachment.as_embed().unwrap()); } + urls.embedded.push(attachment.attachment_list_item()); } else { + urls.uploaded.push(attachment.attachment_list_item()); upload_attachments.push(attachment); } - } else { - let mut embed = embed.to_owned(); - - if &*embed.kind == "article" && embed.image.is_none() { - let thumb = std::mem::take(&mut embed.thumbnail); - if let Some(thumb) = thumb { - embed.image = Some(EmbedImage { - height: None, - width: None, - proxy_url: None, - url: thumb.url, - }); - } + + continue; + } + + // process "actual" embeds + let mut embed = embed.to_owned(); + + if &*embed.kind == "article" && embed.image.is_none() { + // article embeds use a thumbnail, but discord makes it the image instead + let thumb = std::mem::take(&mut embed.thumbnail); + if let Some(thumb) = thumb { + embed.image = Some(EmbedImage { + height: None, + width: None, + proxy_url: None, + url: thumb.url, + }); } + } - if let Some(provider) = &embed.provider { - if provider.name.as_deref() == Some("YouTube") { - modify_yt_embed(&mut embed); + // handle embeds with videos + 'out: { + let Some(video) = &embed.video else { break 'out; }; + let Some(proxy_url) = &video.proxy_url else { break 'out; }; + + let handle = AttachmentHandle { + filename: format!( + "embed_video.{}", + proxy_url.split('.').last().unwrap_or("mp4") + ), + content_type: Some("video".to_string()), + url: proxy_url.clone(), + }; + urls.uploaded.push(handle.attachment_list_item()); + upload_attachments.push(handle); + } + + // platform-specific modifications + if let Some(provider) = &embed.provider { + if let Some(mut name) = provider.name.as_deref() { + if name.starts_with("FixTweet") { + name = "FixTweet"; + } + match name { + "YouTube" => modify_yt_embed(&mut embed), + "FixTweet" => { + embed.description = None; + } + _ => (), } } - - embeds.push(embed); } + + embeds.push(embed); } let sticker_names_str: Option;