Skip to content

Commit

Permalink
Embedder improvements (#318)
Browse files Browse the repository at this point in the history
* fix fxtweet embeds

* add embed videos as uploadable attachments

* fix the fxtweet fix

* reformat and comment embed parser

* preserve attachment names from URLs

* linked images/videos now appear in attachments list

* remove singular image urls from message content

* only remove content if the whole message is a URL
  • Loading branch information
CircuitSacul committed Jun 28, 2023
1 parent f92e7c1 commit 066b50b
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 35 deletions.
8 changes: 5 additions & 3 deletions src/core/embedder/attachment_handle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ use crate::{
errors::{StarboardError, StarboardResult},
};

use super::parser::AttachmentListItem;

pub struct AttachmentHandle {
pub filename: String,
pub content_type: Option<String>,
Expand Down Expand Up @@ -86,11 +88,11 @@ impl AttachmentHandle {
})
}

// Builds the attachments-list entry for this handle: the filename (shortened when it is
// longer than 32 bytes) paired with the handle's URL.
// NOTE(review): this hunk shows the `url_list_item` lines side by side with their
// `attachment_list_item` replacements; the rendered diff has lost its +/- markers.
pub fn url_list_item(&self) -> String {
pub fn attachment_list_item(&self) -> AttachmentListItem {
// `len()` counts bytes and `[..29]` slices at a byte offset. If byte 29 of a long
// filename falls inside a multi-byte character, the slice panics.
// NOTE(review): truncate on a char boundary instead.
if self.filename.len() > 32 {
format!("[{}...]({})", &self.filename[..29], self.url)
// NOTE(review): unlike the `format!` line above, the shortened name gets no "..." suffix.
AttachmentListItem::new(self.filename[..29].to_string(), self.url.clone())
} else {
format!("[{}]({})", self.filename, self.url)
AttachmentListItem::new(self.filename.clone(), self.url.clone())
}
}

Expand Down
40 changes: 34 additions & 6 deletions src/core/embedder/builder.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use std::fmt::Write;

use lazy_static::lazy_static;
use regex::Regex;
use twilight_model::{
channel::message::{
component::{ActionRow, Button, ButtonStyle},
Expand Down Expand Up @@ -28,6 +30,14 @@ use crate::{

use super::{parser::ParsedMessage, AttachmentHandle, Embedder};

// Matches a string that is, from start (`^`) to end (`$`), a single `http://` or
// `https://` URL: an optional `www.`, a host of 1-256 characters, a dot, a 1-6 character
// suffix, then an optional run of path/query characters. Any text before or after the
// URL prevents a match.
lazy_static! {
// The pattern is split across two literals only to keep the lines short; `concat!`
// joins them, so `{1,6}` quantifies the character class that ends the first literal.
static ref URL_REGEX: Regex = Regex::new(concat!(
r"^https?://(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]",
r"{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&/=]*)$"
))
// The pattern is a fixed literal, so a compile failure here would be a programming error.
.unwrap();
}

pub struct FullBuiltStarboardEmbed {
pub top_content: String,
pub embeds: Vec<Embed>,
Expand Down Expand Up @@ -260,8 +270,26 @@ impl BuiltStarboardEmbed {
description.push('\n');
}

if !orig.content.is_empty() {
description.push_str(&orig.content);
'out: {
if !orig.content.is_empty() {
if URL_REGEX.is_match(&orig.content) {
let url = orig.content.split('?').next().unwrap_or(&orig.content);

let mut found = false;
for item in &parsed.urls.embedded {
if item.url == url {
found = true;
break;
}
}

if found {
break 'out;
}
}

description.push_str(&orig.content);
}
}

let mut has_description;
Expand All @@ -278,10 +306,10 @@ impl BuiltStarboardEmbed {
}

// attachments list
let mut urls = Vec::<&str>::new();
urls.extend(parsed.urls.uploaded.iter().map(|url| url.as_str()));
let mut urls = Vec::new();
urls.extend(parsed.urls.uploaded.iter().map(|url| url.to_md()));
if !handle.config.resolved.extra_embeds || is_reply && parsed.urls.embedded.len() > 1 {
urls.extend(parsed.urls.embedded.iter().map(|url| url.as_str()));
urls.extend(parsed.urls.embedded.iter().map(|url| url.to_md()));
}

if (handle.config.resolved.attachments_list || is_reply) && !urls.is_empty() {
Expand All @@ -293,7 +321,7 @@ impl BuiltStarboardEmbed {
break;
}

field.push_str(next);
field.push_str(&next);
field.push('\n');
}

Expand Down
6 changes: 3 additions & 3 deletions src/core/embedder/image_only_embed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ pub fn maybe_get_attachment_handle(embed: &Embed) -> Option<AttachmentHandle> {
};

let name = {
let ext = url.split('.').last();
match ext {
Some(ext) => format!("attachment.{ext}"),
let name = url.split('/').last();
match name {
Some(name) => name.to_string(),
None => "attachment".to_string(),
}
};
Expand Down
96 changes: 73 additions & 23 deletions src/core/embedder/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,25 @@ pub type PrimaryImage = ImageSource;
pub type Embeds = Vec<Embed>;
pub type UploadAttachments = Vec<AttachmentHandle>;

/// One entry of the attachments list: a display name paired with the URL it links to.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AttachmentListItem {
    /// Text used as the link label.
    pub name: String,
    /// Link target.
    pub url: String,
}

impl AttachmentListItem {
    /// Creates an item from a display name and the URL it points at.
    pub fn new(name: String, url: String) -> Self {
        Self { name, url }
    }

    /// Renders the item as a Markdown link of the form `[name](url)`.
    ///
    /// The name and URL are inserted verbatim; no Markdown escaping is applied.
    pub fn to_md(&self) -> String {
        format!("[{}]({})", self.name, self.url)
    }
}

// Attachment-list entries collected while parsing a message, grouped by how the
// attachment ends up being shown.
// NOTE(review): both the `Vec<String>` and the `Vec<AttachmentListItem>` field lines
// appear below because this is a rendered diff that has lost its +/- markers.
#[derive(Default)]
pub struct AttachmentUrls {
// Entries for attachments that became the primary image or an extra embed.
pub embedded: Vec<String>,
// Entries for attachments that are queued in the upload list instead.
pub uploaded: Vec<String>,
pub embedded: Vec<AttachmentListItem>,
pub uploaded: Vec<AttachmentListItem>,
}

pub struct ParsedMessage {
Expand Down Expand Up @@ -66,21 +81,22 @@ impl ParsedMessage {

if primary_image.is_none() {
if let Some(image) = handle.embedable_image() {
urls.embedded.push(handle.url_list_item());
urls.embedded.push(handle.attachment_list_item());
primary_image.replace(image);
continue;
}
} else if let Some(embed) = handle.as_embed() {
urls.embedded.push(handle.url_list_item());
urls.embedded.push(handle.attachment_list_item());
embeds.push(embed);
continue;
}

urls.uploaded.push(handle.url_list_item());
urls.uploaded.push(handle.attachment_list_item());
upload_attachments.push(handle);
}

for embed in &orig.embeds {
// handle imgur
if let Some(provider) = &embed.provider {
if matches!(provider.name.as_deref(), Some("Imgur")) {
let ret = modify_imgur_embed(embed.clone());
Expand All @@ -94,39 +110,73 @@ impl ParsedMessage {
}
}

// handle embeds that are purely attachments
if let Some(attachment) = maybe_get_attachment_handle(embed) {
if let Some(image) = attachment.embedable_image() {
if primary_image.is_none() && embeds.is_empty() {
primary_image.replace(image);
} else {
embeds.push(attachment.as_embed().unwrap());
}
urls.embedded.push(attachment.attachment_list_item());
} else {
urls.uploaded.push(attachment.attachment_list_item());
upload_attachments.push(attachment);
}
} else {
let mut embed = embed.to_owned();

if &*embed.kind == "article" && embed.image.is_none() {
let thumb = std::mem::take(&mut embed.thumbnail);
if let Some(thumb) = thumb {
embed.image = Some(EmbedImage {
height: None,
width: None,
proxy_url: None,
url: thumb.url,
});
}

continue;
}

// process "actual" embeds
let mut embed = embed.to_owned();

if &*embed.kind == "article" && embed.image.is_none() {
// article embeds use a thumbnail, but discord makes it the image instead
let thumb = std::mem::take(&mut embed.thumbnail);
if let Some(thumb) = thumb {
embed.image = Some(EmbedImage {
height: None,
width: None,
proxy_url: None,
url: thumb.url,
});
}
}

if let Some(provider) = &embed.provider {
if provider.name.as_deref() == Some("YouTube") {
modify_yt_embed(&mut embed);
// handle embeds with videos
'out: {
let Some(video) = &embed.video else { break 'out; };
let Some(proxy_url) = &video.proxy_url else { break 'out; };

let handle = AttachmentHandle {
filename: format!(
"embed_video.{}",
proxy_url.split('.').last().unwrap_or("mp4")
),
content_type: Some("video".to_string()),
url: proxy_url.clone(),
};
urls.uploaded.push(handle.attachment_list_item());
upload_attachments.push(handle);
}

// platform-specific modifications
if let Some(provider) = &embed.provider {
if let Some(mut name) = provider.name.as_deref() {
if name.starts_with("FixTweet") {
name = "FixTweet";
}
match name {
"YouTube" => modify_yt_embed(&mut embed),
"FixTweet" => {
embed.description = None;
}
_ => (),
}
}

embeds.push(embed);
}

embeds.push(embed);
}

let sticker_names_str: Option<String>;
Expand Down

0 comments on commit 066b50b

Please sign in to comment.