move media helpers from api to services

Signed-off-by: Jason Volk <jason@zemos.net>
This commit is contained in:
Jason Volk 2024-07-30 05:29:32 +00:00
parent a0a002b17f
commit 24884c3b9b
9 changed files with 296 additions and 275 deletions

View file

@ -47,7 +47,6 @@ hmac.workspace = true
http.workspace = true
http-body-util.workspace = true
hyper.workspace = true
image.workspace = true
ipaddress.workspace = true
jsonwebtoken.workspace = true
log.workspace = true
@ -60,7 +59,6 @@ serde.workspace = true
sha-1.workspace = true
tokio.workspace = true
tracing.workspace = true
webpage.workspace = true
[lints]
workspace = true

View file

@ -1,11 +1,11 @@
#![allow(deprecated)]
use std::{io::Cursor, time::Duration};
use std::time::Duration;
use axum::extract::State;
use axum_client_ip::InsecureClientIp;
use conduit::{
debug, debug_warn, error,
debug_warn, error,
utils::{
self,
content_disposition::{content_disposition_type, make_content_disposition, sanitise_filename},
@ -13,9 +13,6 @@ use conduit::{
},
warn, Error, Result,
};
use image::io::Reader as ImgReader;
use ipaddress::IPAddress;
use reqwest::Url;
use ruma::api::client::{
error::{ErrorKind, RetryAfter},
media::{
@ -24,16 +21,12 @@ use ruma::api::client::{
},
};
use service::{
media::{FileMeta, UrlPreviewData},
media::{FileMeta, MXC_LENGTH},
Services,
};
use webpage::HTML;
use crate::{Ruma, RumaResponse};
/// generated MXC ID (`media-id`) length
const MXC_LENGTH: usize = 32;
/// Cache control for immutable objects
const CACHE_CONTROL_IMMUTABLE: &str = "public,max-age=31536000,immutable";
@ -76,12 +69,12 @@ pub(crate) async fn get_media_preview_route(
let sender_user = body.sender_user.as_ref().expect("user is authenticated");
let url = &body.url;
if !url_preview_allowed(&services, url) {
if !services.media.url_preview_allowed(url) {
warn!(%sender_user, "URL is not allowed to be previewed: {url}");
return Err(Error::BadRequest(ErrorKind::forbidden(), "URL is not allowed to be previewed"));
}
match get_url_preview(&services, url).await {
match services.media.get_url_preview(url).await {
Ok(preview) => {
let res = serde_json::value::to_raw_value(&preview).map_err(|e| {
error!(%sender_user, "Failed to convert UrlPreviewData into a serde json value: {e}");
@ -553,222 +546,3 @@ async fn get_remote_content(
cache_control: Some(CACHE_CONTROL_IMMUTABLE.to_owned()),
})
}
/// Downloads the resource at `url`, stores its bytes as a new media entry
/// under a freshly generated `mxc://` URI on this server, and returns preview
/// data describing it.
///
/// The returned [`UrlPreviewData`] carries the MXC URI, the byte length of
/// the download and, when the image header can be decoded, its pixel
/// dimensions. Every other field is left at its default.
///
/// # Errors
///
/// Fails if the HTTP request or body download fails, or if the media service
/// cannot store the bytes. Being unable to determine the dimensions is not an
/// error; width and height are simply `None` in that case.
async fn download_image(services: &Services, client: &reqwest::Client, url: &str) -> Result<UrlPreviewData> {
    let response = client.get(url).send().await?;
    let image = response.bytes().await?;

    let server_name = services.globals.server_name();
    let media_id = utils::random_string(MXC_LENGTH);
    let mxc = format!("mxc://{}/{}", server_name, media_id);

    services
        .media
        .create(None, &mxc, None, None, &image)
        .await?;

    // Dimension probing is best-effort: a failure at either step (format
    // guessing or header decoding) yields no dimensions.
    let (image_width, image_height) = ImgReader::new(Cursor::new(&image))
        .with_guessed_format()
        .ok()
        .and_then(|reader| reader.into_dimensions().ok())
        .map_or((None, None), |(w, h)| (Some(w), Some(h)));

    Ok(UrlPreviewData {
        image: Some(mxc),
        image_size: Some(image.len()),
        image_width,
        image_height,
        ..Default::default()
    })
}
async fn download_html(services: &Services, client: &reqwest::Client, url: &str) -> Result<UrlPreviewData> {
let mut response = client.get(url).send().await?;
let mut bytes: Vec<u8> = Vec::new();
while let Some(chunk) = response.chunk().await? {
bytes.extend_from_slice(&chunk);
if bytes.len() > services.globals.url_preview_max_spider_size() {
debug!(
"Response body from URL {} exceeds url_preview_max_spider_size ({}), not processing the rest of the \
response body and assuming our necessary data is in this range.",
url,
services.globals.url_preview_max_spider_size()
);
break;
}
}
let body = String::from_utf8_lossy(&bytes);
let Ok(html) = HTML::from_string(body.to_string(), Some(url.to_owned())) else {
return Err(Error::BadRequest(ErrorKind::Unknown, "Failed to parse HTML"));
};
let mut data = match html.opengraph.images.first() {
None => UrlPreviewData::default(),
Some(obj) => download_image(services, client, &obj.url).await?,
};
let props = html.opengraph.properties;
/* use OpenGraph title/description, but fall back to HTML if not available */
data.title = props.get("title").cloned().or(html.title);
data.description = props.get("description").cloned().or(html.description);
Ok(data)
}
async fn request_url_preview(services: &Services, url: &str) -> Result<UrlPreviewData> {
if let Ok(ip) = IPAddress::parse(url) {
if !services.globals.valid_cidr_range(&ip) {
return Err(Error::BadServerResponse("Requesting from this address is forbidden"));
}
}
let client = &services.client.url_preview;
let response = client.head(url).send().await?;
if let Some(remote_addr) = response.remote_addr() {
if let Ok(ip) = IPAddress::parse(remote_addr.ip().to_string()) {
if !services.globals.valid_cidr_range(&ip) {
return Err(Error::BadServerResponse("Requesting from this address is forbidden"));
}
}
}
let Some(content_type) = response
.headers()
.get(reqwest::header::CONTENT_TYPE)
.and_then(|x| x.to_str().ok())
else {
return Err(Error::BadRequest(ErrorKind::Unknown, "Unknown Content-Type"));
};
let data = match content_type {
html if html.starts_with("text/html") => download_html(services, client, url).await?,
img if img.starts_with("image/") => download_image(services, client, url).await?,
_ => return Err(Error::BadRequest(ErrorKind::Unknown, "Unsupported Content-Type")),
};
services.media.set_url_preview(url, &data).await?;
Ok(data)
}
/// Returns the preview for `url`, reusing a stored one when available and
/// requesting a new one otherwise.
///
/// A per-URL lock is taken before going to the network so that only one
/// request is made per URL; the stored preview is looked up a second time
/// after acquiring the lock in case another task filled it in the meantime.
///
/// # Errors
///
/// Propagates any error from [`request_url_preview`].
async fn get_url_preview(services: &Services, url: &str) -> Result<UrlPreviewData> {
    // Fast path: no locking needed when a preview already exists.
    if let Some(cached) = services.media.get_url_preview(url).await {
        return Ok(cached);
    }

    // ensure that only one request is made per URL
    let _request_lock = services.media.url_preview_mutex.lock(url).await;

    // Look again while holding the lock before doing the expensive request.
    if let Some(cached) = services.media.get_url_preview(url).await {
        return Ok(cached);
    }

    request_url_preview(services, url).await
}
/// Decides whether a URL preview may be generated for `url_str` according to
/// the server's allow/deny configuration.
///
/// Returns `false` when the string is not a valid URL, is not `http`/`https`,
/// or has no host. Otherwise the checks run in this order:
///
/// * A `"*"` entry in any of the three allowlists allows everything.
/// * Host checks (1/4 - 4/4): explicit denylist, explicit allowlist,
///   domain "contains" allowlist, URL "contains" allowlist.
/// * If `url_preview_check_root_domain` is enabled, everything after the
///   first `.` of the host is run through the explicit denylist, explicit
///   allowlist and domain "contains" allowlist (1/3 - 3/3).
///
/// Anything that matches no rule is denied.
fn url_preview_allowed(services: &Services, url_str: &str) -> bool {
    let url: Url = match Url::parse(url_str) {
        Ok(u) => u,
        Err(e) => {
            warn!("Failed to parse URL from a str: {}", e);
            return false;
        },
    };

    // `Url::scheme` is already lower-cased by the parser.
    if !matches!(url.scheme(), "http" | "https") {
        debug!("Ignoring non-HTTP/HTTPS URL to preview: {}", url);
        return false;
    }

    let Some(host) = url.host_str().map(ToOwned::to_owned) else {
        debug!("Ignoring URL preview for a URL that does not have a host (?): {}", url);
        return false;
    };

    let allowlist_domain_contains = services.globals.url_preview_domain_contains_allowlist();
    let allowlist_domain_explicit = services.globals.url_preview_domain_explicit_allowlist();
    let denylist_domain_explicit = services.globals.url_preview_domain_explicit_denylist();
    let allowlist_url_contains = services.globals.url_preview_url_contains_allowlist();

    // Allocate the wildcard once instead of once per list.
    let wildcard = "*".to_owned();
    if allowlist_domain_contains.contains(&wildcard)
        || allowlist_domain_explicit.contains(&wildcard)
        || allowlist_url_contains.contains(&wildcard)
    {
        debug!("Config key contains * which is allowing all URL previews. Allowing URL {}", url);
        return true;
    }

    if host.is_empty() {
        return false;
    }

    if denylist_domain_explicit.contains(&host) {
        debug!(
            "Host {} is not allowed by url_preview_domain_explicit_denylist (check 1/4)",
            &host
        );
        return false;
    }

    if allowlist_domain_explicit.contains(&host) {
        debug!("Host {} is allowed by url_preview_domain_explicit_allowlist (check 2/4)", &host);
        return true;
    }

    // NOTE(review): this tests whether an allowlist entry contains the host,
    // while check 4/4 tests whether the URL contains an allowlist entry. The
    // direction here looks reversed; left unchanged pending confirmation of
    // the intended semantics.
    if allowlist_domain_contains
        .iter()
        .any(|domain_s| domain_s.contains(&host))
    {
        debug!("Host {} is allowed by url_preview_domain_contains_allowlist (check 3/4)", &host);
        return true;
    }

    if allowlist_url_contains
        .iter()
        .any(|url_s| url.as_str().contains(url_s.as_str()))
    {
        // Log the URL (not the host): this rule matched on the full URL.
        debug!("URL {} is allowed by url_preview_url_contains_allowlist (check 4/4)", url);
        return true;
    }

    // check root domain if available and if user has root domain checks
    if !services.globals.url_preview_check_root_domain() {
        return false;
    }

    debug!("Checking root domain");
    let Some((_, root_domain)) = host.split_once('.') else {
        return false;
    };
    let root_domain = root_domain.to_owned();

    if denylist_domain_explicit.contains(&root_domain) {
        debug!(
            "Root domain {} is not allowed by url_preview_domain_explicit_denylist (check 1/3)",
            &root_domain
        );
        // A denylisted root domain must be rejected, matching the log
        // message and the host-level denylist check above.
        return false;
    }

    if allowlist_domain_explicit.contains(&root_domain) {
        debug!(
            "Root domain {} is allowed by url_preview_domain_explicit_allowlist (check 2/3)",
            &root_domain
        );
        return true;
    }

    if allowlist_domain_contains
        .iter()
        .any(|domain_s| domain_s.contains(&root_domain))
    {
        debug!(
            "Root domain {} is allowed by url_preview_domain_contains_allowlist (check 3/3)",
            &root_domain
        );
        return true;
    }

    false
}