combine service/users data w/ mod unit split sliding sync related out of service/users instrument database entry points remove increment crap from database interface de-wrap all database get() calls de-wrap all database insert() calls de-wrap all database remove() calls refactor database interface for async streaming add query key serializer for database implement Debug for result handle add query deserializer for database add deserialization trait for option handle start a stream utils suite de-wrap/asyncify/type-query count_one_time_keys() de-wrap/asyncify users count add admin query users command suite de-wrap/asyncify users exists de-wrap/partially asyncify user filter related asyncify/de-wrap users device/keys related asyncify/de-wrap user auth/misc related asyncify/de-wrap users blurhash asyncify/de-wrap account_data get; merge Data into Service partial asyncify/de-wrap uiaa; merge Data into Service partially asyncify/de-wrap transaction_ids get; merge Data into Service partially asyncify/de-wrap key_backups; merge Data into Service asyncify/de-wrap pusher service getters; merge Data into Service asyncify/de-wrap rooms alias getters/some iterators asyncify/de-wrap rooms directory getters/iterator partially asyncify/de-wrap rooms lazy-loading partially asyncify/de-wrap rooms metadata asyncify/dewrap rooms outlier asyncify/dewrap rooms pdu_metadata dewrap/partially asyncify rooms read receipt de-wrap rooms search service de-wrap/partially asyncify rooms user service partial de-wrap rooms state_compressor de-wrap rooms state_cache de-wrap room state et al de-wrap rooms timeline service additional users device/keys related de-wrap/asyncify sender asyncify services refactor database to TryFuture/TryStream refactor services for TryFuture/TryStream asyncify api handlers additional asyncification for admin module abstract stream related; support reverse streams additional stream conversions asyncify state-res related Signed-off-by: Jason Volk <jason@zemos.net>
274 lines
8 KiB
Rust
274 lines
8 KiB
Rust
use std::{io::Cursor, time::SystemTime};
|
|
|
|
use conduit::{debug, utils, warn, Err, Result};
|
|
use conduit_core::implement;
|
|
use image::ImageReader as ImgReader;
|
|
use ipaddress::IPAddress;
|
|
use ruma::Mxc;
|
|
use serde::Serialize;
|
|
use url::Url;
|
|
use webpage::HTML;
|
|
|
|
use super::{Service, MXC_LENGTH};
|
|
|
|
#[derive(Serialize, Default)]
|
|
pub struct UrlPreviewData {
|
|
#[serde(skip_serializing_if = "Option::is_none", rename(serialize = "og:title"))]
|
|
pub title: Option<String>,
|
|
#[serde(skip_serializing_if = "Option::is_none", rename(serialize = "og:description"))]
|
|
pub description: Option<String>,
|
|
#[serde(skip_serializing_if = "Option::is_none", rename(serialize = "og:image"))]
|
|
pub image: Option<String>,
|
|
#[serde(skip_serializing_if = "Option::is_none", rename(serialize = "matrix:image:size"))]
|
|
pub image_size: Option<usize>,
|
|
#[serde(skip_serializing_if = "Option::is_none", rename(serialize = "og:image:width"))]
|
|
pub image_width: Option<u32>,
|
|
#[serde(skip_serializing_if = "Option::is_none", rename(serialize = "og:image:height"))]
|
|
pub image_height: Option<u32>,
|
|
}
|
|
|
|
#[implement(Service)]
|
|
pub async fn remove_url_preview(&self, url: &str) -> Result<()> {
|
|
// TODO: also remove the downloaded image
|
|
self.db.remove_url_preview(url)
|
|
}
|
|
|
|
#[implement(Service)]
|
|
pub async fn set_url_preview(&self, url: &str, data: &UrlPreviewData) -> Result<()> {
|
|
let now = SystemTime::now()
|
|
.duration_since(SystemTime::UNIX_EPOCH)
|
|
.expect("valid system time");
|
|
self.db.set_url_preview(url, data, now)
|
|
}
|
|
|
|
#[implement(Service)]
|
|
pub async fn download_image(&self, url: &str) -> Result<UrlPreviewData> {
|
|
let client = &self.services.client.url_preview;
|
|
let image = client.get(url).send().await?.bytes().await?;
|
|
let mxc = Mxc {
|
|
server_name: self.services.globals.server_name(),
|
|
media_id: &utils::random_string(MXC_LENGTH),
|
|
};
|
|
|
|
self.create(&mxc, None, None, None, &image).await?;
|
|
|
|
let (width, height) = match ImgReader::new(Cursor::new(&image)).with_guessed_format() {
|
|
Err(_) => (None, None),
|
|
Ok(reader) => match reader.into_dimensions() {
|
|
Err(_) => (None, None),
|
|
Ok((width, height)) => (Some(width), Some(height)),
|
|
},
|
|
};
|
|
|
|
Ok(UrlPreviewData {
|
|
image: Some(mxc.to_string()),
|
|
image_size: Some(image.len()),
|
|
image_width: width,
|
|
image_height: height,
|
|
..Default::default()
|
|
})
|
|
}
|
|
|
|
#[implement(Service)]
|
|
pub async fn get_url_preview(&self, url: &str) -> Result<UrlPreviewData> {
|
|
if let Ok(preview) = self.db.get_url_preview(url).await {
|
|
return Ok(preview);
|
|
}
|
|
|
|
// ensure that only one request is made per URL
|
|
let _request_lock = self.url_preview_mutex.lock(url).await;
|
|
|
|
match self.db.get_url_preview(url).await {
|
|
Ok(preview) => Ok(preview),
|
|
Err(_) => self.request_url_preview(url).await,
|
|
}
|
|
}
|
|
|
|
#[implement(Service)]
|
|
async fn request_url_preview(&self, url: &str) -> Result<UrlPreviewData> {
|
|
if let Ok(ip) = IPAddress::parse(url) {
|
|
if !self.services.globals.valid_cidr_range(&ip) {
|
|
return Err!(BadServerResponse("Requesting from this address is forbidden"));
|
|
}
|
|
}
|
|
|
|
let client = &self.services.client.url_preview;
|
|
let response = client.head(url).send().await?;
|
|
|
|
if let Some(remote_addr) = response.remote_addr() {
|
|
if let Ok(ip) = IPAddress::parse(remote_addr.ip().to_string()) {
|
|
if !self.services.globals.valid_cidr_range(&ip) {
|
|
return Err!(BadServerResponse("Requesting from this address is forbidden"));
|
|
}
|
|
}
|
|
}
|
|
|
|
let Some(content_type) = response
|
|
.headers()
|
|
.get(reqwest::header::CONTENT_TYPE)
|
|
.and_then(|x| x.to_str().ok())
|
|
else {
|
|
return Err!(Request(Unknown("Unknown Content-Type")));
|
|
};
|
|
let data = match content_type {
|
|
html if html.starts_with("text/html") => self.download_html(url).await?,
|
|
img if img.starts_with("image/") => self.download_image(url).await?,
|
|
_ => return Err!(Request(Unknown("Unsupported Content-Type"))),
|
|
};
|
|
|
|
self.set_url_preview(url, &data).await?;
|
|
|
|
Ok(data)
|
|
}
|
|
|
|
#[implement(Service)]
|
|
async fn download_html(&self, url: &str) -> Result<UrlPreviewData> {
|
|
let client = &self.services.client.url_preview;
|
|
let mut response = client.get(url).send().await?;
|
|
|
|
let mut bytes: Vec<u8> = Vec::new();
|
|
while let Some(chunk) = response.chunk().await? {
|
|
bytes.extend_from_slice(&chunk);
|
|
if bytes.len() > self.services.globals.url_preview_max_spider_size() {
|
|
debug!(
|
|
"Response body from URL {} exceeds url_preview_max_spider_size ({}), not processing the rest of the \
|
|
response body and assuming our necessary data is in this range.",
|
|
url,
|
|
self.services.globals.url_preview_max_spider_size()
|
|
);
|
|
break;
|
|
}
|
|
}
|
|
let body = String::from_utf8_lossy(&bytes);
|
|
let Ok(html) = HTML::from_string(body.to_string(), Some(url.to_owned())) else {
|
|
return Err!(Request(Unknown("Failed to parse HTML")));
|
|
};
|
|
|
|
let mut data = match html.opengraph.images.first() {
|
|
None => UrlPreviewData::default(),
|
|
Some(obj) => self.download_image(&obj.url).await?,
|
|
};
|
|
|
|
let props = html.opengraph.properties;
|
|
|
|
/* use OpenGraph title/description, but fall back to HTML if not available */
|
|
data.title = props.get("title").cloned().or(html.title);
|
|
data.description = props.get("description").cloned().or(html.description);
|
|
|
|
Ok(data)
|
|
}
|
|
|
|
#[implement(Service)]
|
|
pub fn url_preview_allowed(&self, url_str: &str) -> bool {
|
|
let url: Url = match Url::parse(url_str) {
|
|
Ok(u) => u,
|
|
Err(e) => {
|
|
warn!("Failed to parse URL from a str: {}", e);
|
|
return false;
|
|
},
|
|
};
|
|
|
|
if ["http", "https"]
|
|
.iter()
|
|
.all(|&scheme| scheme != url.scheme().to_lowercase())
|
|
{
|
|
debug!("Ignoring non-HTTP/HTTPS URL to preview: {}", url);
|
|
return false;
|
|
}
|
|
|
|
let host = match url.host_str() {
|
|
None => {
|
|
debug!("Ignoring URL preview for a URL that does not have a host (?): {}", url);
|
|
return false;
|
|
},
|
|
Some(h) => h.to_owned(),
|
|
};
|
|
|
|
let allowlist_domain_contains = self
|
|
.services
|
|
.globals
|
|
.url_preview_domain_contains_allowlist();
|
|
let allowlist_domain_explicit = self
|
|
.services
|
|
.globals
|
|
.url_preview_domain_explicit_allowlist();
|
|
let denylist_domain_explicit = self.services.globals.url_preview_domain_explicit_denylist();
|
|
let allowlist_url_contains = self.services.globals.url_preview_url_contains_allowlist();
|
|
|
|
if allowlist_domain_contains.contains(&"*".to_owned())
|
|
|| allowlist_domain_explicit.contains(&"*".to_owned())
|
|
|| allowlist_url_contains.contains(&"*".to_owned())
|
|
{
|
|
debug!("Config key contains * which is allowing all URL previews. Allowing URL {}", url);
|
|
return true;
|
|
}
|
|
|
|
if !host.is_empty() {
|
|
if denylist_domain_explicit.contains(&host) {
|
|
debug!(
|
|
"Host {} is not allowed by url_preview_domain_explicit_denylist (check 1/4)",
|
|
&host
|
|
);
|
|
return false;
|
|
}
|
|
|
|
if allowlist_domain_explicit.contains(&host) {
|
|
debug!("Host {} is allowed by url_preview_domain_explicit_allowlist (check 2/4)", &host);
|
|
return true;
|
|
}
|
|
|
|
if allowlist_domain_contains
|
|
.iter()
|
|
.any(|domain_s| domain_s.contains(&host.clone()))
|
|
{
|
|
debug!("Host {} is allowed by url_preview_domain_contains_allowlist (check 3/4)", &host);
|
|
return true;
|
|
}
|
|
|
|
if allowlist_url_contains
|
|
.iter()
|
|
.any(|url_s| url.to_string().contains(&url_s.to_string()))
|
|
{
|
|
debug!("URL {} is allowed by url_preview_url_contains_allowlist (check 4/4)", &host);
|
|
return true;
|
|
}
|
|
|
|
// check root domain if available and if user has root domain checks
|
|
if self.services.globals.url_preview_check_root_domain() {
|
|
debug!("Checking root domain");
|
|
match host.split_once('.') {
|
|
None => return false,
|
|
Some((_, root_domain)) => {
|
|
if denylist_domain_explicit.contains(&root_domain.to_owned()) {
|
|
debug!(
|
|
"Root domain {} is not allowed by url_preview_domain_explicit_denylist (check 1/3)",
|
|
&root_domain
|
|
);
|
|
return true;
|
|
}
|
|
|
|
if allowlist_domain_explicit.contains(&root_domain.to_owned()) {
|
|
debug!(
|
|
"Root domain {} is allowed by url_preview_domain_explicit_allowlist (check 2/3)",
|
|
&root_domain
|
|
);
|
|
return true;
|
|
}
|
|
|
|
if allowlist_domain_contains
|
|
.iter()
|
|
.any(|domain_s| domain_s.contains(&root_domain.to_owned()))
|
|
{
|
|
debug!(
|
|
"Root domain {} is allowed by url_preview_domain_contains_allowlist (check 3/3)",
|
|
&root_domain
|
|
);
|
|
return true;
|
|
}
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
false
|
|
}
|