From 154b2ab49051bf14ca62a143e5d2edf9e0ef5c93 Mon Sep 17 00:00:00 2001
From: strawberry <strawberry@puppygock.gay>
Date: Tue, 7 May 2024 21:32:06 -0400
Subject: [PATCH] media: additional sanitisation on the `Content-Disposition`
 filename

Signed-off-by: strawberry <strawberry@puppygock.gay>
---
 Cargo.lock                       | 11 ++++
 Cargo.toml                       |  1 +
 src/api/client_server/media.rs   | 85 ++++++++++++++++--------------
 src/utils/content_disposition.rs | 88 ++++++++++++++++++++++++++++++++
 src/utils/mod.rs                 |  1 +
 5 files changed, 148 insertions(+), 38 deletions(-)
 create mode 100644 src/utils/content_disposition.rs

diff --git a/Cargo.lock b/Cargo.lock
index 9935aa21..6944d8d1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -615,6 +615,7 @@ dependencies = [
  "ruma-identifiers-validation",
  "rusqlite",
  "rust-rocksdb",
+ "sanitize-filename",
  "sd-notify",
  "sentry",
  "sentry-tower",
@@ -3054,6 +3055,16 @@ dependencies = [
  "winapi-util",
 ]
 
+[[package]]
+name = "sanitize-filename"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2ed72fbaf78e6f2d41744923916966c4fbe3d7c74e3037a8ee482f1115572603"
+dependencies = [
+ "lazy_static",
+ "regex",
+]
+
 [[package]]
 name = "schannel"
 version = "0.1.23"
diff --git a/Cargo.toml b/Cargo.toml
index 681057a0..f24789f6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -79,6 +79,7 @@ url = { version = "2.5.0", features = ["serde"] }
 async-trait = "0.1.80"
 
 lru-cache = "0.1.2"
+sanitize-filename = "0.5.0"
 
 # standard date and time tools
 [dependencies.chrono]
diff --git a/src/api/client_server/media.rs b/src/api/client_server/media.rs
index 53091d3a..e4e7376d 100644
--- a/src/api/client_server/media.rs
+++ b/src/api/client_server/media.rs
@@ -1,7 +1,6 @@
 use std::{io::Cursor, sync::Arc, time::Duration};
 
 use image::io::Reader as ImgReader;
-use infer::MatcherType;
 use ipaddress::IPAddress;
 use reqwest::Url;
 use ruma::api::client::{
@@ -18,7 +17,11 @@ use crate::{
 	debug_warn,
 	service::media::{FileMeta, UrlPreviewData},
 	services,
-	utils::{self, server_name::server_is_ours},
+	utils::{
+		self,
+		content_disposition::{content_disposition_type, make_content_disposition, sanitise_filename},
+		server_name::server_is_ours,
+	},
 	Error, Result, Ruma, RumaResponse,
 };
 
@@ -131,7 +134,13 @@ pub(crate) async fn create_content_route(
 			mxc.clone(),
 			body.filename
 				.as_ref()
-				.map(|filename| format!("attachment; filename={filename}"))
+				.map(|filename| {
+					format!(
+						"{}; filename={}",
+						content_disposition_type(&body.file, &body.content_type),
+						sanitise_filename(filename.to_owned())
+					)
+				})
 				.as_deref(),
 			body.content_type.as_deref(),
 			&body.file,
@@ -176,12 +185,11 @@ pub(crate) async fn get_content_route(body: Ruma<get_content::v3::Request>) -> R
 	if let Some(FileMeta {
 		content_type,
 		file,
-		..
+		content_disposition,
 	}) = services().media.get(mxc.clone()).await?
 	{
-		let content_disposition = Some(String::from(content_disposition_type(&file, &content_type)));
+		let content_disposition = Some(make_content_disposition(&file, &content_type, content_disposition));
 
-		// TODO: safely sanitise filename to be included in the content-disposition
 		Ok(get_content::v3::Response {
 			file,
 			content_type,
@@ -190,7 +198,7 @@ pub(crate) async fn get_content_route(body: Ruma<get_content::v3::Request>) -> R
 			cache_control: Some(CACHE_CONTROL_IMMUTABLE.into()),
 		})
 	} else if !server_is_ours(&body.server_name) && body.allow_remote {
-		get_remote_content(
+		let response = get_remote_content(
 			&mxc,
 			&body.server_name,
 			body.media_id.clone(),
@@ -201,6 +209,20 @@ pub(crate) async fn get_content_route(body: Ruma<get_content::v3::Request>) -> R
 		.map_err(|e| {
 			debug_warn!("Fetching media `{}` failed: {:?}", mxc, e);
 			Error::BadRequest(ErrorKind::NotFound, "Remote media error.")
+		})?;
+
+		let content_disposition = Some(make_content_disposition(
+			&response.file,
+			&response.content_type,
+			response.content_disposition,
+		));
+
+		Ok(get_content::v3::Response {
+			file: response.file,
+			content_type: response.content_type,
+			content_disposition,
+			cross_origin_resource_policy: Some(CORP_CROSS_ORIGIN.to_owned()),
+			cache_control: Some(CACHE_CONTROL_IMMUTABLE.to_owned()),
 		})
 	} else {
 		Err(Error::BadRequest(ErrorKind::NotFound, "Media not found."))
@@ -241,10 +263,10 @@ pub(crate) async fn get_content_as_filename_route(
 	if let Some(FileMeta {
 		content_type,
 		file,
-		..
+		content_disposition,
 	}) = services().media.get(mxc.clone()).await?
 	{
-		let content_disposition = Some(String::from(content_disposition_type(&file, &content_type)));
+		let content_disposition = Some(make_content_disposition(&file, &content_type, content_disposition));
 
 		Ok(get_content_as_filename::v3::Response {
 			file,
@@ -264,10 +286,11 @@ pub(crate) async fn get_content_as_filename_route(
 		.await
 		{
 			Ok(remote_content_response) => {
-				let content_disposition = Some(String::from(content_disposition_type(
+				let content_disposition = Some(make_content_disposition(
 					&remote_content_response.file,
 					&remote_content_response.content_type,
-				)));
+					remote_content_response.content_disposition,
+				));
 
 				Ok(get_content_as_filename::v3::Response {
 					content_disposition,
@@ -321,7 +344,7 @@ pub(crate) async fn get_content_thumbnail_route(
 	if let Some(FileMeta {
 		content_type,
 		file,
-		..
+		content_disposition,
 	}) = services()
 		.media
 		.get_thumbnail(
@@ -335,7 +358,7 @@ pub(crate) async fn get_content_thumbnail_route(
 		)
 		.await?
 	{
-		let content_disposition = Some(String::from(content_disposition_type(&file, &content_type)));
+		let content_disposition = Some(make_content_disposition(&file, &content_type, content_disposition));
 
 		Ok(get_content_thumbnail::v3::Response {
 			file,
@@ -387,10 +410,11 @@ pub(crate) async fn get_content_thumbnail_route(
 					)
 					.await?;
 
-				let content_disposition = Some(String::from(content_disposition_type(
+				let content_disposition = Some(make_content_disposition(
 					&get_thumbnail_response.file,
 					&get_thumbnail_response.content_type,
-				)));
+					get_thumbnail_response.content_disposition,
+				));
 
 				Ok(get_content_thumbnail::v3::Response {
 					file: get_thumbnail_response.file,
@@ -456,12 +480,18 @@ async fn get_remote_content(
 		)
 		.await?;
 
+	let content_disposition = Some(make_content_disposition(
+		&content_response.file,
+		&content_response.content_type,
+		content_response.content_disposition,
+	));
+
 	services()
 		.media
 		.create(
 			None,
 			mxc.to_owned(),
-			Some("attachment"),
+			content_disposition.as_deref(),
 			content_response.content_type.as_deref(),
 			&content_response.file,
 		)
@@ -470,7 +500,7 @@ async fn get_remote_content(
 	Ok(get_content::v3::Response {
 		file: content_response.file,
 		content_type: content_response.content_type,
-		content_disposition: Some("attachment".to_owned()),
+		content_disposition,
 		cross_origin_resource_policy: Some(CORP_CROSS_ORIGIN.to_owned()),
 		cache_control: Some(CACHE_CONTROL_IMMUTABLE.to_owned()),
 	})
@@ -707,24 +737,3 @@ fn url_preview_allowed(url_str: &str) -> bool {
 
 	false
 }
-
-/// Returns a Content-Disposition of `attachment` or `inline`, depending on the
-/// *parsed* contents of the file uploaded via format magic keys using `infer`
-/// crate (basically libmagic without needing libmagic).
-///
-/// This forbids trusting what the client or remote server says the file is from
-/// their `Content-Type` and we try to detect it ourselves. Also returns
-/// `attachment` if the Content-Type does not match what we detected.
-///
-/// TODO: add a "strict" function for comparing the Content-Type with what we
-/// detected: `file_type.mime_type() != content_type`
-fn content_disposition_type(buf: &[u8], _content_type: &Option<String>) -> &'static str {
-	let Some(file_type) = infer::get(buf) else {
-		return "attachment";
-	};
-
-	match file_type.matcher_type() {
-		MatcherType::IMAGE | MatcherType::AUDIO | MatcherType::TEXT | MatcherType::VIDEO => "inline",
-		_ => "attachment",
-	}
-}
diff --git a/src/utils/content_disposition.rs b/src/utils/content_disposition.rs
new file mode 100644
index 00000000..6df0e14f
--- /dev/null
+++ b/src/utils/content_disposition.rs
@@ -0,0 +1,88 @@
+use infer::MatcherType;
+
+/// Returns a Content-Disposition of `attachment` or `inline`, depending on the
+/// *parsed* contents of the file uploaded via format magic keys using `infer`
+/// crate (basically libmagic without needing libmagic).
+///
+/// This forbids trusting what the client or remote server says the file is from
+/// their `Content-Type` and we try to detect it ourselves. Also returns
+/// `attachment` if the Content-Type does not match what we detected.
+///
+/// TODO: add a "strict" function for comparing the Content-Type with what we
+/// detected: `file_type.mime_type() != content_type`
+pub(crate) fn content_disposition_type(buf: &[u8], _content_type: &Option<String>) -> &'static str {
+	let Some(file_type) = infer::get(buf) else {
+		return "attachment";
+	};
+
+	match file_type.matcher_type() {
+		MatcherType::IMAGE | MatcherType::AUDIO | MatcherType::TEXT | MatcherType::VIDEO => "inline",
+		_ => "attachment",
+	}
+}
+
+/// sanitises the file name for the Content-Disposition using
+/// `sanitize_filename` crate
+#[tracing::instrument]
+pub(crate) fn sanitise_filename(filename: String) -> String {
+	let options = sanitize_filename::Options {
+		truncate: false,
+		..Default::default()
+	};
+
+	sanitize_filename::sanitize_with_options(filename, options)
+}
+
+/// creates the final Content-Disposition based on whether the filename exists
+/// or not.
+///
+/// if filename exists: `Content-Disposition: attachment/inline;
+/// filename=filename.ext` else: `Content-Disposition: attachment/inline`
+#[tracing::instrument(skip(file))]
+pub(crate) fn make_content_disposition(
+	file: &[u8], content_type: &Option<String>, content_disposition: Option<String>,
+) -> String {
+	let filename = content_disposition.map_or_else(String::new, |content_disposition| {
+		let (_, filename) = content_disposition
+			.split_once("filename=")
+			.unwrap_or(("", ""));
+
+		if filename.is_empty() {
+			String::new()
+		} else {
+			sanitise_filename(filename.to_owned())
+		}
+	});
+
+	if !filename.is_empty() {
+		// Content-Disposition: attachment/inline; filename=filename.ext
+		format!("{}; filename={}", content_disposition_type(file, content_type), filename)
+	} else {
+		// Content-Disposition: attachment/inline
+		String::from(content_disposition_type(file, content_type))
+	}
+}
+
+#[cfg(test)]
+mod tests {
+	#[test]
+	fn string_sanitisation() {
+		const SAMPLE: &str =
+			"🏳️‍⚧️this\\r\\n įs \r\\n ä \\r\nstrïng 🥴that\n\r ../../../../../../../may be\r\n malicious🏳️‍⚧️";
+		const SANITISED: &str = "🏳️‍⚧️thisrn įs n ä rstrïng 🥴that ..............may be malicious🏳️‍⚧️";
+
+		let options = sanitize_filename::Options {
+			windows: true,
+			truncate: true,
+			replacement: "",
+		};
+
+		// cargo test -- --nocapture
+		println!("{}", SAMPLE);
+		println!("{}", sanitize_filename::sanitize_with_options(SAMPLE, options.clone()));
+		println!("{:?}", SAMPLE);
+		println!("{:?}", sanitize_filename::sanitize_with_options(SAMPLE, options.clone()));
+
+		assert_eq!(SANITISED, sanitize_filename::sanitize_with_options(SAMPLE, options.clone()));
+	}
+}
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
index dbcef82d..2ceb0ff5 100644
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -1,4 +1,5 @@
 pub(crate) mod clap;
+pub(crate) mod content_disposition;
 pub(crate) mod debug;
 pub(crate) mod error;
 pub(crate) mod server_name;