Hot-Reloading Refactor

Signed-off-by: Jason Volk <jason@zemos.net>
This commit is contained in:
Jason Volk 2024-05-09 15:59:08 -07:00 committed by June 🍓🦴
parent ae1a4fd283
commit 6c1434c165
212 changed files with 5679 additions and 4206 deletions

20
src/core/utils/clap.rs Normal file
View file

@ -0,0 +1,20 @@
//! Integration with `clap`
use std::path::PathBuf;
pub use clap::Parser;
use super::conduwuit_version;
/// Commandline arguments
#[derive(Parser, Debug)]
#[clap(version = conduwuit_version(), about, long_about = None)]
pub struct Args {
#[arg(short, long)]
/// Optional argument to the path of a conduwuit config TOML file
pub config: Option<PathBuf>,
}
/// Parse commandline arguments into structured data
#[must_use]
pub fn parse() -> Args { Args::parse() }

View file

@ -0,0 +1,130 @@
use infer::MatcherType;
use crate::debug_info;
const ATTACHMENT: &str = "attachment";
const INLINE: &str = "inline";
const APPLICATION_OCTET_STREAM: &str = "application/octet-stream";
const IMAGE_SVG_XML: &str = "image/svg+xml";
/// Returns a Content-Disposition of `attachment` or `inline`, depending on the
/// *parsed* contents of the file uploaded via format magic keys using `infer`
/// crate (basically libmagic without needing libmagic).
///
/// This forbids trusting what the client or remote server says the file is from
/// their `Content-Type` and we try to detect it ourselves. Also returns
/// `attachment` if the Content-Type does not match what we detected.
///
/// TODO: add a "strict" function for comparing the Content-Type with what we
/// detected: `file_type.mime_type() != content_type`
#[must_use]
#[tracing::instrument(skip(buf))]
pub fn content_disposition_type(buf: &[u8], content_type: &Option<String>) -> &'static str {
let Some(file_type) = infer::get(buf) else {
return ATTACHMENT;
};
debug_info!("MIME type: {}", file_type.mime_type());
match file_type.matcher_type() {
MatcherType::Image | MatcherType::Audio | MatcherType::Text | MatcherType::Video => {
if file_type.mime_type().contains("xml") {
ATTACHMENT
} else {
INLINE
}
},
_ => ATTACHMENT,
}
}
/// overrides the Content-Type with what we detected
///
/// SVG is special-cased due to the MIME type being classified as `text/xml` but
/// browsers need `image/svg+xml`
#[must_use]
#[tracing::instrument(skip(buf))]
pub fn make_content_type(buf: &[u8], content_type: &Option<String>) -> &'static str {
let Some(file_type) = infer::get(buf) else {
debug_info!("Failed to infer the file's contents");
return APPLICATION_OCTET_STREAM;
};
let Some(claimed_content_type) = content_type else {
return file_type.mime_type();
};
if claimed_content_type.contains("svg") && file_type.mime_type().contains("xml") {
return IMAGE_SVG_XML;
}
file_type.mime_type()
}
/// sanitises the file name for the Content-Disposition using
/// `sanitize_filename` crate
#[tracing::instrument]
pub fn sanitise_filename(filename: String) -> String {
let options = sanitize_filename::Options {
truncate: false,
..Default::default()
};
sanitize_filename::sanitize_with_options(filename, options)
}
/// creates the final Content-Disposition based on whether the filename exists
/// or not.
///
/// if filename exists:
/// `Content-Disposition: attachment/inline; filename=filename.ext`
///
/// else: `Content-Disposition: attachment/inline`
#[tracing::instrument(skip(file))]
pub fn make_content_disposition(
file: &[u8], content_type: &Option<String>, content_disposition: Option<String>,
) -> String {
let filename = content_disposition.map_or_else(String::new, |content_disposition| {
let (_, filename) = content_disposition
.split_once("filename=")
.unwrap_or(("", ""));
if filename.is_empty() {
String::new()
} else {
sanitise_filename(filename.to_owned())
}
});
if !filename.is_empty() {
// Content-Disposition: attachment/inline; filename=filename.ext
format!("{}; filename={}", content_disposition_type(file, content_type), filename)
} else {
// Content-Disposition: attachment/inline
String::from(content_disposition_type(file, content_type))
}
}
#[cfg(test)]
mod tests {
#[test]
fn string_sanitisation() {
const SAMPLE: &str =
"🏳this\\r\\n įs \r\\n ä \\r\nstrïng 🥴that\n\r ../../../../../../../may be\r\n malicious🏳";
const SANITISED: &str = "🏳thisrn įs n ä rstrïng 🥴that ..............may be malicious🏳";
let options = sanitize_filename::Options {
windows: true,
truncate: true,
replacement: "",
};
// cargo test -- --nocapture
println!("{}", SAMPLE);
println!("{}", sanitize_filename::sanitize_with_options(SAMPLE, options.clone()));
println!("{:?}", SAMPLE);
println!("{:?}", sanitize_filename::sanitize_with_options(SAMPLE, options.clone()));
assert_eq!(SANITISED, sanitize_filename::sanitize_with_options(SAMPLE, options.clone()));
}
}

22
src/core/utils/defer.rs Normal file
View file

@ -0,0 +1,22 @@
#[macro_export]
macro_rules! defer {
($body:block) => {
struct _Defer_<F>
where
F: FnMut(),
{
closure: F,
}
impl<F> Drop for _Defer_<F>
where
F: FnMut(),
{
fn drop(&mut self) { (self.closure)(); }
}
let _defer_ = _Defer_ {
closure: || $body,
};
};
}

270
src/core/utils/mod.rs Normal file
View file

@ -0,0 +1,270 @@
use std::{
cmp,
cmp::Ordering,
fmt,
str::FromStr,
time::{SystemTime, UNIX_EPOCH},
};
use rand::prelude::*;
use ring::digest;
use ruma::{canonical_json::try_from_json_map, CanonicalJsonError, CanonicalJsonObject, OwnedUserId};
use tracing::debug;
use crate::{Error, Result};
pub mod clap;
pub mod content_disposition;
pub mod defer;
pub fn clamp<T: Ord>(val: T, min: T, max: T) -> T { cmp::min(cmp::max(val, min), max) }
#[must_use]
#[allow(clippy::as_conversions)]
pub fn millis_since_unix_epoch() -> u64 {
SystemTime::now()
.duration_since(UNIX_EPOCH)
.expect("time is valid")
.as_millis() as u64
}
pub fn increment(old: Option<&[u8]>) -> Vec<u8> {
let number = match old.map(TryInto::try_into) {
Some(Ok(bytes)) => {
let number = u64::from_be_bytes(bytes);
number + 1
},
_ => 1, // Start at one. since 0 should return the first event in the db
};
number.to_be_bytes().to_vec()
}
#[must_use]
pub fn generate_keypair() -> Vec<u8> {
let mut value = random_string(8).as_bytes().to_vec();
value.push(0xFF);
value.extend_from_slice(
&ruma::signatures::Ed25519KeyPair::generate().expect("Ed25519KeyPair generation always works (?)"),
);
value
}
/// Parses the bytes into an u64.
pub fn u64_from_bytes(bytes: &[u8]) -> Result<u64, std::array::TryFromSliceError> {
let array: [u8; 8] = bytes.try_into()?;
Ok(u64::from_be_bytes(array))
}
/// Parses the bytes into a string.
pub fn string_from_bytes(bytes: &[u8]) -> Result<String, std::string::FromUtf8Error> {
String::from_utf8(bytes.to_vec())
}
/// Parses a `OwnedUserId` from bytes.
pub fn user_id_from_bytes(bytes: &[u8]) -> Result<OwnedUserId> {
OwnedUserId::try_from(
string_from_bytes(bytes).map_err(|_| Error::bad_database("Failed to parse string from bytes"))?,
)
.map_err(|_| Error::bad_database("Failed to parse user id from bytes"))
}
pub fn random_string(length: usize) -> String {
thread_rng()
.sample_iter(&rand::distributions::Alphanumeric)
.take(length)
.map(char::from)
.collect()
}
#[tracing::instrument(skip(keys))]
pub fn calculate_hash(keys: &[&[u8]]) -> Vec<u8> {
// We only hash the pdu's event ids, not the whole pdu
let bytes = keys.join(&0xFF);
let hash = digest::digest(&digest::SHA256, &bytes);
hash.as_ref().to_owned()
}
#[allow(clippy::impl_trait_in_params)]
pub fn common_elements(
mut iterators: impl Iterator<Item = impl Iterator<Item = Vec<u8>>>, check_order: impl Fn(&[u8], &[u8]) -> Ordering,
) -> Option<impl Iterator<Item = Vec<u8>>> {
let first_iterator = iterators.next()?;
let mut other_iterators = iterators.map(Iterator::peekable).collect::<Vec<_>>();
Some(first_iterator.filter(move |target| {
other_iterators.iter_mut().all(|it| {
while let Some(element) = it.peek() {
match check_order(element, target) {
Ordering::Greater => return false, // We went too far
Ordering::Equal => return true, // Element is in both iters
Ordering::Less => {
// Keep searching
it.next();
},
}
}
false
})
}))
}
/// Fallible conversion from any value that implements `Serialize` to a
/// `CanonicalJsonObject`.
///
/// `value` must serialize to an `serde_json::Value::Object`.
pub fn to_canonical_object<T: serde::Serialize>(value: T) -> Result<CanonicalJsonObject, CanonicalJsonError> {
use serde::ser::Error;
match serde_json::to_value(value).map_err(CanonicalJsonError::SerDe)? {
serde_json::Value::Object(map) => try_from_json_map(map),
_ => Err(CanonicalJsonError::SerDe(serde_json::Error::custom("Value must be an object"))),
}
}
pub fn deserialize_from_str<'de, D: serde::de::Deserializer<'de>, T: FromStr<Err = E>, E: fmt::Display>(
deserializer: D,
) -> Result<T, D::Error> {
struct Visitor<T: FromStr<Err = E>, E>(std::marker::PhantomData<T>);
impl<T: FromStr<Err = Err>, Err: fmt::Display> serde::de::Visitor<'_> for Visitor<T, Err> {
type Value = T;
fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(formatter, "a parsable string")
}
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
v.parse().map_err(serde::de::Error::custom)
}
}
deserializer.deserialize_str(Visitor(std::marker::PhantomData))
}
// Copied from librustdoc:
// https://github.com/rust-lang/rust/blob/cbaeec14f90b59a91a6b0f17fc046c66fa811892/src/librustdoc/html/escape.rs
/// Wrapper struct which will emit the HTML-escaped version of the contained
/// string when passed to a format string.
pub struct HtmlEscape<'a>(pub &'a str);
impl fmt::Display for HtmlEscape<'_> {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
// Because the internet is always right, turns out there's not that many
// characters to escape: http://stackoverflow.com/questions/7381974
let HtmlEscape(s) = *self;
let pile_o_bits = s;
let mut last = 0;
for (i, ch) in s.char_indices() {
let s = match ch {
'>' => "&gt;",
'<' => "&lt;",
'&' => "&amp;",
'\'' => "&#39;",
'"' => "&quot;",
_ => continue,
};
fmt.write_str(&pile_o_bits[last..i])?;
fmt.write_str(s)?;
// NOTE: we only expect single byte characters here - which is fine as long as
// we only match single byte characters
last = i + 1;
}
if last < s.len() {
fmt.write_str(&pile_o_bits[last..])?;
}
Ok(())
}
}
/// one true function for returning the conduwuit version with the necessary
/// CONDUWUIT_VERSION_EXTRA env variables used if specified
///
/// Set the environment variable `CONDUWUIT_VERSION_EXTRA` to any UTF-8 string
/// to include it in parenthesis after the SemVer version. A common value are
/// git commit hashes.
#[must_use]
pub fn conduwuit_version() -> String {
match option_env!("CONDUWUIT_VERSION_EXTRA") {
Some(extra) => {
if extra.is_empty() {
env!("CARGO_PKG_VERSION").to_owned()
} else {
format!("{} ({})", env!("CARGO_PKG_VERSION"), extra)
}
},
None => match option_env!("CONDUIT_VERSION_EXTRA") {
Some(extra) => {
if extra.is_empty() {
env!("CARGO_PKG_VERSION").to_owned()
} else {
format!("{} ({})", env!("CARGO_PKG_VERSION"), extra)
}
},
None => env!("CARGO_PKG_VERSION").to_owned(),
},
}
}
/// Debug-formats the given slice, but only up to the first `max_len` elements.
/// Any further elements are replaced by an ellipsis.
///
/// See also [`debug_slice_truncated()`],
pub struct TruncatedDebugSlice<'a, T> {
inner: &'a [T],
max_len: usize,
}
impl<T: fmt::Debug> fmt::Debug for TruncatedDebugSlice<'_, T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if self.inner.len() <= self.max_len {
write!(f, "{:?}", self.inner)
} else {
f.debug_list()
.entries(&self.inner[..self.max_len])
.entry(&"...")
.finish()
}
}
}
/// See [`TruncatedDebugSlice`]. Useful for `#[instrument]`:
///
/// ```
/// use conduit_core::utils::debug_slice_truncated;
///
/// #[tracing::instrument(fields(foos = debug_slice_truncated(foos, 42)))]
/// fn bar(foos: &[&str]);
/// ```
pub fn debug_slice_truncated<T: fmt::Debug>(
slice: &[T], max_len: usize,
) -> tracing::field::DebugValue<TruncatedDebugSlice<'_, T>> {
tracing::field::debug(TruncatedDebugSlice {
inner: slice,
max_len,
})
}
/// This is needed for opening lots of file descriptors, which tends to
/// happen more often when using RocksDB and making lots of federation
/// connections at startup. The soft limit is usually 1024, and the hard
/// limit is usually 512000; I've personally seen it hit >2000.
///
/// * <https://www.freedesktop.org/software/systemd/man/systemd.exec.html#id-1.12.2.1.17.6>
/// * <https://github.com/systemd/systemd/commit/0abf94923b4a95a7d89bc526efc84e7ca2b71741>
#[cfg(unix)]
pub fn maximize_fd_limit() -> Result<(), nix::errno::Errno> {
use nix::sys::resource::{getrlimit, setrlimit, Resource::RLIMIT_NOFILE as NOFILE};
let (soft_limit, hard_limit) = getrlimit(NOFILE)?;
if soft_limit < hard_limit {
setrlimit(NOFILE, hard_limit, hard_limit)?;
assert_eq!((hard_limit, hard_limit), getrlimit(NOFILE)?, "getrlimit != setrlimit");
debug!(to = hard_limit, from = soft_limit, "Raised RLIMIT_NOFILE",);
}
Ok(())
}