This commit is contained in:
Jonathan de Jong 2021-07-14 07:07:08 +00:00 committed by Timo Kösters
parent bd4bd58612
commit 9d4fa9a220
49 changed files with 1525 additions and 681 deletions

View file

@ -1,28 +1,21 @@
use super::Config;
use crate::{utils, Result};
use log::warn;
use crate::Result;
use std::{future::Future, pin::Pin, sync::Arc};
#[cfg(feature = "rocksdb")]
use std::{collections::BTreeMap, sync::RwLock};
pub mod rocksdb;
#[cfg(feature = "sled")]
pub struct SledEngine(sled::Db);
#[cfg(feature = "sled")]
pub struct SledEngineTree(sled::Tree);
pub mod sled;
#[cfg(feature = "rocksdb")]
pub struct RocksDbEngine(rocksdb::DBWithThreadMode<rocksdb::MultiThreaded>);
#[cfg(feature = "rocksdb")]
pub struct RocksDbEngineTree<'a> {
db: Arc<RocksDbEngine>,
name: &'a str,
watchers: RwLock<BTreeMap<Vec<u8>, Vec<tokio::sync::oneshot::Sender<()>>>>,
}
#[cfg(feature = "sqlite")]
pub mod sqlite;
pub trait DatabaseEngine: Sized {
fn open(config: &Config) -> Result<Arc<Self>>;
fn open_tree(self: &Arc<Self>, name: &'static str) -> Result<Arc<dyn Tree>>;
fn flush(self: &Arc<Self>) -> Result<()>;
}
pub trait Tree: Send + Sync {
@ -32,20 +25,20 @@ pub trait Tree: Send + Sync {
fn remove(&self, key: &[u8]) -> Result<()>;
fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = (Box<[u8]>, Box<[u8]>)> + Send + Sync + 'a>;
fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = (Vec<u8>, Vec<u8>)> + Send + 'a>;
fn iter_from<'a>(
&'a self,
from: &[u8],
backwards: bool,
) -> Box<dyn Iterator<Item = (Box<[u8]>, Box<[u8]>)> + 'a>;
) -> Box<dyn Iterator<Item = (Vec<u8>, Vec<u8>)> + Send + 'a>;
fn increment(&self, key: &[u8]) -> Result<Vec<u8>>;
fn scan_prefix<'a>(
&'a self,
prefix: Vec<u8>,
) -> Box<dyn Iterator<Item = (Box<[u8]>, Box<[u8]>)> + Send + 'a>;
) -> Box<dyn Iterator<Item = (Vec<u8>, Vec<u8>)> + Send + 'a>;
fn watch_prefix<'a>(&'a self, prefix: &[u8]) -> Pin<Box<dyn Future<Output = ()> + Send + 'a>>;
@ -57,273 +50,3 @@ pub trait Tree: Send + Sync {
Ok(())
}
}
#[cfg(feature = "sled")]
impl DatabaseEngine for SledEngine {
fn open(config: &Config) -> Result<Arc<Self>> {
Ok(Arc::new(SledEngine(
sled::Config::default()
.path(&config.database_path)
.cache_capacity(config.cache_capacity as u64)
.use_compression(true)
.open()?,
)))
}
fn open_tree(self: &Arc<Self>, name: &'static str) -> Result<Arc<dyn Tree>> {
Ok(Arc::new(SledEngineTree(self.0.open_tree(name)?)))
}
}
#[cfg(feature = "sled")]
impl Tree for SledEngineTree {
fn get(&self, key: &[u8]) -> Result<Option<Vec<u8>>> {
Ok(self.0.get(key)?.map(|v| v.to_vec()))
}
fn insert(&self, key: &[u8], value: &[u8]) -> Result<()> {
self.0.insert(key, value)?;
Ok(())
}
fn remove(&self, key: &[u8]) -> Result<()> {
self.0.remove(key)?;
Ok(())
}
fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = (Box<[u8]>, Box<[u8]>)> + Send + Sync + 'a> {
Box::new(
self.0
.iter()
.filter_map(|r| {
if let Err(e) = &r {
warn!("Error: {}", e);
}
r.ok()
})
.map(|(k, v)| (k.to_vec().into(), v.to_vec().into())),
)
}
fn iter_from(
&self,
from: &[u8],
backwards: bool,
) -> Box<dyn Iterator<Item = (Box<[u8]>, Box<[u8]>)>> {
let iter = if backwards {
self.0.range(..from)
} else {
self.0.range(from..)
};
let iter = iter
.filter_map(|r| {
if let Err(e) = &r {
warn!("Error: {}", e);
}
r.ok()
})
.map(|(k, v)| (k.to_vec().into(), v.to_vec().into()));
if backwards {
Box::new(iter.rev())
} else {
Box::new(iter)
}
}
fn increment(&self, key: &[u8]) -> Result<Vec<u8>> {
Ok(self
.0
.update_and_fetch(key, utils::increment)
.map(|o| o.expect("increment always sets a value").to_vec())?)
}
fn scan_prefix<'a>(
&'a self,
prefix: Vec<u8>,
) -> Box<dyn Iterator<Item = (Box<[u8]>, Box<[u8]>)> + Send + 'a> {
let iter = self
.0
.scan_prefix(prefix)
.filter_map(|r| {
if let Err(e) = &r {
warn!("Error: {}", e);
}
r.ok()
})
.map(|(k, v)| (k.to_vec().into(), v.to_vec().into()));
Box::new(iter)
}
fn watch_prefix<'a>(&'a self, prefix: &[u8]) -> Pin<Box<dyn Future<Output = ()> + Send + 'a>> {
let prefix = prefix.to_vec();
Box::pin(async move {
self.0.watch_prefix(prefix).await;
})
}
}
#[cfg(feature = "rocksdb")]
impl DatabaseEngine for RocksDbEngine {
fn open(config: &Config) -> Result<Arc<Self>> {
let mut db_opts = rocksdb::Options::default();
db_opts.create_if_missing(true);
db_opts.set_max_open_files(16);
db_opts.set_compaction_style(rocksdb::DBCompactionStyle::Level);
db_opts.set_compression_type(rocksdb::DBCompressionType::Snappy);
db_opts.set_target_file_size_base(256 << 20);
db_opts.set_write_buffer_size(256 << 20);
let mut block_based_options = rocksdb::BlockBasedOptions::default();
block_based_options.set_block_size(512 << 10);
db_opts.set_block_based_table_factory(&block_based_options);
let cfs = rocksdb::DBWithThreadMode::<rocksdb::MultiThreaded>::list_cf(
&db_opts,
&config.database_path,
)
.unwrap_or_default();
let mut options = rocksdb::Options::default();
options.set_merge_operator_associative("increment", utils::increment_rocksdb);
let db = rocksdb::DBWithThreadMode::<rocksdb::MultiThreaded>::open_cf_descriptors(
&db_opts,
&config.database_path,
cfs.iter()
.map(|name| rocksdb::ColumnFamilyDescriptor::new(name, options.clone())),
)?;
Ok(Arc::new(RocksDbEngine(db)))
}
fn open_tree(self: &Arc<Self>, name: &'static str) -> Result<Arc<dyn Tree>> {
let mut options = rocksdb::Options::default();
options.set_merge_operator_associative("increment", utils::increment_rocksdb);
// Create if it doesn't exist
let _ = self.0.create_cf(name, &options);
Ok(Arc::new(RocksDbEngineTree {
name,
db: Arc::clone(self),
watchers: RwLock::new(BTreeMap::new()),
}))
}
}
#[cfg(feature = "rocksdb")]
impl RocksDbEngineTree<'_> {
fn cf(&self) -> rocksdb::BoundColumnFamily<'_> {
self.db.0.cf_handle(self.name).unwrap()
}
}
#[cfg(feature = "rocksdb")]
impl Tree for RocksDbEngineTree<'_> {
fn get(&self, key: &[u8]) -> Result<Option<Vec<u8>>> {
Ok(self.db.0.get_cf(self.cf(), key)?)
}
fn insert(&self, key: &[u8], value: &[u8]) -> Result<()> {
let watchers = self.watchers.read().unwrap();
let mut triggered = Vec::new();
for length in 0..=key.len() {
if watchers.contains_key(&key[..length]) {
triggered.push(&key[..length]);
}
}
drop(watchers);
if !triggered.is_empty() {
let mut watchers = self.watchers.write().unwrap();
for prefix in triggered {
if let Some(txs) = watchers.remove(prefix) {
for tx in txs {
let _ = tx.send(());
}
}
}
}
Ok(self.db.0.put_cf(self.cf(), key, value)?)
}
fn remove(&self, key: &[u8]) -> Result<()> {
Ok(self.db.0.delete_cf(self.cf(), key)?)
}
fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = (Box<[u8]>, Box<[u8]>)> + Send + Sync + 'a> {
Box::new(
self.db
.0
.iterator_cf(self.cf(), rocksdb::IteratorMode::Start),
)
}
fn iter_from<'a>(
&'a self,
from: &[u8],
backwards: bool,
) -> Box<dyn Iterator<Item = (Box<[u8]>, Box<[u8]>)> + 'a> {
Box::new(self.db.0.iterator_cf(
self.cf(),
rocksdb::IteratorMode::From(
from,
if backwards {
rocksdb::Direction::Reverse
} else {
rocksdb::Direction::Forward
},
),
))
}
fn increment(&self, key: &[u8]) -> Result<Vec<u8>> {
let stats = rocksdb::perf::get_memory_usage_stats(Some(&[&self.db.0]), None).unwrap();
dbg!(stats.mem_table_total);
dbg!(stats.mem_table_unflushed);
dbg!(stats.mem_table_readers_total);
dbg!(stats.cache_total);
// TODO: atomic?
let old = self.get(key)?;
let new = utils::increment(old.as_deref()).unwrap();
self.insert(key, &new)?;
Ok(new)
}
fn scan_prefix<'a>(
&'a self,
prefix: Vec<u8>,
) -> Box<dyn Iterator<Item = (Box<[u8]>, Box<[u8]>)> + Send + 'a> {
Box::new(
self.db
.0
.iterator_cf(
self.cf(),
rocksdb::IteratorMode::From(&prefix, rocksdb::Direction::Forward),
)
.take_while(move |(k, _)| k.starts_with(&prefix)),
)
}
fn watch_prefix<'a>(&'a self, prefix: &[u8]) -> Pin<Box<dyn Future<Output = ()> + Send + 'a>> {
let (tx, rx) = tokio::sync::oneshot::channel();
self.watchers
.write()
.unwrap()
.entry(prefix.to_vec())
.or_default()
.push(tx);
Box::pin(async move {
// Tx is never destroyed
rx.await.unwrap();
})
}
}

View file

@ -0,0 +1,176 @@
use super::super::Config;
use crate::{utils, Result};
use std::{future::Future, pin::Pin, sync::Arc};
use super::{DatabaseEngine, Tree};
use std::{collections::BTreeMap, sync::RwLock};
pub struct Engine(rocksdb::DBWithThreadMode<rocksdb::MultiThreaded>);
pub struct RocksDbEngineTree<'a> {
db: Arc<Engine>,
name: &'a str,
watchers: RwLock<BTreeMap<Vec<u8>, Vec<tokio::sync::oneshot::Sender<()>>>>,
}
impl DatabaseEngine for Engine {
fn open(config: &Config) -> Result<Arc<Self>> {
let mut db_opts = rocksdb::Options::default();
db_opts.create_if_missing(true);
db_opts.set_max_open_files(16);
db_opts.set_compaction_style(rocksdb::DBCompactionStyle::Level);
db_opts.set_compression_type(rocksdb::DBCompressionType::Snappy);
db_opts.set_target_file_size_base(256 << 20);
db_opts.set_write_buffer_size(256 << 20);
let mut block_based_options = rocksdb::BlockBasedOptions::default();
block_based_options.set_block_size(512 << 10);
db_opts.set_block_based_table_factory(&block_based_options);
let cfs = rocksdb::DBWithThreadMode::<rocksdb::MultiThreaded>::list_cf(
&db_opts,
&config.database_path,
)
.unwrap_or_default();
let mut options = rocksdb::Options::default();
options.set_merge_operator_associative("increment", utils::increment_rocksdb);
let db = rocksdb::DBWithThreadMode::<rocksdb::MultiThreaded>::open_cf_descriptors(
&db_opts,
&config.database_path,
cfs.iter()
.map(|name| rocksdb::ColumnFamilyDescriptor::new(name, options.clone())),
)?;
Ok(Arc::new(Engine(db)))
}
fn open_tree(self: &Arc<Self>, name: &'static str) -> Result<Arc<dyn Tree>> {
let mut options = rocksdb::Options::default();
options.set_merge_operator_associative("increment", utils::increment_rocksdb);
// Create if it doesn't exist
let _ = self.0.create_cf(name, &options);
Ok(Arc::new(RocksDbEngineTree {
name,
db: Arc::clone(self),
watchers: RwLock::new(BTreeMap::new()),
}))
}
}
impl RocksDbEngineTree<'_> {
fn cf(&self) -> rocksdb::BoundColumnFamily<'_> {
self.db.0.cf_handle(self.name).unwrap()
}
}
impl Tree for RocksDbEngineTree<'_> {
fn get(&self, key: &[u8]) -> Result<Option<Vec<u8>>> {
Ok(self.db.0.get_cf(self.cf(), key)?)
}
fn insert(&self, key: &[u8], value: &[u8]) -> Result<()> {
let watchers = self.watchers.read().unwrap();
let mut triggered = Vec::new();
for length in 0..=key.len() {
if watchers.contains_key(&key[..length]) {
triggered.push(&key[..length]);
}
}
drop(watchers);
if !triggered.is_empty() {
let mut watchers = self.watchers.write().unwrap();
for prefix in triggered {
if let Some(txs) = watchers.remove(prefix) {
for tx in txs {
let _ = tx.send(());
}
}
}
}
Ok(self.db.0.put_cf(self.cf(), key, value)?)
}
fn remove(&self, key: &[u8]) -> Result<()> {
Ok(self.db.0.delete_cf(self.cf(), key)?)
}
fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = (Vec<u8>, Vec<u8>)> + Send + Sync + 'a> {
Box::new(
self.db
.0
.iterator_cf(self.cf(), rocksdb::IteratorMode::Start),
)
}
fn iter_from<'a>(
&'a self,
from: &[u8],
backwards: bool,
) -> Box<dyn Iterator<Item = (Vec<u8>, Vec<u8>)> + 'a> {
Box::new(self.db.0.iterator_cf(
self.cf(),
rocksdb::IteratorMode::From(
from,
if backwards {
rocksdb::Direction::Reverse
} else {
rocksdb::Direction::Forward
},
),
))
}
fn increment(&self, key: &[u8]) -> Result<Vec<u8>> {
let stats = rocksdb::perf::get_memory_usage_stats(Some(&[&self.db.0]), None).unwrap();
dbg!(stats.mem_table_total);
dbg!(stats.mem_table_unflushed);
dbg!(stats.mem_table_readers_total);
dbg!(stats.cache_total);
// TODO: atomic?
let old = self.get(key)?;
let new = utils::increment(old.as_deref()).unwrap();
self.insert(key, &new)?;
Ok(new)
}
fn scan_prefix<'a>(
&'a self,
prefix: Vec<u8>,
) -> Box<dyn Iterator<Item = (Vec<u8>, Vec<u8>)> + Send + 'a> {
Box::new(
self.db
.0
.iterator_cf(
self.cf(),
rocksdb::IteratorMode::From(&prefix, rocksdb::Direction::Forward),
)
.take_while(move |(k, _)| k.starts_with(&prefix)),
)
}
fn watch_prefix<'a>(&'a self, prefix: &[u8]) -> Pin<Box<dyn Future<Output = ()> + Send + 'a>> {
let (tx, rx) = tokio::sync::oneshot::channel();
self.watchers
.write()
.unwrap()
.entry(prefix.to_vec())
.or_default()
.push(tx);
Box::pin(async move {
// Tx is never destroyed
rx.await.unwrap();
})
}
}

View file

@ -0,0 +1,119 @@
use super::super::Config;
use crate::{utils, Result};
use log::warn;
use std::{future::Future, pin::Pin, sync::Arc};
use super::{DatabaseEngine, Tree};
pub struct Engine(sled::Db);
pub struct SledEngineTree(sled::Tree);
impl DatabaseEngine for Engine {
fn open(config: &Config) -> Result<Arc<Self>> {
Ok(Arc::new(Engine(
sled::Config::default()
.path(&config.database_path)
.cache_capacity((config.db_cache_capacity_mb * 1024 * 1024) as u64)
.use_compression(true)
.open()?,
)))
}
fn open_tree(self: &Arc<Self>, name: &'static str) -> Result<Arc<dyn Tree>> {
Ok(Arc::new(SledEngineTree(self.0.open_tree(name)?)))
}
fn flush(self: &Arc<Self>) -> Result<()> {
Ok(()) // noop
}
}
impl Tree for SledEngineTree {
fn get(&self, key: &[u8]) -> Result<Option<Vec<u8>>> {
Ok(self.0.get(key)?.map(|v| v.to_vec()))
}
fn insert(&self, key: &[u8], value: &[u8]) -> Result<()> {
self.0.insert(key, value)?;
Ok(())
}
fn remove(&self, key: &[u8]) -> Result<()> {
self.0.remove(key)?;
Ok(())
}
fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = (Vec<u8>, Vec<u8>)> + Send + 'a> {
Box::new(
self.0
.iter()
.filter_map(|r| {
if let Err(e) = &r {
warn!("Error: {}", e);
}
r.ok()
})
.map(|(k, v)| (k.to_vec().into(), v.to_vec().into())),
)
}
fn iter_from(
&self,
from: &[u8],
backwards: bool,
) -> Box<dyn Iterator<Item = (Vec<u8>, Vec<u8>)> + Send> {
let iter = if backwards {
self.0.range(..from)
} else {
self.0.range(from..)
};
let iter = iter
.filter_map(|r| {
if let Err(e) = &r {
warn!("Error: {}", e);
}
r.ok()
})
.map(|(k, v)| (k.to_vec().into(), v.to_vec().into()));
if backwards {
Box::new(iter.rev())
} else {
Box::new(iter)
}
}
fn increment(&self, key: &[u8]) -> Result<Vec<u8>> {
Ok(self
.0
.update_and_fetch(key, utils::increment)
.map(|o| o.expect("increment always sets a value").to_vec())?)
}
fn scan_prefix<'a>(
&'a self,
prefix: Vec<u8>,
) -> Box<dyn Iterator<Item = (Vec<u8>, Vec<u8>)> + Send + 'a> {
let iter = self
.0
.scan_prefix(prefix)
.filter_map(|r| {
if let Err(e) = &r {
warn!("Error: {}", e);
}
r.ok()
})
.map(|(k, v)| (k.to_vec().into(), v.to_vec().into()));
Box::new(iter)
}
fn watch_prefix<'a>(&'a self, prefix: &[u8]) -> Pin<Box<dyn Future<Output = ()> + Send + 'a>> {
let prefix = prefix.to_vec();
Box::pin(async move {
self.0.watch_prefix(prefix).await;
})
}
}

View file

@ -0,0 +1,444 @@
use std::{
collections::BTreeMap,
future::Future,
ops::Deref,
path::{Path, PathBuf},
pin::Pin,
sync::Arc,
thread,
time::{Duration, Instant},
};
use crate::{database::Config, Result};
use super::{DatabaseEngine, Tree};
use log::debug;
use crossbeam::channel::{bounded, Sender as ChannelSender};
use parking_lot::{Mutex, MutexGuard, RwLock};
use rusqlite::{params, Connection, DatabaseName::Main, OptionalExtension};
use tokio::sync::oneshot::Sender;
// const SQL_CREATE_TABLE: &str =
// "CREATE TABLE IF NOT EXISTS {} {{ \"key\" BLOB PRIMARY KEY, \"value\" BLOB NOT NULL }}";
// const SQL_SELECT: &str = "SELECT value FROM {} WHERE key = ?";
// const SQL_INSERT: &str = "INSERT OR REPLACE INTO {} (key, value) VALUES (?, ?)";
// const SQL_DELETE: &str = "DELETE FROM {} WHERE key = ?";
// const SQL_SELECT_ITER: &str = "SELECT key, value FROM {}";
// const SQL_SELECT_PREFIX: &str = "SELECT key, value FROM {} WHERE key LIKE ?||'%' ORDER BY key ASC";
// const SQL_SELECT_ITER_FROM_FORWARDS: &str = "SELECT key, value FROM {} WHERE key >= ? ORDER BY ASC";
// const SQL_SELECT_ITER_FROM_BACKWARDS: &str =
// "SELECT key, value FROM {} WHERE key <= ? ORDER BY DESC";
struct Pool {
writer: Mutex<Connection>,
readers: Vec<Mutex<Connection>>,
spill_tracker: Arc<()>,
path: PathBuf,
}
pub const MILLI: Duration = Duration::from_millis(1);
enum HoldingConn<'a> {
FromGuard(MutexGuard<'a, Connection>),
FromOwned(Connection, Arc<()>),
}
impl<'a> Deref for HoldingConn<'a> {
type Target = Connection;
fn deref(&self) -> &Self::Target {
match self {
HoldingConn::FromGuard(guard) => guard.deref(),
HoldingConn::FromOwned(conn, _) => conn,
}
}
}
impl Pool {
fn new<P: AsRef<Path>>(path: P, num_readers: usize, total_cache_size_mb: f64) -> Result<Self> {
// calculates cache-size per permanent connection
// 1. convert MB to KiB
// 2. divide by permanent connections
// 3. round down to nearest integer
let cache_size: u32 = ((total_cache_size_mb * 1024.0) / (num_readers + 1) as f64) as u32;
let writer = Mutex::new(Self::prepare_conn(&path, Some(cache_size))?);
let mut readers = Vec::new();
for _ in 0..num_readers {
readers.push(Mutex::new(Self::prepare_conn(&path, Some(cache_size))?))
}
Ok(Self {
writer,
readers,
spill_tracker: Arc::new(()),
path: path.as_ref().to_path_buf(),
})
}
fn prepare_conn<P: AsRef<Path>>(path: P, cache_size: Option<u32>) -> Result<Connection> {
let conn = Connection::open(path)?;
conn.pragma_update(Some(Main), "journal_mode", &"WAL".to_owned())?;
// conn.pragma_update(Some(Main), "wal_autocheckpoint", &250)?;
// conn.pragma_update(Some(Main), "wal_checkpoint", &"FULL".to_owned())?;
conn.pragma_update(Some(Main), "synchronous", &"OFF".to_owned())?;
if let Some(cache_kib) = cache_size {
conn.pragma_update(Some(Main), "cache_size", &(-Into::<i64>::into(cache_kib)))?;
}
Ok(conn)
}
fn write_lock(&self) -> MutexGuard<'_, Connection> {
self.writer.lock()
}
fn read_lock(&self) -> HoldingConn<'_> {
for r in &self.readers {
if let Some(reader) = r.try_lock() {
return HoldingConn::FromGuard(reader);
}
}
let spill_arc = self.spill_tracker.clone();
let now_count = Arc::strong_count(&spill_arc) - 1 /* because one is held by the pool */;
log::warn!("read_lock: all readers locked, creating spillover reader...");
if now_count > 1 {
log::warn!("read_lock: now {} spillover readers exist", now_count);
}
let spilled = Self::prepare_conn(&self.path, None).unwrap();
return HoldingConn::FromOwned(spilled, spill_arc);
}
}
pub struct Engine {
pool: Pool,
}
impl DatabaseEngine for Engine {
fn open(config: &Config) -> Result<Arc<Self>> {
let pool = Pool::new(
Path::new(&config.database_path).join("conduit.db"),
config.sqlite_read_pool_size,
config.db_cache_capacity_mb,
)?;
pool.write_lock()
.execute("CREATE TABLE IF NOT EXISTS _noop (\"key\" INT)", params![])?;
let arc = Arc::new(Engine { pool });
Ok(arc)
}
fn open_tree(self: &Arc<Self>, name: &str) -> Result<Arc<dyn Tree>> {
self.pool.write_lock().execute(format!("CREATE TABLE IF NOT EXISTS {} ( \"key\" BLOB PRIMARY KEY, \"value\" BLOB NOT NULL )", name).as_str(), [])?;
Ok(Arc::new(SqliteTable {
engine: Arc::clone(self),
name: name.to_owned(),
watchers: RwLock::new(BTreeMap::new()),
}))
}
fn flush(self: &Arc<Self>) -> Result<()> {
self.pool
.write_lock()
.execute_batch(
"
PRAGMA synchronous=FULL;
BEGIN;
DELETE FROM _noop;
INSERT INTO _noop VALUES (1);
COMMIT;
PRAGMA synchronous=OFF;
",
)
.map_err(Into::into)
}
}
impl Engine {
pub fn flush_wal(self: &Arc<Self>) -> Result<()> {
self.pool
.write_lock()
.execute_batch(
"
PRAGMA synchronous=FULL; PRAGMA wal_checkpoint=TRUNCATE;
BEGIN;
DELETE FROM _noop;
INSERT INTO _noop VALUES (1);
COMMIT;
PRAGMA wal_checkpoint=PASSIVE; PRAGMA synchronous=OFF;
",
)
.map_err(Into::into)
}
}
pub struct SqliteTable {
engine: Arc<Engine>,
name: String,
watchers: RwLock<BTreeMap<Vec<u8>, Vec<Sender<()>>>>,
}
type TupleOfBytes = (Vec<u8>, Vec<u8>);
impl SqliteTable {
fn get_with_guard(&self, guard: &Connection, key: &[u8]) -> Result<Option<Vec<u8>>> {
Ok(guard
.prepare(format!("SELECT value FROM {} WHERE key = ?", self.name).as_str())?
.query_row([key], |row| row.get(0))
.optional()?)
}
fn insert_with_guard(&self, guard: &Connection, key: &[u8], value: &[u8]) -> Result<()> {
guard.execute(
format!(
"INSERT INTO {} (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value = excluded.value",
self.name
)
.as_str(),
[key, value],
)?;
Ok(())
}
fn _iter_from_thread<F>(&self, f: F) -> Box<dyn Iterator<Item = TupleOfBytes> + Send>
where
F: (for<'a> FnOnce(&'a Connection, ChannelSender<TupleOfBytes>)) + Send + 'static,
{
let (s, r) = bounded::<TupleOfBytes>(5);
let engine = self.engine.clone();
thread::spawn(move || {
let _ = f(&engine.pool.read_lock(), s);
});
Box::new(r.into_iter())
}
}
macro_rules! iter_from_thread {
($self:expr, $sql:expr, $param:expr) => {
$self._iter_from_thread(move |guard, s| {
let _ = guard
.prepare($sql)
.unwrap()
.query_map($param, |row| Ok((row.get_unwrap(0), row.get_unwrap(1))))
.unwrap()
.map(|r| r.unwrap())
.try_for_each(|bob| s.send(bob));
})
};
}
impl Tree for SqliteTable {
fn get(&self, key: &[u8]) -> Result<Option<Vec<u8>>> {
let guard = self.engine.pool.read_lock();
// let start = Instant::now();
let val = self.get_with_guard(&guard, key);
// debug!("get: took {:?}", start.elapsed());
// debug!("get key: {:?}", &key)
val
}
fn insert(&self, key: &[u8], value: &[u8]) -> Result<()> {
let guard = self.engine.pool.write_lock();
let start = Instant::now();
self.insert_with_guard(&guard, key, value)?;
let elapsed = start.elapsed();
if elapsed > MILLI {
debug!("insert: took {:012?} : {}", elapsed, &self.name);
}
drop(guard);
let watchers = self.watchers.read();
let mut triggered = Vec::new();
for length in 0..=key.len() {
if watchers.contains_key(&key[..length]) {
triggered.push(&key[..length]);
}
}
drop(watchers);
if !triggered.is_empty() {
let mut watchers = self.watchers.write();
for prefix in triggered {
if let Some(txs) = watchers.remove(prefix) {
for tx in txs {
let _ = tx.send(());
}
}
}
};
Ok(())
}
fn remove(&self, key: &[u8]) -> Result<()> {
let guard = self.engine.pool.write_lock();
let start = Instant::now();
guard.execute(
format!("DELETE FROM {} WHERE key = ?", self.name).as_str(),
[key],
)?;
let elapsed = start.elapsed();
if elapsed > MILLI {
debug!("remove: took {:012?} : {}", elapsed, &self.name);
}
// debug!("remove key: {:?}", &key);
Ok(())
}
fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = TupleOfBytes> + Send + 'a> {
let name = self.name.clone();
iter_from_thread!(
self,
format!("SELECT key, value FROM {}", name).as_str(),
params![]
)
}
fn iter_from<'a>(
&'a self,
from: &[u8],
backwards: bool,
) -> Box<dyn Iterator<Item = TupleOfBytes> + Send + 'a> {
let name = self.name.clone();
let from = from.to_vec(); // TODO change interface?
if backwards {
iter_from_thread!(
self,
format!(
"SELECT key, value FROM {} WHERE key <= ? ORDER BY key DESC",
name
)
.as_str(),
[from]
)
} else {
iter_from_thread!(
self,
format!(
"SELECT key, value FROM {} WHERE key >= ? ORDER BY key ASC",
name
)
.as_str(),
[from]
)
}
}
fn increment(&self, key: &[u8]) -> Result<Vec<u8>> {
let guard = self.engine.pool.write_lock();
let start = Instant::now();
let old = self.get_with_guard(&guard, key)?;
let new =
crate::utils::increment(old.as_deref()).expect("utils::increment always returns Some");
self.insert_with_guard(&guard, key, &new)?;
let elapsed = start.elapsed();
if elapsed > MILLI {
debug!("increment: took {:012?} : {}", elapsed, &self.name);
}
// debug!("increment key: {:?}", &key);
Ok(new)
}
fn scan_prefix<'a>(
&'a self,
prefix: Vec<u8>,
) -> Box<dyn Iterator<Item = TupleOfBytes> + Send + 'a> {
// let name = self.name.clone();
// iter_from_thread!(
// self,
// format!(
// "SELECT key, value FROM {} WHERE key BETWEEN ?1 AND ?1 || X'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF' ORDER BY key ASC",
// name
// )
// .as_str(),
// [prefix]
// )
Box::new(
self.iter_from(&prefix, false)
.take_while(move |(key, _)| key.starts_with(&prefix)),
)
}
fn watch_prefix<'a>(&'a self, prefix: &[u8]) -> Pin<Box<dyn Future<Output = ()> + Send + 'a>> {
let (tx, rx) = tokio::sync::oneshot::channel();
self.watchers
.write()
.entry(prefix.to_vec())
.or_default()
.push(tx);
Box::pin(async move {
// Tx is never destroyed
rx.await.unwrap();
})
}
fn clear(&self) -> Result<()> {
debug!("clear: running");
self.engine
.pool
.write_lock()
.execute(format!("DELETE FROM {}", self.name).as_str(), [])?;
debug!("clear: ran");
Ok(())
}
}
// TODO
// struct Pool<const NUM_READERS: usize> {
// writer: Mutex<Connection>,
// readers: [Mutex<Connection>; NUM_READERS],
// }
// // then, to pick a reader:
// for r in &pool.readers {
// if let Ok(reader) = r.try_lock() {
// // use reader
// }
// }
// // none unlocked, pick the next reader
// pool.readers[pool.counter.fetch_add(1, Relaxed) % NUM_READERS].lock()

View file

@ -127,7 +127,7 @@ impl AccountData {
room_id: Option<&RoomId>,
user_id: &UserId,
kind: &EventType,
) -> Result<Option<(Box<[u8]>, Box<[u8]>)>> {
) -> Result<Option<(Vec<u8>, Vec<u8>)>> {
let mut prefix = room_id
.map(|r| r.to_string())
.unwrap_or_default()

View file

@ -10,6 +10,7 @@ use ruma::{
events::{room::message, EventType},
UserId,
};
use tokio::sync::{RwLock, RwLockReadGuard};
pub enum AdminCommand {
RegisterAppservice(serde_yaml::Value),
@ -25,20 +26,23 @@ pub struct Admin {
impl Admin {
pub fn start_handler(
&self,
db: Arc<Database>,
db: Arc<RwLock<Database>>,
mut receiver: mpsc::UnboundedReceiver<AdminCommand>,
) {
tokio::spawn(async move {
// TODO: Use futures when we have long admin commands
//let mut futures = FuturesUnordered::new();
let conduit_user = UserId::try_from(format!("@conduit:{}", db.globals.server_name()))
.expect("@conduit:server_name is valid");
let guard = db.read().await;
let conduit_room = db
let conduit_user =
UserId::try_from(format!("@conduit:{}", guard.globals.server_name()))
.expect("@conduit:server_name is valid");
let conduit_room = guard
.rooms
.id_from_alias(
&format!("#admins:{}", db.globals.server_name())
&format!("#admins:{}", guard.globals.server_name())
.try_into()
.expect("#admins:server_name is a valid room alias"),
)
@ -48,48 +52,54 @@ impl Admin {
warn!("Conduit instance does not have an #admins room. Logging to that room will not work. Restart Conduit after creating a user to fix this.");
}
let send_message = |message: message::MessageEventContent| {
if let Some(conduit_room) = &conduit_room {
db.rooms
.build_and_append_pdu(
PduBuilder {
event_type: EventType::RoomMessage,
content: serde_json::to_value(message)
.expect("event is valid, we just created it"),
unsigned: None,
state_key: None,
redacts: None,
},
&conduit_user,
&conduit_room,
&db,
)
.unwrap();
}
};
drop(guard);
let send_message =
|message: message::MessageEventContent, guard: RwLockReadGuard<'_, Database>| {
if let Some(conduit_room) = &conduit_room {
guard
.rooms
.build_and_append_pdu(
PduBuilder {
event_type: EventType::RoomMessage,
content: serde_json::to_value(message)
.expect("event is valid, we just created it"),
unsigned: None,
state_key: None,
redacts: None,
},
&conduit_user,
&conduit_room,
&guard,
)
.unwrap();
}
};
loop {
tokio::select! {
Some(event) = receiver.next() => {
let guard = db.read().await;
match event {
AdminCommand::RegisterAppservice(yaml) => {
db.appservice.register_appservice(yaml).unwrap(); // TODO handle error
guard.appservice.register_appservice(yaml).unwrap(); // TODO handle error
}
AdminCommand::ListAppservices => {
if let Ok(appservices) = db.appservice.iter_ids().map(|ids| ids.collect::<Vec<_>>()) {
if let Ok(appservices) = guard.appservice.iter_ids().map(|ids| ids.collect::<Vec<_>>()) {
let count = appservices.len();
let output = format!(
"Appservices ({}): {}",
count,
appservices.into_iter().filter_map(|r| r.ok()).collect::<Vec<_>>().join(", ")
);
send_message(message::MessageEventContent::text_plain(output));
send_message(message::MessageEventContent::text_plain(output), guard);
} else {
send_message(message::MessageEventContent::text_plain("Failed to get appservices."));
send_message(message::MessageEventContent::text_plain("Failed to get appservices."), guard);
}
}
AdminCommand::SendMessage(message) => {
send_message(message);
send_message(message, guard)
}
}
}

View file

@ -49,7 +49,7 @@ impl Appservice {
)
}
pub fn iter_ids(&self) -> Result<impl Iterator<Item = Result<String>> + Send + Sync + '_> {
pub fn iter_ids(&self) -> Result<impl Iterator<Item = Result<String>> + Send + '_> {
Ok(self.id_appserviceregistrations.iter().map(|(id, _)| {
utils::string_from_bytes(&id)
.map_err(|_| Error::bad_database("Invalid id bytes in id_appserviceregistrations."))
@ -58,7 +58,7 @@ impl Appservice {
pub fn iter_all(
&self,
) -> Result<impl Iterator<Item = Result<(String, serde_yaml::Value)>> + '_ + Send + Sync> {
) -> Result<impl Iterator<Item = Result<(String, serde_yaml::Value)>> + '_ + Send> {
Ok(self.iter_ids()?.filter_map(|id| id.ok()).map(move |id| {
Ok((
id.clone(),

View file

@ -11,11 +11,12 @@ use rustls::{ServerCertVerifier, WebPKIVerifier};
use std::{
collections::{BTreeMap, HashMap},
fs,
future::Future,
path::PathBuf,
sync::{Arc, RwLock},
time::{Duration, Instant},
};
use tokio::sync::Semaphore;
use tokio::sync::{broadcast, Semaphore};
use trust_dns_resolver::TokioAsyncResolver;
use super::abstraction::Tree;
@ -47,6 +48,7 @@ pub struct Globals {
), // since, rx
>,
>,
pub rotate: RotationHandler,
}
struct MatrixServerVerifier {
@ -82,6 +84,31 @@ impl ServerCertVerifier for MatrixServerVerifier {
}
}
/// Handles "rotation" of long-polling requests. "Rotation" in this context is similar to "rotation" of log files and the like.
///
/// This is utilized to have sync workers return early and release read locks on the database.
pub struct RotationHandler(broadcast::Sender<()>, broadcast::Receiver<()>);
impl RotationHandler {
pub fn new() -> Self {
let (s, r) = broadcast::channel::<()>(1);
Self(s, r)
}
pub fn watch(&self) -> impl Future<Output = ()> {
let mut r = self.0.subscribe();
async move {
let _ = r.recv().await;
}
}
pub fn fire(&self) {
let _ = self.0.send(());
}
}
impl Globals {
pub fn load(
globals: Arc<dyn Tree>,
@ -168,6 +195,7 @@ impl Globals {
bad_signature_ratelimiter: Arc::new(RwLock::new(BTreeMap::new())),
servername_ratelimiter: Arc::new(RwLock::new(BTreeMap::new())),
sync_receivers: RwLock::new(BTreeMap::new()),
rotate: RotationHandler::new(),
};
fs::create_dir_all(s.get_media_folder())?;

View file

@ -73,7 +73,7 @@ impl PushData {
pub fn get_pusher_senderkeys<'a>(
&'a self,
sender: &UserId,
) -> impl Iterator<Item = Box<[u8]>> + 'a {
) -> impl Iterator<Item = Vec<u8>> + 'a {
let mut prefix = sender.as_bytes().to_vec();
prefix.push(0xff);

View file

@ -1078,13 +1078,13 @@ impl Rooms {
.scan_prefix(old_shortstatehash.clone())
// Chop the old_shortstatehash out leaving behind the short state key
.map(|(k, v)| (k[old_shortstatehash.len()..].to_vec(), v))
.collect::<HashMap<Vec<u8>, Box<[u8]>>>()
.collect::<HashMap<Vec<u8>, Vec<u8>>>()
} else {
HashMap::new()
};
if let Some(state_key) = &new_pdu.state_key {
let mut new_state: HashMap<Vec<u8>, Box<[u8]>> = old_state;
let mut new_state: HashMap<Vec<u8>, Vec<u8>> = old_state;
let mut new_state_key = new_pdu.kind.as_ref().as_bytes().to_vec();
new_state_key.push(0xff);
@ -1450,7 +1450,7 @@ impl Rooms {
&'a self,
user_id: &UserId,
room_id: &RoomId,
) -> impl Iterator<Item = Result<(Box<[u8]>, PduEvent)>> + 'a {
) -> impl Iterator<Item = Result<(Vec<u8>, PduEvent)>> + 'a {
self.pdus_since(user_id, room_id, 0)
}
@ -1462,7 +1462,7 @@ impl Rooms {
user_id: &UserId,
room_id: &RoomId,
since: u64,
) -> impl Iterator<Item = Result<(Box<[u8]>, PduEvent)>> + 'a {
) -> impl Iterator<Item = Result<(Vec<u8>, PduEvent)>> + 'a {
let mut prefix = room_id.as_bytes().to_vec();
prefix.push(0xff);
@ -1491,7 +1491,7 @@ impl Rooms {
user_id: &UserId,
room_id: &RoomId,
until: u64,
) -> impl Iterator<Item = Result<(Box<[u8]>, PduEvent)>> + 'a {
) -> impl Iterator<Item = Result<(Vec<u8>, PduEvent)>> + 'a {
// Create the first part of the full pdu id
let mut prefix = room_id.as_bytes().to_vec();
prefix.push(0xff);
@ -1523,7 +1523,7 @@ impl Rooms {
user_id: &UserId,
room_id: &RoomId,
from: u64,
) -> impl Iterator<Item = Result<(Box<[u8]>, PduEvent)>> + 'a {
) -> impl Iterator<Item = Result<(Vec<u8>, PduEvent)>> + 'a {
// Create the first part of the full pdu id
let mut prefix = room_id.as_bytes().to_vec();
prefix.push(0xff);

View file

@ -30,7 +30,10 @@ use ruma::{
receipt::ReceiptType,
MilliSecondsSinceUnixEpoch, ServerName, UInt, UserId,
};
use tokio::{select, sync::Semaphore};
use tokio::{
select,
sync::{RwLock, Semaphore},
};
use super::abstraction::Tree;
@ -90,7 +93,11 @@ enum TransactionStatus {
}
impl Sending {
pub fn start_handler(&self, db: Arc<Database>, mut receiver: mpsc::UnboundedReceiver<Vec<u8>>) {
pub fn start_handler(
&self,
db: Arc<RwLock<Database>>,
mut receiver: mpsc::UnboundedReceiver<Vec<u8>>,
) {
tokio::spawn(async move {
let mut futures = FuturesUnordered::new();
@ -98,8 +105,12 @@ impl Sending {
// Retry requests we could not finish yet
let mut initial_transactions = HashMap::<OutgoingKind, Vec<SendingEventType>>::new();
let guard = db.read().await;
for (key, outgoing_kind, event) in
db.sending
guard
.sending
.servercurrentevents
.iter()
.filter_map(|(key, _)| {
@ -117,17 +128,23 @@ impl Sending {
"Dropping some current events: {:?} {:?} {:?}",
key, outgoing_kind, event
);
db.sending.servercurrentevents.remove(&key).unwrap();
guard.sending.servercurrentevents.remove(&key).unwrap();
continue;
}
entry.push(event);
}
drop(guard);
for (outgoing_kind, events) in initial_transactions {
current_transaction_status
.insert(outgoing_kind.get_prefix(), TransactionStatus::Running);
futures.push(Self::handle_events(outgoing_kind.clone(), events, &db));
futures.push(Self::handle_events(
outgoing_kind.clone(),
events,
Arc::clone(&db),
));
}
loop {
@ -135,15 +152,17 @@ impl Sending {
Some(response) = futures.next() => {
match response {
Ok(outgoing_kind) => {
let guard = db.read().await;
let prefix = outgoing_kind.get_prefix();
for (key, _) in db.sending.servercurrentevents
for (key, _) in guard.sending.servercurrentevents
.scan_prefix(prefix.clone())
{
db.sending.servercurrentevents.remove(&key).unwrap();
guard.sending.servercurrentevents.remove(&key).unwrap();
}
// Find events that have been added since starting the last request
let new_events = db.sending.servernamepduids
let new_events = guard.sending.servernamepduids
.scan_prefix(prefix.clone())
.map(|(k, _)| {
SendingEventType::Pdu(k[prefix.len()..].to_vec())
@ -161,17 +180,19 @@ impl Sending {
SendingEventType::Pdu(b) |
SendingEventType::Edu(b) => {
current_key.extend_from_slice(&b);
db.sending.servercurrentevents.insert(&current_key, &[]).unwrap();
db.sending.servernamepduids.remove(&current_key).unwrap();
guard.sending.servercurrentevents.insert(&current_key, &[]).unwrap();
guard.sending.servernamepduids.remove(&current_key).unwrap();
}
}
}
drop(guard);
futures.push(
Self::handle_events(
outgoing_kind.clone(),
new_events,
&db,
Arc::clone(&db),
)
);
} else {
@ -192,13 +213,15 @@ impl Sending {
},
Some(key) = receiver.next() => {
if let Ok((outgoing_kind, event)) = Self::parse_servercurrentevent(&key) {
let guard = db.read().await;
if let Ok(Some(events)) = Self::select_events(
&outgoing_kind,
vec![(event, key)],
&mut current_transaction_status,
&db
&guard
) {
futures.push(Self::handle_events(outgoing_kind, events, &db));
futures.push(Self::handle_events(outgoing_kind, events, Arc::clone(&db)));
}
}
}
@ -357,7 +380,7 @@ impl Sending {
}
#[tracing::instrument(skip(self))]
pub fn send_push_pdu(&self, pdu_id: &[u8], senderkey: Box<[u8]>) -> Result<()> {
pub fn send_push_pdu(&self, pdu_id: &[u8], senderkey: Vec<u8>) -> Result<()> {
let mut key = b"$".to_vec();
key.extend_from_slice(&senderkey);
key.push(0xff);
@ -403,8 +426,10 @@ impl Sending {
async fn handle_events(
kind: OutgoingKind,
events: Vec<SendingEventType>,
db: &Database,
db: Arc<RwLock<Database>>,
) -> std::result::Result<OutgoingKind, (OutgoingKind, Error)> {
let db = db.read().await;
match &kind {
OutgoingKind::Appservice(server) => {
let mut pdu_jsons = Vec::new();
@ -543,7 +568,7 @@ impl Sending {
&pusher,
rules_for_user,
&pdu,
db,
&db,
)
.await
.map(|_response| kind.clone())