refactor database engine/options; add column descriptors
Signed-off-by: Jason Volk <jason@zemos.net>
parent 02f19cf951
commit 6a0f9add0c
21 changed files with 1358 additions and 965 deletions
73	src/database/engine/backup.rs	Normal file
@@ -0,0 +1,73 @@
use std::fmt::Write;

use conduwuit::{error, implement, info, utils::time::rfc2822_from_seconds, warn, Result};
use rocksdb::backup::{BackupEngine, BackupEngineOptions};

use super::Engine;
use crate::{or_else, util::map_err};

#[implement(Engine)]
#[tracing::instrument(skip(self))]
pub fn backup(&self) -> Result {
	let server = &self.ctx.server;
	let config = &server.config;
	let path = config.database_backup_path.as_ref();
	if path.is_none() || path.is_some_and(|path| path.as_os_str().is_empty()) {
		return Ok(());
	}

	let options =
		BackupEngineOptions::new(path.expect("valid database backup path")).map_err(map_err)?;
	let mut engine = BackupEngine::open(&options, &*self.ctx.env.lock()?).map_err(map_err)?;
	if config.database_backups_to_keep > 0 {
		let flush = !self.is_read_only();
		engine
			.create_new_backup_flush(&self.db, flush)
			.map_err(map_err)?;

		let engine_info = engine.get_backup_info();
		let info = &engine_info.last().expect("backup engine info is not empty");
		info!(
			"Created database backup #{} using {} bytes in {} files",
			info.backup_id, info.size, info.num_files,
		);
	}

	if config.database_backups_to_keep >= 0 {
		let keep = u32::try_from(config.database_backups_to_keep)?;
		if let Err(e) = engine.purge_old_backups(keep.try_into()?) {
			error!("Failed to purge old backup: {e:?}");
		}
	}

	Ok(())
}

#[implement(Engine)]
pub fn backup_list(&self) -> Result<String> {
	let server = &self.ctx.server;
	let config = &server.config;
	let path = config.database_backup_path.as_ref();
	if path.is_none() || path.is_some_and(|path| path.as_os_str().is_empty()) {
		return Ok("Configure database_backup_path to enable backups, or the path specified is \
		           not valid"
			.to_owned());
	}

	let mut res = String::new();
	let options =
		BackupEngineOptions::new(path.expect("valid database backup path")).or_else(or_else)?;
	let engine = BackupEngine::open(&options, &*self.ctx.env.lock()?).or_else(or_else)?;
	for info in engine.get_backup_info() {
		writeln!(
			res,
			"#{} {}: {} bytes, {} files",
			info.backup_id,
			rfc2822_from_seconds(info.timestamp),
			info.size,
			info.num_files,
		)?;
	}

	Ok(res)
}
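
A minimal sketch of how a caller might drive these two entry points, for example from a scheduled admin task; nightly_backup is hypothetical and assumes only the backup() and backup_list() signatures above:

use conduwuit::{info, Result};

use super::Engine;

// Hypothetical caller: creates a backup (and purges old ones) according to
// the database_backup_path / database_backups_to_keep config consumed above,
// then logs the resulting backup listing.
fn nightly_backup(engine: &Engine) -> Result {
	engine.backup()?;
	info!("{}", engine.backup_list()?);
	Ok(())
}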
214	src/database/engine/cf_opts.rs	Normal file
@@ -0,0 +1,214 @@
use conduwuit::{
	err,
	utils::{math::Expected, BoolExt},
	Config, Result,
};
use rocksdb::{
	BlockBasedIndexType, BlockBasedOptions, BlockBasedPinningTier, Cache,
	DBCompressionType as CompressionType, DataBlockIndexType, LruCacheOptions, Options,
	UniversalCompactOptions, UniversalCompactionStopStyle,
};

use super::descriptor::{CacheDisp, Descriptor};
use crate::Context;

/// Adjust options for the specific column by name. Provide the result of
/// db_options() as the argument to this function and use the return value in
/// the arguments to open the specific column.
pub(crate) fn cf_options(ctx: &Context, opts: Options, desc: &Descriptor) -> Result<Options> {
	let cache = get_cache(ctx, desc);
	let config = &ctx.server.config;
	descriptor_cf_options(opts, desc.clone(), config, cache.as_ref())
}

fn descriptor_cf_options(
	mut opts: Options,
	mut desc: Descriptor,
	config: &Config,
	cache: Option<&Cache>,
) -> Result<Options> {
	set_compression(&mut desc, config);
	set_table_options(&mut opts, &desc, cache);

	opts.set_min_write_buffer_number(1);
	opts.set_max_write_buffer_number(2);
	if let Some(write_size) = desc.write_size {
		opts.set_write_buffer_size(write_size);
	}

	opts.set_target_file_size_base(desc.file_size);
	opts.set_target_file_size_multiplier(desc.file_shape[0]);

	opts.set_level_zero_file_num_compaction_trigger(desc.level0_width);
	opts.set_level_compaction_dynamic_level_bytes(false);
	opts.set_ttl(desc.ttl);

	opts.set_max_bytes_for_level_base(desc.level_size);
	opts.set_max_bytes_for_level_multiplier(1.0);
	opts.set_max_bytes_for_level_multiplier_additional(&desc.level_shape);

	opts.set_compaction_style(desc.compaction);
	opts.set_compaction_pri(desc.compaction_pri);
	opts.set_universal_compaction_options(&uc_options(&desc));

	opts.set_compression_type(desc.compression);
	opts.set_compression_options(-14, desc.compression_level, 0, 0); // -14 w_bits used by zlib.
	if let Some(&bottommost_level) = desc.bottommost_level.as_ref() {
		opts.set_bottommost_compression_type(desc.compression);
		opts.set_bottommost_zstd_max_train_bytes(0, true);
		opts.set_bottommost_compression_options(
			-14, // -14 w_bits is only read by zlib.
			bottommost_level,
			0,
			0,
			true,
		);
	}

	Ok(opts)
}

fn set_table_options(opts: &mut Options, desc: &Descriptor, cache: Option<&Cache>) {
	let mut table = table_options(desc);
	if let Some(cache) = cache {
		table.set_block_cache(cache);
	} else {
		table.disable_cache();
	}

	opts.set_block_based_table_factory(&table);
}

fn set_compression(desc: &mut Descriptor, config: &Config) {
	desc.compression = match config.rocksdb_compression_algo.as_ref() {
		| "snappy" => CompressionType::Snappy,
		| "zlib" => CompressionType::Zlib,
		| "bz2" => CompressionType::Bz2,
		| "lz4" => CompressionType::Lz4,
		| "lz4hc" => CompressionType::Lz4hc,
		| "none" => CompressionType::None,
		| _ => CompressionType::Zstd,
	};

	desc.compression_level = config.rocksdb_compression_level;
	desc.bottommost_level = config
		.rocksdb_bottommost_compression
		.then_some(config.rocksdb_bottommost_compression_level);
}

fn uc_options(desc: &Descriptor) -> UniversalCompactOptions {
	let mut opts = UniversalCompactOptions::default();
	opts.set_stop_style(UniversalCompactionStopStyle::Total);
	opts.set_min_merge_width(desc.merge_width.0);
	opts.set_max_merge_width(desc.merge_width.1);
	opts.set_max_size_amplification_percent(10000);
	opts.set_compression_size_percent(-1);
	opts.set_size_ratio(1);

	opts
}

fn table_options(desc: &Descriptor) -> BlockBasedOptions {
	let mut opts = BlockBasedOptions::default();

	opts.set_block_size(desc.block_size);
	opts.set_metadata_block_size(desc.index_size);

	opts.set_cache_index_and_filter_blocks(true);
	opts.set_pin_top_level_index_and_filter(false);
	opts.set_pin_l0_filter_and_index_blocks_in_cache(false);
	opts.set_partition_pinning_tier(BlockBasedPinningTier::None);
	opts.set_unpartitioned_pinning_tier(BlockBasedPinningTier::None);
	opts.set_top_level_index_pinning_tier(BlockBasedPinningTier::None);

	opts.set_use_delta_encoding(false);
	opts.set_index_type(BlockBasedIndexType::TwoLevelIndexSearch);
	opts.set_data_block_index_type(
		desc.block_index_hashing
			.map_or(DataBlockIndexType::BinarySearch, || DataBlockIndexType::BinaryAndHash),
	);

	opts
}

fn get_cache(ctx: &Context, desc: &Descriptor) -> Option<Cache> {
	let config = &ctx.server.config;

	// Some cache capacities are overridden by server config in a strange but
	// legacy-compat way
	let cap = match desc.name {
		| "eventid_pduid" => Some(config.eventid_pdu_cache_capacity),
		| "eventid_shorteventid" => Some(config.eventidshort_cache_capacity),
		| "shorteventid_eventid" => Some(config.shorteventid_cache_capacity),
		| "shorteventid_authchain" => Some(config.auth_chain_cache_capacity),
		| "shortstatekey_statekey" => Some(config.shortstatekey_cache_capacity),
		| "statekey_shortstatekey" => Some(config.statekeyshort_cache_capacity),
		| "servernameevent_data" => Some(config.servernameevent_data_cache_capacity),
		| "pduid_pdu" | "eventid_outlierpdu" => Some(config.pdu_cache_capacity),
		| _ => None,
	}
	.map(TryInto::try_into)
	.transpose()
	.expect("u32 to usize");

	let ent_size: usize = desc
		.key_size_hint
		.unwrap_or_default()
		.expected_add(desc.val_size_hint.unwrap_or_default());

	let size = match cap {
		| Some(cap) => cache_size(config, cap, ent_size),
		| _ => desc.cache_size,
	};

	let shard_bits: i32 = desc
		.cache_shards
		.ilog2()
		.try_into()
		.expect("u32 to i32 conversion");

	debug_assert!(shard_bits <= 6, "cache shards limited to 64");
	let mut cache_opts = LruCacheOptions::default();
	cache_opts.set_num_shard_bits(shard_bits);
	cache_opts.set_capacity(size);

	let mut caches = ctx.col_cache.lock().expect("locked");
	match desc.cache_disp {
		| CacheDisp::Unique if desc.cache_size == 0 => None,
		| CacheDisp::Unique => {
			let cache = Cache::new_lru_cache_opts(&cache_opts);
			caches.insert(desc.name.into(), cache.clone());
			Some(cache)
		},

		| CacheDisp::SharedWith(other) if !caches.contains_key(other) => {
			let cache = Cache::new_lru_cache_opts(&cache_opts);
			caches.insert(desc.name.into(), cache.clone());
			Some(cache)
		},

		| CacheDisp::SharedWith(other) => Some(
			caches
				.get(other)
				.cloned()
				.expect("caches.contains_key(other) must be true"),
		),

		| CacheDisp::Shared => Some(
			caches
				.get("Shared")
				.cloned()
				.expect("shared cache must already exist"),
		),
	}
}

#[allow(clippy::as_conversions, clippy::cast_sign_loss, clippy::cast_possible_truncation)]
pub(crate) fn cache_size(config: &Config, base_size: u32, entity_size: usize) -> usize {
	let ents = f64::from(base_size) * config.cache_capacity_modifier;

	(ents as usize)
		.checked_mul(entity_size)
		.ok_or_else(|| err!(Config("cache_capacity_modifier", "Cache size is too large.")))
		.expect("invalid cache size")
}
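
As a worked example of the cache_size() arithmetic above, here is a sketch with the modifier passed explicitly (cache_size_demo is hypothetical and the numbers are illustrative, not defaults from this commit):

// Mirrors cache_size(): entry count scaled by cache_capacity_modifier, then
// multiplied out to bytes by the per-entry size estimate.
fn cache_size_demo(base_size: u32, modifier: f64, entity_size: usize) -> usize {
	let ents = f64::from(base_size) * modifier;
	(ents as usize)
		.checked_mul(entity_size)
		.expect("cache size fits in usize")
}

#[test]
fn cache_size_arithmetic() {
	// 10_000 entries at ~64 bytes each with modifier 1.0 => 640_000 bytes.
	assert_eq!(cache_size_demo(10_000, 1.0, 64), 640_000);
}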
73	src/database/engine/context.rs	Normal file
@@ -0,0 +1,73 @@
use std::{
	collections::BTreeMap,
	sync::{Arc, Mutex},
};

use conduwuit::{debug, utils::math::usize_from_f64, Result, Server};
use rocksdb::{Cache, Env};

use crate::{or_else, pool::Pool};

/// Some components are constructed prior to opening the database and must
/// outlive the database. These can also be shared between database instances,
/// though at the time of this comment we only open one database per process.
/// These assets are housed in the shared Context.
pub(crate) struct Context {
	pub(crate) pool: Arc<Pool>,
	pub(crate) col_cache: Mutex<BTreeMap<String, Cache>>,
	pub(crate) row_cache: Mutex<Cache>,
	pub(crate) env: Mutex<Env>,
	pub(crate) server: Arc<Server>,
}

impl Context {
	pub(crate) fn new(server: &Arc<Server>) -> Result<Arc<Self>> {
		let config = &server.config;
		let cache_capacity_bytes = config.db_cache_capacity_mb * 1024.0 * 1024.0;

		let row_cache_capacity_bytes = usize_from_f64(cache_capacity_bytes * 0.50)?;
		let row_cache = Cache::new_lru_cache(row_cache_capacity_bytes);

		let col_cache_capacity_bytes = usize_from_f64(cache_capacity_bytes * 0.50)?;
		let col_cache = Cache::new_lru_cache(col_cache_capacity_bytes);

		let col_cache: BTreeMap<_, _> = [("Shared".to_owned(), col_cache)].into();

		let mut env = Env::new().or_else(or_else)?;

		if config.rocksdb_compaction_prio_idle {
			env.lower_thread_pool_cpu_priority();
		}

		if config.rocksdb_compaction_ioprio_idle {
			env.lower_thread_pool_io_priority();
		}

		Ok(Arc::new(Self {
			pool: Pool::new(server)?,
			col_cache: col_cache.into(),
			row_cache: row_cache.into(),
			env: env.into(),
			server: server.clone(),
		}))
	}
}

impl Drop for Context {
	#[cold]
	fn drop(&mut self) {
		debug!("Closing frontend pool");
		self.pool.close();

		let mut env = self.env.lock().expect("locked");

		debug!("Shutting down background threads");
		env.set_high_priority_background_threads(0);
		env.set_low_priority_background_threads(0);
		env.set_bottom_priority_background_threads(0);
		env.set_background_threads(0);

		debug!("Joining background threads...");
		env.join_all_threads();
	}
}
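
A minimal sketch of the construction order this implies, assuming Engine::open() from open.rs and the module paths used in this diff; the startup function and its descriptor slice are hypothetical:

use std::sync::Arc;

use conduwuit::{Result, Server};

use crate::{engine::descriptor::Descriptor, Context, Engine};

// Hypothetical startup path: the Context is built first and handed to the
// Engine, which keeps it alive (env, caches, pool) for its whole lifetime.
async fn startup(server: &Arc<Server>, desc: &[Descriptor]) -> Result<Arc<Engine>> {
	let ctx = Context::new(server)?;
	Engine::open(ctx, desc).await
}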
133	src/database/engine/db_opts.rs	Normal file
@@ -0,0 +1,133 @@
use std::{cmp, convert::TryFrom};

use conduwuit::{utils, Config, Result};
use rocksdb::{statistics::StatsLevel, Cache, DBRecoveryMode, Env, LogLevel, Options};

use super::{cf_opts::cache_size, logger::handle as handle_log};

/// Create database-wide options suitable for opening the database. This also
/// sets our default column options in case of opening a column with the same
/// resulting value. Note that we require special per-column options on some
/// columns, therefore columns should only be opened after passing this result
/// through cf_options().
pub(crate) fn db_options(config: &Config, env: &Env, row_cache: &Cache) -> Result<Options> {
	const DEFAULT_STATS_LEVEL: StatsLevel = if cfg!(debug_assertions) {
		StatsLevel::ExceptDetailedTimers
	} else {
		StatsLevel::DisableAll
	};

	let mut opts = Options::default();

	// Logging
	set_logging_defaults(&mut opts, config);

	// Processing
	opts.set_max_background_jobs(num_threads::<i32>(config)?);
	opts.set_max_subcompactions(num_threads::<u32>(config)?);
	opts.set_avoid_unnecessary_blocking_io(true);
	opts.set_max_file_opening_threads(0);

	// IO
	opts.set_atomic_flush(true);
	opts.set_manual_wal_flush(true);
	opts.set_enable_pipelined_write(false);
	if config.rocksdb_direct_io {
		opts.set_use_direct_reads(true);
		opts.set_use_direct_io_for_flush_and_compaction(true);
	}
	if config.rocksdb_optimize_for_spinning_disks {
		// speeds up opening DB on hard drives
		opts.set_skip_checking_sst_file_sizes_on_db_open(true);
		opts.set_skip_stats_update_on_db_open(true);
		//opts.set_max_file_opening_threads(threads.try_into().unwrap());
	}

	// Blocks
	opts.set_row_cache(row_cache);

	// Files
	opts.set_table_cache_num_shard_bits(7);
	opts.set_wal_size_limit_mb(1024 * 1024 * 1024);
	opts.set_max_total_wal_size(1024 * 1024 * 512);
	opts.set_db_write_buffer_size(cache_size(config, 1024 * 1024 * 32, 1));

	// Misc
	opts.set_disable_auto_compactions(!config.rocksdb_compaction);
	opts.create_missing_column_families(true);
	opts.create_if_missing(true);

	opts.set_statistics_level(match config.rocksdb_stats_level {
		| 0 => StatsLevel::DisableAll,
		| 1 => DEFAULT_STATS_LEVEL,
		| 2 => StatsLevel::ExceptHistogramOrTimers,
		| 3 => StatsLevel::ExceptTimers,
		| 4 => StatsLevel::ExceptDetailedTimers,
		| 5 => StatsLevel::ExceptTimeForMutex,
		| 6_u8..=u8::MAX => StatsLevel::All,
	});

	opts.set_report_bg_io_stats(match config.rocksdb_stats_level {
		| 0..=1 => false,
		| 2_u8..=u8::MAX => true,
	});

	// Default: https://github.com/facebook/rocksdb/wiki/WAL-Recovery-Modes#ktoleratecorruptedtailrecords
	//
	// Unclean shutdowns of a Matrix homeserver are likely to be fine when
	// recovered in this manner as it's likely any lost information will be
	// restored via federation.
	opts.set_wal_recovery_mode(match config.rocksdb_recovery_mode {
		| 0 => DBRecoveryMode::AbsoluteConsistency,
		| 1 => DBRecoveryMode::TolerateCorruptedTailRecords,
		| 2 => DBRecoveryMode::PointInTime,
		| 3 => DBRecoveryMode::SkipAnyCorruptedRecord,
		| 4_u8..=u8::MAX => unimplemented!(),
	});

	// <https://github.com/facebook/rocksdb/wiki/Track-WAL-in-MANIFEST>
	// "We recommend to set track_and_verify_wals_in_manifest to true for
	// production, it has been enabled in production for the entire database cluster
	// serving the social graph for all Meta apps."
	opts.set_track_and_verify_wals_in_manifest(true);

	opts.set_paranoid_checks(config.rocksdb_paranoid_file_checks);

	opts.set_env(env);

	Ok(opts)
}

fn set_logging_defaults(opts: &mut Options, config: &Config) {
	let rocksdb_log_level = match config.rocksdb_log_level.as_ref() {
		| "debug" => LogLevel::Debug,
		| "info" => LogLevel::Info,
		| "warn" => LogLevel::Warn,
		| "fatal" => LogLevel::Fatal,
		| _ => LogLevel::Error,
	};

	opts.set_log_level(rocksdb_log_level);
	opts.set_max_log_file_size(config.rocksdb_max_log_file_size);
	opts.set_log_file_time_to_roll(config.rocksdb_log_time_to_roll);
	opts.set_keep_log_file_num(config.rocksdb_max_log_files);
	opts.set_stats_dump_period_sec(0);

	if config.rocksdb_log_stderr {
		opts.set_stderr_logger(rocksdb_log_level, "rocksdb");
	} else {
		opts.set_callback_logger(rocksdb_log_level, &handle_log);
	}
}

fn num_threads<T: TryFrom<usize>>(config: &Config) -> Result<T> {
	const MIN_PARALLELISM: usize = 2;

	let requested = if config.rocksdb_parallelism_threads != 0 {
		config.rocksdb_parallelism_threads
	} else {
		utils::available_parallelism()
	};

	utils::math::try_into::<T, usize>(cmp::max(MIN_PARALLELISM, requested))
}
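
A minimal sketch of the contract stated in the doc comment above: db_options() is built once, then each column passes the result through cf_options() before that column is opened. The function and the "example" column name are illustrative; the real open path (open.rs) does this for every described column:

use conduwuit::Result;
use rocksdb::{Cache, ColumnFamilyDescriptor, Env};

use crate::{
	engine::{cf_opts::cf_options, db_opts::db_options, descriptor},
	Context,
};

// Sketch only: database-wide options specialized for a single column.
fn describe_one_column(ctx: &Context, env: &Env, row_cache: &Cache) -> Result<ColumnFamilyDescriptor> {
	let config = &ctx.server.config;
	let db_opts = db_options(config, env, row_cache)?;
	let cf_opts = cf_options(ctx, db_opts.clone(), &descriptor::BASE)?;
	Ok(ColumnFamilyDescriptor::new("example", cf_opts))
}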
89	src/database/engine/descriptor.rs	Normal file
@@ -0,0 +1,89 @@
use conduwuit::utils::string::EMPTY;
use rocksdb::{
	DBCompactionPri as CompactionPri, DBCompactionStyle as CompactionStyle,
	DBCompressionType as CompressionType,
};

#[derive(Debug, Clone, Copy)]
pub(crate) enum CacheDisp {
	Unique,
	Shared,
	SharedWith(&'static str),
}

#[derive(Debug, Clone)]
pub(crate) struct Descriptor {
	pub(crate) name: &'static str,
	pub(crate) cache_disp: CacheDisp,
	pub(crate) key_size_hint: Option<usize>,
	pub(crate) val_size_hint: Option<usize>,
	pub(crate) block_size: usize,
	pub(crate) index_size: usize,
	pub(crate) write_size: Option<usize>,
	pub(crate) cache_size: usize,
	pub(crate) level_size: u64,
	pub(crate) level_shape: [i32; 7],
	pub(crate) file_size: u64,
	pub(crate) file_shape: [i32; 1],
	pub(crate) level0_width: i32,
	pub(crate) merge_width: (i32, i32),
	pub(crate) ttl: u64,
	pub(crate) compaction: CompactionStyle,
	pub(crate) compaction_pri: CompactionPri,
	pub(crate) compression: CompressionType,
	pub(crate) compression_level: i32,
	pub(crate) bottommost_level: Option<i32>,
	pub(crate) block_index_hashing: bool,
	pub(crate) cache_shards: u32,
}

pub(crate) static BASE: Descriptor = Descriptor {
	name: EMPTY,
	cache_disp: CacheDisp::Shared,
	key_size_hint: None,
	val_size_hint: None,
	block_size: 1024 * 4,
	index_size: 1024 * 4,
	write_size: None,
	cache_size: 1024 * 1024 * 4,
	level_size: 1024 * 1024 * 8,
	level_shape: [1, 1, 1, 3, 7, 15, 31],
	file_size: 1024 * 1024,
	file_shape: [2],
	level0_width: 2,
	merge_width: (2, 16),
	ttl: 60 * 60 * 24 * 21,
	compaction: CompactionStyle::Level,
	compaction_pri: CompactionPri::MinOverlappingRatio,
	compression: CompressionType::Zstd,
	compression_level: 32767,
	bottommost_level: Some(32767),
	block_index_hashing: false,
	cache_shards: 64,
};

pub(crate) static RANDOM: Descriptor = Descriptor {
	compaction_pri: CompactionPri::OldestSmallestSeqFirst,
	..BASE
};

pub(crate) static SEQUENTIAL: Descriptor = Descriptor {
	compaction_pri: CompactionPri::OldestLargestSeqFirst,
	level_size: 1024 * 1024 * 32,
	file_size: 1024 * 1024 * 2,
	..BASE
};

pub(crate) static RANDOM_SMALL: Descriptor = Descriptor {
	compaction: CompactionStyle::Universal,
	level_size: 1024 * 512,
	file_size: 1024 * 128,
	..RANDOM
};

pub(crate) static SEQUENTIAL_SMALL: Descriptor = Descriptor {
	compaction: CompactionStyle::Universal,
	level_size: 1024 * 1024,
	file_size: 1024 * 512,
	..SEQUENTIAL
};
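
Column definitions elsewhere can then derive from these presets with struct-update syntax; the descriptor below is purely illustrative, not a column added by this commit:

// Hypothetical column: small, written sequentially, with its own cache
// rather than the shared one. Name and sizes are examples only.
pub(crate) static EXAMPLE_QUEUE: Descriptor = Descriptor {
	name: "example_queue",
	cache_disp: CacheDisp::Unique,
	val_size_hint: Some(1024),
	..SEQUENTIAL_SMALL
};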
32	src/database/engine/files.rs	Normal file
@@ -0,0 +1,32 @@
use std::fmt::Write;

use conduwuit::{implement, Result};

use super::Engine;

#[implement(Engine)]
pub fn file_list(&self) -> Result<String> {
	match self.db.live_files() {
		| Err(e) => Ok(String::from(e)),
		| Ok(mut files) => {
			files.sort_by_key(|f| f.name.clone());
			let mut res = String::new();
			writeln!(res, "| lev | sst | keys | dels | size | column |")?;
			writeln!(res, "| ---: | :--- | ---: | ---: | ---: | :--- |")?;
			for file in files {
				writeln!(
					res,
					"| {} | {:<13} | {:7}+ | {:4}- | {:9} | {} |",
					file.level,
					file.name,
					file.num_entries,
					file.num_deletions,
					file.size,
					file.column_family_name,
				)?;
			}

			Ok(res)
		},
	}
}
22	src/database/engine/logger.rs	Normal file
@@ -0,0 +1,22 @@
use conduwuit::{debug, error, warn};
use rocksdb::LogLevel;

#[tracing::instrument(
	parent = None,
	name = "rocksdb",
	level = "trace",
	skip(msg),
)]
pub(crate) fn handle(level: LogLevel, msg: &str) {
	let msg = msg.trim();
	if msg.starts_with("Options") {
		return;
	}

	match level {
		| LogLevel::Header | LogLevel::Debug => debug!("{msg}"),
		| LogLevel::Error | LogLevel::Fatal => error!("{msg}"),
		| LogLevel::Info => debug!("{msg}"),
		| LogLevel::Warn => warn!("{msg}"),
	};
}
30	src/database/engine/memory_usage.rs	Normal file
@@ -0,0 +1,30 @@
use std::fmt::Write;

use conduwuit::{implement, Result};
use rocksdb::perf::get_memory_usage_stats;

use super::Engine;
use crate::or_else;

#[implement(Engine)]
pub fn memory_usage(&self) -> Result<String> {
	let mut res = String::new();
	let stats = get_memory_usage_stats(Some(&[&self.db]), Some(&[&*self.ctx.row_cache.lock()?]))
		.or_else(or_else)?;
	let mibs = |input| f64::from(u32::try_from(input / 1024).unwrap_or(0)) / 1024.0;
	writeln!(
		res,
		"Memory buffers: {:.2} MiB\nPending write: {:.2} MiB\nTable readers: {:.2} MiB\nRow \
		 cache: {:.2} MiB",
		mibs(stats.mem_table_total),
		mibs(stats.mem_table_unflushed),
		mibs(stats.mem_table_readers_total),
		mibs(u64::try_from(self.ctx.row_cache.lock()?.get_usage())?),
	)?;

	for (name, cache) in &*self.ctx.col_cache.lock()? {
		writeln!(res, "{name} cache: {:.2} MiB", mibs(u64::try_from(cache.get_usage())?))?;
	}

	Ok(res)
}
121	src/database/engine/open.rs	Normal file
@@ -0,0 +1,121 @@
use std::{
	collections::BTreeSet,
	path::Path,
	sync::{atomic::AtomicU32, Arc},
};

use conduwuit::{debug, debug_warn, implement, info, warn, Result};
use rocksdb::{ColumnFamilyDescriptor, Options};

use super::{
	cf_opts::cf_options, db_opts::db_options, descriptor::Descriptor, repair::repair, Db, Engine,
};
use crate::{or_else, Context};

#[implement(Engine)]
#[tracing::instrument(skip_all)]
pub(crate) async fn open(ctx: Arc<Context>, desc: &[Descriptor]) -> Result<Arc<Self>> {
	let server = &ctx.server;
	let config = &server.config;
	let path = &config.database_path;

	let db_opts = db_options(
		config,
		&ctx.env.lock().expect("environment locked"),
		&ctx.row_cache.lock().expect("row cache locked"),
	)?;

	let cfds = Self::configure_cfds(&ctx, &db_opts, desc)?;
	let num_cfds = cfds.len();
	debug!("Configured {num_cfds} column descriptors...");

	let load_time = std::time::Instant::now();
	if config.rocksdb_repair {
		repair(&db_opts, &config.database_path)?;
	}

	debug!("Opening database...");
	let db = if config.rocksdb_read_only {
		Db::open_cf_descriptors_read_only(&db_opts, path, cfds, false)
	} else if config.rocksdb_secondary {
		Db::open_cf_descriptors_as_secondary(&db_opts, path, path, cfds)
	} else {
		Db::open_cf_descriptors(&db_opts, path, cfds)
	}
	.or_else(or_else)?;

	info!(
		columns = num_cfds,
		sequence = %db.latest_sequence_number(),
		time = ?load_time.elapsed(),
		"Opened database."
	);

	Ok(Arc::new(Self {
		read_only: config.rocksdb_read_only,
		secondary: config.rocksdb_secondary,
		corks: AtomicU32::new(0),
		pool: ctx.pool.clone(),
		db,
		ctx,
	}))
}

#[implement(Engine)]
#[tracing::instrument(name = "configure", skip_all)]
fn configure_cfds(
	ctx: &Arc<Context>,
	db_opts: &Options,
	desc: &[Descriptor],
) -> Result<Vec<ColumnFamilyDescriptor>> {
	let server = &ctx.server;
	let config = &server.config;
	let path = &config.database_path;
	let existing = Self::discover_cfs(path, db_opts);
	debug!(
		"Found {} existing columns; have {} described columns",
		existing.len(),
		desc.len()
	);

	existing
		.iter()
		.filter(|&name| name != "default")
		.filter(|&name| !desc.iter().any(|desc| desc.name == name))
		.for_each(|name| {
			debug_warn!("Found unknown column {name:?} in database which will not be opened.");
		});

	desc.iter()
		.filter(|desc| !existing.contains(desc.name))
		.for_each(|desc| {
			debug!(
				"Creating new column {:?} which was not found in the existing database.",
				desc.name,
			);
		});

	let cfopts: Vec<_> = desc
		.iter()
		.map(|desc| cf_options(ctx, db_opts.clone(), desc))
		.collect::<Result<_>>()?;

	let cfds: Vec<_> = desc
		.iter()
		.map(|desc| desc.name)
		.map(ToOwned::to_owned)
		.zip(cfopts.into_iter())
		.map(|(name, opts)| ColumnFamilyDescriptor::new(name, opts))
		.collect();

	Ok(cfds)
}

#[implement(Engine)]
#[tracing::instrument(name = "discover", skip_all)]
fn discover_cfs(path: &Path, opts: &Options) -> BTreeSet<String> {
	Db::list_cf(opts, path)
		.unwrap_or_default()
		.into_iter()
		.collect::<BTreeSet<_>>()
}
16	src/database/engine/repair.rs	Normal file
@@ -0,0 +1,16 @@
use std::path::PathBuf;

use conduwuit::{info, warn, Err, Result};
use rocksdb::Options;

use super::Db;

pub(crate) fn repair(db_opts: &Options, path: &PathBuf) -> Result {
	warn!("Starting database repair. This may take a long time...");
	match Db::repair(db_opts, path) {
		| Ok(()) => info!("Database repair successful."),
		| Err(e) => return Err!("Repair failed: {e:?}"),
	}

	Ok(())
}