Database Refactor

combine service/users data w/ mod unit

split sliding sync related out of service/users

instrument database entry points

remove increment crap from database interface

de-wrap all database get() calls

de-wrap all database insert() calls

de-wrap all database remove() calls

refactor database interface for async streaming

add query key serializer for database

implement Debug for result handle

add query deserializer for database

add deserialization trait for option handle

start a stream utils suite

de-wrap/asyncify/type-query count_one_time_keys()

de-wrap/asyncify users count

add admin query users command suite

de-wrap/asyncify users exists

de-wrap/partially asyncify user filter related

asyncify/de-wrap users device/keys related

asyncify/de-wrap user auth/misc related

asyncify/de-wrap users blurhash

asyncify/de-wrap account_data get; merge Data into Service

partial asyncify/de-wrap uiaa; merge Data into Service

partially asyncify/de-wrap transaction_ids get; merge Data into Service

partially asyncify/de-wrap key_backups; merge Data into Service

asyncify/de-wrap pusher service getters; merge Data into Service

asyncify/de-wrap rooms alias getters/some iterators

asyncify/de-wrap rooms directory getters/iterator

partially asyncify/de-wrap rooms lazy-loading

partially asyncify/de-wrap rooms metadata

asyncify/dewrap rooms outlier

asyncify/dewrap rooms pdu_metadata

dewrap/partially asyncify rooms read receipt

de-wrap rooms search service

de-wrap/partially asyncify rooms user service

partial de-wrap rooms state_compressor

de-wrap rooms state_cache

de-wrap room state et al

de-wrap rooms timeline service

additional users device/keys related

de-wrap/asyncify sender

asyncify services

refactor database to TryFuture/TryStream

refactor services for TryFuture/TryStream

asyncify api handlers

additional asyncification for admin module

abstract stream related; support reverse streams

additional stream conversions

asyncify state-res related

Signed-off-by: Jason Volk <jason@zemos.net>
This commit is contained in:
Jason Volk 2024-08-08 17:18:30 +00:00 committed by strawberry
parent 6001014078
commit 946ca364e0
203 changed files with 12202 additions and 10709 deletions

View file

@ -1,12 +1,20 @@
use std::{
collections::{hash_map, HashMap},
mem::size_of,
sync::{Arc, Mutex},
sync::Arc,
};
use conduit::{checked, error, utils, Error, PduCount, PduEvent, Result};
use database::{Database, Map};
use ruma::{api::client::error::ErrorKind, CanonicalJsonObject, EventId, OwnedRoomId, OwnedUserId, RoomId, UserId};
use conduit::{
err, expected,
result::{LogErr, NotFound},
utils,
utils::{stream::TryIgnore, u64_from_u8, ReadyExt},
Err, PduCount, PduEvent, Result,
};
use database::{Database, Deserialized, KeyVal, Map};
use futures::{FutureExt, Stream, StreamExt};
use ruma::{CanonicalJsonObject, EventId, OwnedRoomId, OwnedUserId, RoomId, UserId};
use tokio::sync::Mutex;
use crate::{rooms, Dep};
@ -25,8 +33,7 @@ struct Services {
short: Dep<rooms::short::Service>,
}
type PdusIterItem = Result<(PduCount, PduEvent)>;
type PdusIterator<'a> = Box<dyn Iterator<Item = PdusIterItem> + 'a>;
pub type PdusIterItem = (PduCount, PduEvent);
type LastTimelineCountCache = Mutex<HashMap<OwnedRoomId, PduCount>>;
impl Data {
@ -46,23 +53,20 @@ impl Data {
}
}
pub(super) fn last_timeline_count(&self, sender_user: &UserId, room_id: &RoomId) -> Result<PduCount> {
pub(super) async fn last_timeline_count(&self, sender_user: &UserId, room_id: &RoomId) -> Result<PduCount> {
match self
.lasttimelinecount_cache
.lock()
.expect("locked")
.await
.entry(room_id.to_owned())
{
hash_map::Entry::Vacant(v) => {
if let Some(last_count) = self
.pdus_until(sender_user, room_id, PduCount::max())?
.find_map(|r| {
// Filter out buggy events
if r.is_err() {
error!("Bad pdu in pdus_since: {:?}", r);
}
r.ok()
}) {
.pdus_until(sender_user, room_id, PduCount::max())
.await?
.next()
.await
{
Ok(*v.insert(last_count.0))
} else {
Ok(PduCount::Normal(0))
@ -73,232 +77,215 @@ impl Data {
}
/// Returns the `count` of this pdu's id.
pub(super) fn get_pdu_count(&self, event_id: &EventId) -> Result<Option<PduCount>> {
pub(super) async fn get_pdu_count(&self, event_id: &EventId) -> Result<PduCount> {
self.eventid_pduid
.get(event_id.as_bytes())?
.qry(event_id)
.await
.map(|pdu_id| pdu_count(&pdu_id))
.transpose()
}
/// Returns the json of a pdu.
pub(super) fn get_pdu_json(&self, event_id: &EventId) -> Result<Option<CanonicalJsonObject>> {
self.get_non_outlier_pdu_json(event_id)?.map_or_else(
|| {
self.eventid_outlierpdu
.get(event_id.as_bytes())?
.map(|pdu| serde_json::from_slice(&pdu).map_err(|_| Error::bad_database("Invalid PDU in db.")))
.transpose()
},
|x| Ok(Some(x)),
)
pub(super) async fn get_pdu_json(&self, event_id: &EventId) -> Result<CanonicalJsonObject> {
if let Ok(pdu) = self.get_non_outlier_pdu_json(event_id).await {
return Ok(pdu);
}
self.eventid_outlierpdu
.qry(event_id)
.await
.deserialized_json()
}
/// Returns the json of a pdu.
pub(super) fn get_non_outlier_pdu_json(&self, event_id: &EventId) -> Result<Option<CanonicalJsonObject>> {
self.eventid_pduid
.get(event_id.as_bytes())?
.map(|pduid| {
self.pduid_pdu
.get(&pduid)?
.ok_or_else(|| Error::bad_database("Invalid pduid in eventid_pduid."))
})
.transpose()?
.map(|pdu| serde_json::from_slice(&pdu).map_err(|_| Error::bad_database("Invalid PDU in db.")))
.transpose()
pub(super) async fn get_non_outlier_pdu_json(&self, event_id: &EventId) -> Result<CanonicalJsonObject> {
let pduid = self.get_pdu_id(event_id).await?;
self.pduid_pdu.qry(&pduid).await.deserialized_json()
}
/// Returns the pdu's id.
#[inline]
pub(super) fn get_pdu_id(&self, event_id: &EventId) -> Result<Option<database::Handle<'_>>> {
self.eventid_pduid.get(event_id.as_bytes())
pub(super) async fn get_pdu_id(&self, event_id: &EventId) -> Result<database::Handle<'_>> {
self.eventid_pduid.qry(event_id).await
}
/// Returns the pdu directly from `eventid_pduid` only.
pub(super) fn get_non_outlier_pdu(&self, event_id: &EventId) -> Result<Option<PduEvent>> {
self.eventid_pduid
.get(event_id.as_bytes())?
.map(|pduid| {
self.pduid_pdu
.get(&pduid)?
.ok_or_else(|| Error::bad_database("Invalid pduid in eventid_pduid."))
})
.transpose()?
.map(|pdu| serde_json::from_slice(&pdu).map_err(|_| Error::bad_database("Invalid PDU in db.")))
.transpose()
pub(super) async fn get_non_outlier_pdu(&self, event_id: &EventId) -> Result<PduEvent> {
let pduid = self.get_pdu_id(event_id).await?;
self.pduid_pdu.qry(&pduid).await.deserialized_json()
}
/// Like get_non_outlier_pdu(), but without the expense of fetching and
/// parsing the PduEvent
pub(super) async fn non_outlier_pdu_exists(&self, event_id: &EventId) -> Result<()> {
let pduid = self.get_pdu_id(event_id).await?;
self.pduid_pdu.qry(&pduid).await?;
Ok(())
}
/// Returns the pdu.
///
/// Checks the `eventid_outlierpdu` Tree if not found in the timeline.
pub(super) fn get_pdu(&self, event_id: &EventId) -> Result<Option<Arc<PduEvent>>> {
if let Some(pdu) = self
.get_non_outlier_pdu(event_id)?
.map_or_else(
|| {
self.eventid_outlierpdu
.get(event_id.as_bytes())?
.map(|pdu| serde_json::from_slice(&pdu).map_err(|_| Error::bad_database("Invalid PDU in db.")))
.transpose()
},
|x| Ok(Some(x)),
)?
.map(Arc::new)
{
Ok(Some(pdu))
} else {
Ok(None)
pub(super) async fn get_pdu(&self, event_id: &EventId) -> Result<Arc<PduEvent>> {
if let Ok(pdu) = self.get_non_outlier_pdu(event_id).await {
return Ok(Arc::new(pdu));
}
self.eventid_outlierpdu
.qry(event_id)
.await
.deserialized_json()
.map(Arc::new)
}
/// Like get_non_outlier_pdu(), but without the expense of fetching and
/// parsing the PduEvent
pub(super) async fn outlier_pdu_exists(&self, event_id: &EventId) -> Result<()> {
self.eventid_outlierpdu.qry(event_id).await?;
Ok(())
}
/// Like get_pdu(), but without the expense of fetching and parsing the data
pub(super) async fn pdu_exists(&self, event_id: &EventId) -> bool {
let non_outlier = self.non_outlier_pdu_exists(event_id).map(|res| res.is_ok());
let outlier = self.outlier_pdu_exists(event_id).map(|res| res.is_ok());
//TODO: parallelize
non_outlier.await || outlier.await
}
/// Returns the pdu.
///
/// This does __NOT__ check the outliers `Tree`.
pub(super) fn get_pdu_from_id(&self, pdu_id: &[u8]) -> Result<Option<PduEvent>> {
self.pduid_pdu.get(pdu_id)?.map_or(Ok(None), |pdu| {
Ok(Some(
serde_json::from_slice(&pdu).map_err(|_| Error::bad_database("Invalid PDU in db."))?,
))
})
pub(super) async fn get_pdu_from_id(&self, pdu_id: &[u8]) -> Result<PduEvent> {
self.pduid_pdu.qry(pdu_id).await.deserialized_json()
}
/// Returns the pdu as a `BTreeMap<String, CanonicalJsonValue>`.
pub(super) fn get_pdu_json_from_id(&self, pdu_id: &[u8]) -> Result<Option<CanonicalJsonObject>> {
self.pduid_pdu.get(pdu_id)?.map_or(Ok(None), |pdu| {
Ok(Some(
serde_json::from_slice(&pdu).map_err(|_| Error::bad_database("Invalid PDU in db."))?,
))
})
pub(super) async fn get_pdu_json_from_id(&self, pdu_id: &[u8]) -> Result<CanonicalJsonObject> {
self.pduid_pdu.qry(pdu_id).await.deserialized_json()
}
pub(super) fn append_pdu(
&self, pdu_id: &[u8], pdu: &PduEvent, json: &CanonicalJsonObject, count: u64,
) -> Result<()> {
pub(super) async fn append_pdu(&self, pdu_id: &[u8], pdu: &PduEvent, json: &CanonicalJsonObject, count: u64) {
self.pduid_pdu.insert(
pdu_id,
&serde_json::to_vec(json).expect("CanonicalJsonObject is always a valid"),
)?;
);
self.lasttimelinecount_cache
.lock()
.expect("locked")
.await
.insert(pdu.room_id.clone(), PduCount::Normal(count));
self.eventid_pduid.insert(pdu.event_id.as_bytes(), pdu_id)?;
self.eventid_outlierpdu.remove(pdu.event_id.as_bytes())?;
Ok(())
self.eventid_pduid.insert(pdu.event_id.as_bytes(), pdu_id);
self.eventid_outlierpdu.remove(pdu.event_id.as_bytes());
}
pub(super) fn prepend_backfill_pdu(
&self, pdu_id: &[u8], event_id: &EventId, json: &CanonicalJsonObject,
) -> Result<()> {
pub(super) fn prepend_backfill_pdu(&self, pdu_id: &[u8], event_id: &EventId, json: &CanonicalJsonObject) {
self.pduid_pdu.insert(
pdu_id,
&serde_json::to_vec(json).expect("CanonicalJsonObject is always a valid"),
)?;
);
self.eventid_pduid.insert(event_id.as_bytes(), pdu_id)?;
self.eventid_outlierpdu.remove(event_id.as_bytes())?;
Ok(())
self.eventid_pduid.insert(event_id.as_bytes(), pdu_id);
self.eventid_outlierpdu.remove(event_id.as_bytes());
}
/// Removes a pdu and creates a new one with the same id.
pub(super) fn replace_pdu(&self, pdu_id: &[u8], pdu_json: &CanonicalJsonObject, _pdu: &PduEvent) -> Result<()> {
if self.pduid_pdu.get(pdu_id)?.is_some() {
self.pduid_pdu.insert(
pdu_id,
&serde_json::to_vec(pdu_json).expect("CanonicalJsonObject is always a valid"),
)?;
} else {
return Err(Error::BadRequest(ErrorKind::NotFound, "PDU does not exist."));
pub(super) async fn replace_pdu(
&self, pdu_id: &[u8], pdu_json: &CanonicalJsonObject, _pdu: &PduEvent,
) -> Result<()> {
if self.pduid_pdu.qry(pdu_id).await.is_not_found() {
return Err!(Request(NotFound("PDU does not exist.")));
}
let pdu = serde_json::to_vec(pdu_json)?;
self.pduid_pdu.insert(pdu_id, &pdu);
Ok(())
}
/// Returns an iterator over all events and their tokens in a room that
/// happened before the event with id `until` in reverse-chronological
/// order.
pub(super) fn pdus_until(&self, user_id: &UserId, room_id: &RoomId, until: PduCount) -> Result<PdusIterator<'_>> {
let (prefix, current) = self.count_to_id(room_id, until, 1, true)?;
pub(super) async fn pdus_until<'a>(
&'a self, user_id: &'a UserId, room_id: &'a RoomId, until: PduCount,
) -> Result<impl Stream<Item = PdusIterItem> + Send + 'a> {
let (prefix, current) = self.count_to_id(room_id, until, 1, true).await?;
let stream = self
.pduid_pdu
.rev_raw_stream_from(&current)
.ignore_err()
.ready_take_while(move |(key, _)| key.starts_with(&prefix))
.map(move |item| Self::each_pdu(item, user_id));
let user_id = user_id.to_owned();
Ok(Box::new(
self.pduid_pdu
.iter_from(&current, true)
.take_while(move |(k, _)| k.starts_with(&prefix))
.map(move |(pdu_id, v)| {
let mut pdu = serde_json::from_slice::<PduEvent>(&v)
.map_err(|_| Error::bad_database("PDU in db is invalid."))?;
if pdu.sender != user_id {
pdu.remove_transaction_id()?;
}
pdu.add_age()?;
let count = pdu_count(&pdu_id)?;
Ok((count, pdu))
}),
))
Ok(stream)
}
pub(super) fn pdus_after(&self, user_id: &UserId, room_id: &RoomId, from: PduCount) -> Result<PdusIterator<'_>> {
let (prefix, current) = self.count_to_id(room_id, from, 1, false)?;
pub(super) async fn pdus_after<'a>(
&'a self, user_id: &'a UserId, room_id: &'a RoomId, from: PduCount,
) -> Result<impl Stream<Item = PdusIterItem> + Send + 'a> {
let (prefix, current) = self.count_to_id(room_id, from, 1, false).await?;
let stream = self
.pduid_pdu
.raw_stream_from(&current)
.ignore_err()
.ready_take_while(move |(key, _)| key.starts_with(&prefix))
.map(move |item| Self::each_pdu(item, user_id));
let user_id = user_id.to_owned();
Ok(stream)
}
Ok(Box::new(
self.pduid_pdu
.iter_from(&current, false)
.take_while(move |(k, _)| k.starts_with(&prefix))
.map(move |(pdu_id, v)| {
let mut pdu = serde_json::from_slice::<PduEvent>(&v)
.map_err(|_| Error::bad_database("PDU in db is invalid."))?;
if pdu.sender != user_id {
pdu.remove_transaction_id()?;
}
pdu.add_age()?;
let count = pdu_count(&pdu_id)?;
Ok((count, pdu))
}),
))
fn each_pdu((pdu_id, pdu): KeyVal<'_>, user_id: &UserId) -> PdusIterItem {
let mut pdu =
serde_json::from_slice::<PduEvent>(pdu).expect("PduEvent in pduid_pdu database column is invalid JSON");
if pdu.sender != user_id {
pdu.remove_transaction_id().log_err().ok();
}
pdu.add_age().log_err().ok();
let count = pdu_count(pdu_id);
(count, pdu)
}
pub(super) fn increment_notification_counts(
&self, room_id: &RoomId, notifies: Vec<OwnedUserId>, highlights: Vec<OwnedUserId>,
) -> Result<()> {
let mut notifies_batch = Vec::new();
let mut highlights_batch = Vec::new();
) {
let _cork = self.db.cork();
for user in notifies {
let mut userroom_id = user.as_bytes().to_vec();
userroom_id.push(0xFF);
userroom_id.extend_from_slice(room_id.as_bytes());
notifies_batch.push(userroom_id);
increment(&self.userroomid_notificationcount, &userroom_id);
}
for user in highlights {
let mut userroom_id = user.as_bytes().to_vec();
userroom_id.push(0xFF);
userroom_id.extend_from_slice(room_id.as_bytes());
highlights_batch.push(userroom_id);
increment(&self.userroomid_highlightcount, &userroom_id);
}
self.userroomid_notificationcount
.increment_batch(notifies_batch.iter().map(Vec::as_slice))?;
self.userroomid_highlightcount
.increment_batch(highlights_batch.iter().map(Vec::as_slice))?;
Ok(())
}
pub(super) fn count_to_id(
pub(super) async fn count_to_id(
&self, room_id: &RoomId, count: PduCount, offset: u64, subtract: bool,
) -> Result<(Vec<u8>, Vec<u8>)> {
let prefix = self
.services
.short
.get_shortroomid(room_id)?
.ok_or_else(|| Error::bad_database("Looked for bad shortroomid in timeline"))?
.get_shortroomid(room_id)
.await
.map_err(|e| err!(Request(NotFound("Room {room_id:?} not found: {e:?}"))))?
.to_be_bytes()
.to_vec();
let mut pdu_id = prefix.clone();
// +1 so we don't send the base event
let count_raw = match count {
@ -326,17 +313,23 @@ impl Data {
}
/// Returns the `count` of this pdu's id.
pub(super) fn pdu_count(pdu_id: &[u8]) -> Result<PduCount> {
let stride = size_of::<u64>();
let pdu_id_len = pdu_id.len();
let last_u64 = utils::u64_from_bytes(&pdu_id[checked!(pdu_id_len - stride)?..])
.map_err(|_| Error::bad_database("PDU has invalid count bytes."))?;
let second_last_u64 =
utils::u64_from_bytes(&pdu_id[checked!(pdu_id_len - 2 * stride)?..checked!(pdu_id_len - stride)?]);
pub(super) fn pdu_count(pdu_id: &[u8]) -> PduCount {
const STRIDE: usize = size_of::<u64>();
if matches!(second_last_u64, Ok(0)) {
Ok(PduCount::Backfilled(u64::MAX.saturating_sub(last_u64)))
let pdu_id_len = pdu_id.len();
let last_u64 = u64_from_u8(&pdu_id[expected!(pdu_id_len - STRIDE)..]);
let second_last_u64 = u64_from_u8(&pdu_id[expected!(pdu_id_len - 2 * STRIDE)..expected!(pdu_id_len - STRIDE)]);
if second_last_u64 == 0 {
PduCount::Backfilled(u64::MAX.saturating_sub(last_u64))
} else {
Ok(PduCount::Normal(last_u64))
PduCount::Normal(last_u64)
}
}
//TODO: this is an ABA
fn increment(db: &Arc<Map>, key: &[u8]) {
let old = db.get(key);
let new = utils::increment(old.ok().as_deref());
db.insert(key, &new);
}