refactor search system
Signed-off-by: Jason Volk <jason@zemos.net>
This commit is contained in:
parent
f245389c02
commit
21a67513f2
4 changed files with 312 additions and 205 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -786,6 +786,7 @@ dependencies = [
|
||||||
name = "conduit_service"
|
name = "conduit_service"
|
||||||
version = "0.5.0"
|
version = "0.5.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"arrayvec",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
"base64 0.22.1",
|
"base64 0.22.1",
|
||||||
"bytes",
|
"bytes",
|
||||||
|
|
|
@ -2,25 +2,32 @@ use std::collections::BTreeMap;
|
||||||
|
|
||||||
use axum::extract::State;
|
use axum::extract::State;
|
||||||
use conduit::{
|
use conduit::{
|
||||||
debug,
|
at, is_true,
|
||||||
utils::{IterStream, ReadyExt},
|
result::FlatOk,
|
||||||
Err,
|
utils::{stream::ReadyExt, IterStream},
|
||||||
|
Err, PduEvent, Result,
|
||||||
};
|
};
|
||||||
use futures::{FutureExt, StreamExt};
|
use futures::{future::OptionFuture, FutureExt, StreamExt, TryFutureExt};
|
||||||
use ruma::{
|
use ruma::{
|
||||||
api::client::{
|
api::client::search::search_events::{
|
||||||
error::ErrorKind,
|
self,
|
||||||
search::search_events::{
|
v3::{Criteria, EventContextResult, ResultCategories, ResultRoomEvents, SearchResult},
|
||||||
self,
|
|
||||||
v3::{EventContextResult, ResultCategories, ResultRoomEvents, SearchResult},
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
events::AnyStateEvent,
|
events::AnyStateEvent,
|
||||||
serde::Raw,
|
serde::Raw,
|
||||||
uint, OwnedRoomId,
|
OwnedRoomId, RoomId, UInt, UserId,
|
||||||
};
|
};
|
||||||
|
use search_events::v3::{Request, Response};
|
||||||
|
use service::{rooms::search::RoomQuery, Services};
|
||||||
|
|
||||||
use crate::{Error, Result, Ruma};
|
use crate::Ruma;
|
||||||
|
|
||||||
|
type RoomStates = BTreeMap<OwnedRoomId, RoomState>;
|
||||||
|
type RoomState = Vec<Raw<AnyStateEvent>>;
|
||||||
|
|
||||||
|
const LIMIT_DEFAULT: usize = 10;
|
||||||
|
const LIMIT_MAX: usize = 100;
|
||||||
|
const BATCH_MAX: usize = 20;
|
||||||
|
|
||||||
/// # `POST /_matrix/client/r0/search`
|
/// # `POST /_matrix/client/r0/search`
|
||||||
///
|
///
|
||||||
|
@ -28,173 +35,177 @@ use crate::{Error, Result, Ruma};
|
||||||
///
|
///
|
||||||
/// - Only works if the user is currently joined to the room (TODO: Respect
|
/// - Only works if the user is currently joined to the room (TODO: Respect
|
||||||
/// history visibility)
|
/// history visibility)
|
||||||
pub(crate) async fn search_events_route(
|
pub(crate) async fn search_events_route(State(services): State<crate::State>, body: Ruma<Request>) -> Result<Response> {
|
||||||
State(services): State<crate::State>, body: Ruma<search_events::v3::Request>,
|
let sender_user = body.sender_user();
|
||||||
) -> Result<search_events::v3::Response> {
|
let next_batch = body.next_batch.as_deref();
|
||||||
let sender_user = body.sender_user.as_ref().expect("user is authenticated");
|
let room_events_result: OptionFuture<_> = body
|
||||||
|
.search_categories
|
||||||
|
.room_events
|
||||||
|
.as_ref()
|
||||||
|
.map(|criteria| category_room_events(&services, sender_user, next_batch, criteria))
|
||||||
|
.into();
|
||||||
|
|
||||||
let search_criteria = body.search_categories.room_events.as_ref().unwrap();
|
Ok(Response {
|
||||||
let filter = &search_criteria.filter;
|
search_categories: ResultCategories {
|
||||||
let include_state = &search_criteria.include_state;
|
room_events: room_events_result
|
||||||
|
.await
|
||||||
|
.unwrap_or_else(|| Ok(ResultRoomEvents::default()))?,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
let room_ids = if let Some(room_ids) = &filter.rooms {
|
#[allow(clippy::map_unwrap_or)]
|
||||||
room_ids.clone()
|
async fn category_room_events(
|
||||||
} else {
|
services: &Services, sender_user: &UserId, next_batch: Option<&str>, criteria: &Criteria,
|
||||||
services
|
) -> Result<ResultRoomEvents> {
|
||||||
.rooms
|
let filter = &criteria.filter;
|
||||||
.state_cache
|
|
||||||
.rooms_joined(sender_user)
|
|
||||||
.map(ToOwned::to_owned)
|
|
||||||
.collect()
|
|
||||||
.await
|
|
||||||
};
|
|
||||||
|
|
||||||
// Use limit or else 10, with maximum 100
|
|
||||||
let limit: usize = filter
|
let limit: usize = filter
|
||||||
.limit
|
.limit
|
||||||
.unwrap_or_else(|| uint!(10))
|
.map(TryInto::try_into)
|
||||||
.try_into()
|
.flat_ok()
|
||||||
.unwrap_or(10)
|
.unwrap_or(LIMIT_DEFAULT)
|
||||||
.min(100);
|
.min(LIMIT_MAX);
|
||||||
|
|
||||||
let mut room_states: BTreeMap<OwnedRoomId, Vec<Raw<AnyStateEvent>>> = BTreeMap::new();
|
let next_batch: usize = next_batch
|
||||||
|
.map(str::parse)
|
||||||
|
.transpose()?
|
||||||
|
.unwrap_or(0)
|
||||||
|
.min(limit.saturating_mul(BATCH_MAX));
|
||||||
|
|
||||||
if include_state.is_some_and(|include_state| include_state) {
|
let rooms = filter
|
||||||
for room_id in &room_ids {
|
.rooms
|
||||||
if !services
|
.clone()
|
||||||
.rooms
|
.map(IntoIterator::into_iter)
|
||||||
.state_cache
|
.map(IterStream::stream)
|
||||||
.is_joined(sender_user, room_id)
|
.map(StreamExt::boxed)
|
||||||
.await
|
.unwrap_or_else(|| {
|
||||||
{
|
|
||||||
return Err!(Request(Forbidden("You don't have permission to view this room.")));
|
|
||||||
}
|
|
||||||
|
|
||||||
// check if sender_user can see state events
|
|
||||||
if services
|
|
||||||
.rooms
|
|
||||||
.state_accessor
|
|
||||||
.user_can_see_state_events(sender_user, room_id)
|
|
||||||
.await
|
|
||||||
{
|
|
||||||
let room_state: Vec<_> = services
|
|
||||||
.rooms
|
|
||||||
.state_accessor
|
|
||||||
.room_state_full(room_id)
|
|
||||||
.await?
|
|
||||||
.values()
|
|
||||||
.map(|pdu| pdu.to_state_event())
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
debug!("Room state: {:?}", room_state);
|
|
||||||
|
|
||||||
room_states.insert(room_id.clone(), room_state);
|
|
||||||
} else {
|
|
||||||
return Err(Error::BadRequest(
|
|
||||||
ErrorKind::forbidden(),
|
|
||||||
"You don't have permission to view this room.",
|
|
||||||
));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut search_vecs = Vec::new();
|
|
||||||
|
|
||||||
for room_id in &room_ids {
|
|
||||||
if !services
|
|
||||||
.rooms
|
|
||||||
.state_cache
|
|
||||||
.is_joined(sender_user, room_id)
|
|
||||||
.await
|
|
||||||
{
|
|
||||||
return Err(Error::BadRequest(
|
|
||||||
ErrorKind::forbidden(),
|
|
||||||
"You don't have permission to view this room.",
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(search) = services
|
|
||||||
.rooms
|
|
||||||
.search
|
|
||||||
.search_pdus(room_id, &search_criteria.search_term)
|
|
||||||
.await
|
|
||||||
{
|
|
||||||
search_vecs.push(search.0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut searches: Vec<_> = search_vecs
|
|
||||||
.iter()
|
|
||||||
.map(|vec| vec.iter().peekable())
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let skip: usize = match body.next_batch.as_ref().map(|s| s.parse()) {
|
|
||||||
Some(Ok(s)) => s,
|
|
||||||
Some(Err(_)) => return Err(Error::BadRequest(ErrorKind::InvalidParam, "Invalid next_batch token.")),
|
|
||||||
None => 0, // Default to the start
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut results = Vec::new();
|
|
||||||
let next_batch = skip.saturating_add(limit);
|
|
||||||
|
|
||||||
for _ in 0..next_batch {
|
|
||||||
if let Some(s) = searches
|
|
||||||
.iter_mut()
|
|
||||||
.map(|s| (s.peek().copied(), s))
|
|
||||||
.max_by_key(|(peek, _)| *peek)
|
|
||||||
.and_then(|(_, i)| i.next())
|
|
||||||
{
|
|
||||||
results.push(s);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let results: Vec<_> = results
|
|
||||||
.into_iter()
|
|
||||||
.skip(skip)
|
|
||||||
.stream()
|
|
||||||
.filter_map(|id| services.rooms.timeline.get_pdu_from_id(id).map(Result::ok))
|
|
||||||
.ready_filter(|pdu| !pdu.is_redacted())
|
|
||||||
.filter_map(|pdu| async move {
|
|
||||||
services
|
services
|
||||||
.rooms
|
.rooms
|
||||||
.state_accessor
|
.state_cache
|
||||||
.user_can_see_event(sender_user, &pdu.room_id, &pdu.event_id)
|
.rooms_joined(sender_user)
|
||||||
|
.map(ToOwned::to_owned)
|
||||||
|
.boxed()
|
||||||
|
});
|
||||||
|
|
||||||
|
let results: Vec<_> = rooms
|
||||||
|
.filter_map(|room_id| async move {
|
||||||
|
check_room_visible(services, sender_user, &room_id, criteria)
|
||||||
.await
|
.await
|
||||||
.then_some(pdu)
|
.is_ok()
|
||||||
|
.then_some(room_id)
|
||||||
})
|
})
|
||||||
.take(limit)
|
.filter_map(|room_id| async move {
|
||||||
.map(|pdu| pdu.to_room_event())
|
let query = RoomQuery {
|
||||||
.map(|result| SearchResult {
|
room_id: &room_id,
|
||||||
context: EventContextResult {
|
user_id: Some(sender_user),
|
||||||
end: None,
|
criteria,
|
||||||
events_after: Vec::new(),
|
skip: next_batch,
|
||||||
events_before: Vec::new(),
|
limit,
|
||||||
profile_info: BTreeMap::new(),
|
};
|
||||||
start: None,
|
|
||||||
},
|
let (count, results) = services.rooms.search.search_pdus(&query).await.ok()?;
|
||||||
rank: None,
|
|
||||||
result: Some(result),
|
results
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.map(|results| (room_id.clone(), count, results))
|
||||||
|
.map(Some)
|
||||||
|
.await
|
||||||
})
|
})
|
||||||
.collect()
|
.collect()
|
||||||
.boxed()
|
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
let more_unloaded_results = searches.iter_mut().any(|s| s.peek().is_some());
|
let total: UInt = results
|
||||||
|
.iter()
|
||||||
|
.fold(0, |a: usize, (_, count, _)| a.saturating_add(*count))
|
||||||
|
.try_into()?;
|
||||||
|
|
||||||
let next_batch = more_unloaded_results.then(|| next_batch.to_string());
|
let state: RoomStates = results
|
||||||
|
.iter()
|
||||||
|
.stream()
|
||||||
|
.ready_filter(|_| criteria.include_state.is_some_and(is_true!()))
|
||||||
|
.filter_map(|(room_id, ..)| async move {
|
||||||
|
procure_room_state(services, room_id)
|
||||||
|
.map_ok(|state| (room_id.clone(), state))
|
||||||
|
.await
|
||||||
|
.ok()
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
.await;
|
||||||
|
|
||||||
Ok(search_events::v3::Response::new(ResultCategories {
|
let results: Vec<SearchResult> = results
|
||||||
room_events: ResultRoomEvents {
|
.into_iter()
|
||||||
count: Some(results.len().try_into().unwrap_or_else(|_| uint!(0))),
|
.map(at!(2))
|
||||||
groups: BTreeMap::new(), // TODO
|
.flatten()
|
||||||
next_batch,
|
.stream()
|
||||||
results,
|
.map(|pdu| pdu.to_room_event())
|
||||||
state: room_states,
|
.map(|result| SearchResult {
|
||||||
highlights: search_criteria
|
rank: None,
|
||||||
.search_term
|
result: Some(result),
|
||||||
.split_terminator(|c: char| !c.is_alphanumeric())
|
context: EventContextResult {
|
||||||
.map(str::to_lowercase)
|
profile_info: BTreeMap::new(), //TODO
|
||||||
.collect(),
|
events_after: Vec::new(), //TODO
|
||||||
},
|
events_before: Vec::new(), //TODO
|
||||||
}))
|
start: None, //TODO
|
||||||
|
end: None, //TODO
|
||||||
|
},
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let highlights = criteria
|
||||||
|
.search_term
|
||||||
|
.split_terminator(|c: char| !c.is_alphanumeric())
|
||||||
|
.map(str::to_lowercase)
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let next_batch = (results.len() >= limit)
|
||||||
|
.then_some(next_batch.saturating_add(results.len()))
|
||||||
|
.as_ref()
|
||||||
|
.map(ToString::to_string);
|
||||||
|
|
||||||
|
Ok(ResultRoomEvents {
|
||||||
|
count: Some(total),
|
||||||
|
next_batch,
|
||||||
|
results,
|
||||||
|
state,
|
||||||
|
highlights,
|
||||||
|
groups: BTreeMap::new(), // TODO
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn procure_room_state(services: &Services, room_id: &RoomId) -> Result<RoomState> {
|
||||||
|
let state_map = services
|
||||||
|
.rooms
|
||||||
|
.state_accessor
|
||||||
|
.room_state_full(room_id)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let state_events = state_map
|
||||||
|
.values()
|
||||||
|
.map(AsRef::as_ref)
|
||||||
|
.map(PduEvent::to_state_event)
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
Ok(state_events)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Decide whether `user_id` may search `room_id` under the given criteria.
///
/// Checks are only performed when the request filter explicitly lists rooms
/// (`search.filter.rooms` is `Some`); otherwise this always returns `Ok(())`.
/// When rooms are listed the user must be joined to the room, and if
/// `include_state` is also true the user must additionally be allowed to see
/// the room's state events.
///
/// # Errors
///
/// Returns a `Forbidden` request error when either check fails.
async fn check_room_visible(services: &Services, user_id: &UserId, room_id: &RoomId, search: &Criteria) -> Result {
|
||||||
|
// Only an explicit room list in the filter triggers the checks below.
let check_visible = search.filter.rooms.is_some();
|
||||||
|
// State visibility matters only when the caller asked for state to be included.
let check_state = check_visible && search.include_state.is_some_and(is_true!());
|
||||||
|
|
||||||
|
// Short-circuits: the membership lookup is not awaited when no check is required.
let is_joined = !check_visible || services.rooms.state_cache.is_joined(user_id, room_id).await;
|
||||||
|
|
||||||
|
// Short-circuits: the state-visibility lookup is not awaited when check_state is false.
let state_visible = !check_state
|
||||||
|
|| services
|
||||||
|
.rooms
|
||||||
|
.state_accessor
|
||||||
|
.user_can_see_state_events(user_id, room_id)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
if !is_joined || !state_visible {
|
||||||
|
return Err!(Request(Forbidden("You don't have permission to view {room_id:?}")));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
@ -40,6 +40,7 @@ release_max_log_level = [
|
||||||
]
|
]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
arrayvec.workspace = true
|
||||||
async-trait.workspace = true
|
async-trait.workspace = true
|
||||||
base64.workspace = true
|
base64.workspace = true
|
||||||
bytes.workspace = true
|
bytes.workspace = true
|
||||||
|
|
|
@ -1,15 +1,23 @@
|
||||||
use std::sync::Arc;
|
use std::{iter, sync::Arc};
|
||||||
|
|
||||||
|
use arrayvec::ArrayVec;
|
||||||
use conduit::{
|
use conduit::{
|
||||||
implement,
|
implement,
|
||||||
utils::{set, stream::TryIgnore, IterStream, ReadyExt},
|
utils::{set, stream::TryIgnore, ArrayVecExt, IterStream, ReadyExt},
|
||||||
Result,
|
PduEvent, Result,
|
||||||
};
|
};
|
||||||
use database::Map;
|
use database::{keyval::Val, Map};
|
||||||
use futures::StreamExt;
|
use futures::{Stream, StreamExt};
|
||||||
use ruma::RoomId;
|
use ruma::{api::client::search::search_events::v3::Criteria, RoomId, UserId};
|
||||||
|
|
||||||
use crate::{rooms, Dep};
|
use crate::{
|
||||||
|
rooms,
|
||||||
|
rooms::{
|
||||||
|
short::ShortRoomId,
|
||||||
|
timeline::{PduId, RawPduId},
|
||||||
|
},
|
||||||
|
Dep,
|
||||||
|
};
|
||||||
|
|
||||||
pub struct Service {
|
pub struct Service {
|
||||||
db: Data,
|
db: Data,
|
||||||
|
@ -22,8 +30,24 @@ struct Data {
|
||||||
|
|
||||||
struct Services {
|
struct Services {
|
||||||
short: Dep<rooms::short::Service>,
|
short: Dep<rooms::short::Service>,
|
||||||
|
state_accessor: Dep<rooms::state_accessor::Service>,
|
||||||
|
timeline: Dep<rooms::timeline::Service>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parameters for searching a single room's indexed events.
///
/// All references are borrowed from the caller for the duration of the query.
#[derive(Clone, Debug)]
|
||||||
|
pub struct RoomQuery<'a> {
|
||||||
|
// Room to search; resolved to a short room id before the index is queried.
pub room_id: &'a RoomId,
|
||||||
|
// User on whose behalf results are visibility-filtered. `search_pdus` drops
// every event when this is `None`.
pub user_id: Option<&'a UserId>,
|
||||||
|
// Client-supplied search criteria; its `search_term` is tokenized into words.
pub criteria: &'a Criteria,
|
||||||
|
// Maximum number of events `search_pdus` yields (applied after `skip`).
pub limit: usize,
|
||||||
|
// Number of matching, visible events `search_pdus` skips before yielding.
pub skip: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
type TokenId = ArrayVec<u8, TOKEN_ID_MAX_LEN>;
|
||||||
|
|
||||||
|
const TOKEN_ID_MAX_LEN: usize = size_of::<ShortRoomId>() + WORD_MAX_LEN + 1 + size_of::<RawPduId>();
|
||||||
|
const WORD_MAX_LEN: usize = 50;
|
||||||
|
|
||||||
impl crate::Service for Service {
|
impl crate::Service for Service {
|
||||||
fn build(args: crate::Args<'_>) -> Result<Arc<Self>> {
|
fn build(args: crate::Args<'_>) -> Result<Arc<Self>> {
|
||||||
Ok(Arc::new(Self {
|
Ok(Arc::new(Self {
|
||||||
|
@ -32,6 +56,8 @@ impl crate::Service for Service {
|
||||||
},
|
},
|
||||||
services: Services {
|
services: Services {
|
||||||
short: args.depend::<rooms::short::Service>("rooms::short"),
|
short: args.depend::<rooms::short::Service>("rooms::short"),
|
||||||
|
state_accessor: args.depend::<rooms::state_accessor::Service>("rooms::state_accessor"),
|
||||||
|
timeline: args.depend::<rooms::timeline::Service>("rooms::timeline"),
|
||||||
},
|
},
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
@ -70,46 +96,92 @@ pub fn deindex_pdu(&self, shortroomid: u64, pdu_id: &[u8], message_body: &str) {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[implement(Service)]
|
#[implement(Service)]
|
||||||
pub async fn search_pdus(&self, room_id: &RoomId, search_string: &str) -> Option<(Vec<Vec<u8>>, Vec<String>)> {
|
pub async fn search_pdus<'a>(
|
||||||
let prefix = self
|
&'a self, query: &'a RoomQuery<'a>,
|
||||||
.services
|
) -> Result<(usize, impl Stream<Item = PduEvent> + Send + 'a)> {
|
||||||
.short
|
let pdu_ids: Vec<_> = self.search_pdu_ids(query).await?.collect().await;
|
||||||
.get_shortroomid(room_id)
|
|
||||||
.await
|
|
||||||
.ok()?
|
|
||||||
.to_be_bytes()
|
|
||||||
.to_vec();
|
|
||||||
|
|
||||||
let words: Vec<_> = tokenize(search_string).collect();
|
let count = pdu_ids.len();
|
||||||
|
let pdus = pdu_ids
|
||||||
let bufs: Vec<_> = words
|
|
||||||
.clone()
|
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.stream()
|
.stream()
|
||||||
.then(move |word| {
|
.filter_map(move |result_pdu_id: RawPduId| async move {
|
||||||
let mut prefix2 = prefix.clone();
|
self.services
|
||||||
prefix2.extend_from_slice(word.as_bytes());
|
.timeline
|
||||||
prefix2.push(0xFF);
|
.get_pdu_from_id(&result_pdu_id)
|
||||||
let prefix3 = prefix2.clone();
|
.await
|
||||||
|
.ok()
|
||||||
let mut last_possible_id = prefix2.clone();
|
|
||||||
last_possible_id.extend_from_slice(&u64::MAX.to_be_bytes());
|
|
||||||
|
|
||||||
self.db.tokenids
|
|
||||||
.rev_raw_keys_from(&last_possible_id) // Newest pdus first
|
|
||||||
.ignore_err()
|
|
||||||
.ready_take_while(move |key| key.starts_with(&prefix2))
|
|
||||||
.map(move |key| key[prefix3.len()..].to_vec())
|
|
||||||
.collect::<Vec<_>>()
|
|
||||||
})
|
})
|
||||||
.collect()
|
.ready_filter(|pdu| !pdu.is_redacted())
|
||||||
.await;
|
.filter_map(move |pdu| async move {
|
||||||
|
self.services
|
||||||
|
.state_accessor
|
||||||
|
.user_can_see_event(query.user_id?, &pdu.room_id, &pdu.event_id)
|
||||||
|
.await
|
||||||
|
.then_some(pdu)
|
||||||
|
})
|
||||||
|
.skip(query.skip)
|
||||||
|
.take(query.limit);
|
||||||
|
|
||||||
let bufs = bufs.iter().map(|buf| buf.iter());
|
Ok((count, pdus))
|
||||||
|
}
|
||||||
|
|
||||||
let results = set::intersection(bufs).cloned().collect();
|
// result is modeled as a stream such that callers don't have to be refactored
|
||||||
|
// though an additional async/wrap still exists for now
|
||||||
|
#[implement(Service)]
|
||||||
|
pub async fn search_pdu_ids(&self, query: &RoomQuery<'_>) -> Result<impl Stream<Item = RawPduId> + Send + '_> {
|
||||||
|
let shortroomid = self.services.short.get_shortroomid(query.room_id).await?;
|
||||||
|
|
||||||
Some((results, words))
|
let pdu_ids = self.search_pdu_ids_query_room(query, shortroomid).await;
|
||||||
|
|
||||||
|
let iters = pdu_ids.into_iter().map(IntoIterator::into_iter);
|
||||||
|
|
||||||
|
Ok(set::intersection(iters).stream())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[implement(Service)]
|
||||||
|
async fn search_pdu_ids_query_room(&self, query: &RoomQuery<'_>, shortroomid: ShortRoomId) -> Vec<Vec<RawPduId>> {
|
||||||
|
tokenize(&query.criteria.search_term)
|
||||||
|
.stream()
|
||||||
|
.then(|word| async move {
|
||||||
|
self.search_pdu_ids_query_words(shortroomid, &word)
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.await
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Iterate over PduId's containing a word
|
||||||
|
#[implement(Service)]
|
||||||
|
fn search_pdu_ids_query_words<'a>(
|
||||||
|
&'a self, shortroomid: ShortRoomId, word: &'a str,
|
||||||
|
) -> impl Stream<Item = RawPduId> + Send + '_ {
|
||||||
|
self.search_pdu_ids_query_word(shortroomid, word)
|
||||||
|
.ready_filter_map(move |key| {
|
||||||
|
key[prefix_len(word)..]
|
||||||
|
.chunks_exact(PduId::LEN)
|
||||||
|
.next()
|
||||||
|
.map(RawPduId::try_from)
|
||||||
|
.and_then(Result::ok)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Iterate over raw database results for a word
///
/// Yields raw `tokenids` keys for `word` in the room `shortroomid`, scanning
/// in reverse from the highest possible key for that prefix and stopping at
/// the first key that no longer starts with the prefix. Database errors are
/// dropped by `ignore_err()`.
|
||||||
|
#[implement(Service)]
|
||||||
|
fn search_pdu_ids_query_word(&self, shortroomid: ShortRoomId, word: &str) -> impl Stream<Item = Val<'_>> + Send + '_ {
|
||||||
|
const PDUID_LEN: usize = PduId::LEN;
|
||||||
|
// rustc says const'ing this not yet stable
|
||||||
|
// A pdu id of all 0xFF bytes: appended to the prefix it forms the upper
// bound from which the reverse scan starts.
let end_id: ArrayVec<u8, PDUID_LEN> = iter::repeat(u8::MAX).take(PduId::LEN).collect();
|
||||||
|
|
||||||
|
// Newest pdus first
|
||||||
|
let end = make_tokenid(shortroomid, word, end_id.as_slice());
|
||||||
|
let prefix = make_prefix(shortroomid, word);
|
||||||
|
self.db
|
||||||
|
.tokenids
|
||||||
|
.rev_raw_keys_from(&end)
|
||||||
|
.ignore_err()
|
||||||
|
// Stop as soon as the scan leaves this room/word's key range.
.ready_take_while(move |key| key.starts_with(&prefix))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Splits a string into tokens used as keys in the search inverted index
|
/// Splits a string into tokens used as keys in the search inverted index
|
||||||
|
@ -119,6 +191,28 @@ pub async fn search_pdus(&self, room_id: &RoomId, search_string: &str) -> Option
|
||||||
fn tokenize(body: &str) -> impl Iterator<Item = String> + Send + '_ {
|
fn tokenize(body: &str) -> impl Iterator<Item = String> + Send + '_ {
|
||||||
body.split_terminator(|c: char| !c.is_alphanumeric())
|
body.split_terminator(|c: char| !c.is_alphanumeric())
|
||||||
.filter(|s| !s.is_empty())
|
.filter(|s| !s.is_empty())
|
||||||
.filter(|word| word.len() <= 50)
|
.filter(|word| word.len() <= WORD_MAX_LEN)
|
||||||
.map(str::to_lowercase)
|
.map(str::to_lowercase)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn make_tokenid(shortroomid: ShortRoomId, word: &str, pdu_id: &[u8]) -> TokenId {
|
||||||
|
debug_assert!(pdu_id.len() == PduId::LEN, "pdu_id size mismatch");
|
||||||
|
|
||||||
|
let mut key = make_prefix(shortroomid, word);
|
||||||
|
key.extend_from_slice(pdu_id);
|
||||||
|
key
|
||||||
|
}
|
||||||
|
|
||||||
|
fn make_prefix(shortroomid: ShortRoomId, word: &str) -> TokenId {
|
||||||
|
let mut key = TokenId::new();
|
||||||
|
key.extend_from_slice(&shortroomid.to_be_bytes());
|
||||||
|
key.extend_from_slice(word.as_bytes());
|
||||||
|
key.push(database::SEP);
|
||||||
|
key
|
||||||
|
}
|
||||||
|
|
||||||
|
fn prefix_len(word: &str) -> usize {
|
||||||
|
size_of::<ShortRoomId>()
|
||||||
|
.saturating_add(word.len())
|
||||||
|
.saturating_add(1)
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue