remove hardcoded 300kb limit on spider size with config option of 1MB default

Modern websites are sadly massive; 300 KB is pretty low. 1 MB should be enough.

Signed-off-by: strawberry <strawberry@puppygock.gay>
This commit is contained in:
strawberry 2024-02-10 13:29:12 -05:00 committed by June
parent 2ea895199a
commit 48e4b71dd1
4 changed files with 19 additions and 5 deletions

View file

@@ -342,14 +342,13 @@ async fn download_image(client: &reqwest::Client, url: &str) -> Result<UrlPrevie
}
async fn download_html(client: &reqwest::Client, url: &str) -> Result<UrlPreviewData> {
let max_download_size = 300_000; // TODO: is this bytes? kilobytes? megabytes?
let mut response = client.get(url).send().await?;
let mut bytes: Vec<u8> = Vec::new();
while let Some(chunk) = response.chunk().await? {
bytes.extend_from_slice(&chunk);
if bytes.len() > max_download_size {
if bytes.len() > services().globals.url_preview_max_spider_size() {
debug!("Response body from URL {} exceeds url_preview_max_spider_size ({}), not processing the rest of the response body and assuming our necessary data is in this range.", url, services().globals.url_preview_max_spider_size());
break;
}
}