add timeout, comment

prevent infinite recursion
Merge branch 'main' into more-url-validation
2025-01-23 11:08:39 +01:00 · 2025-01-22 15:09:51 +01:00 · 2025-01-21 12:14:15 +00:00 · 2025-01-21 12:07:31 +00:00 · 2025-01-21 13:01:02 +01:00 · 2025-01-21 12:52:47 +01:00
5 changed files with 112 additions and 19 deletions
--- a/examples/local_federation/main.rs
+++ b/examples/local_federation/main.rs
@ -7,6 +7,7 @@ use crate::{
 };
 use error::Error;
 use std::{env::args, str::FromStr};
+use tokio::try_join;
 use tracing::log::{info, LevelFilter};

 mod activities;
@ -34,8 +35,10 @@ async fn main() -> Result<(), Error> {
        .map(|arg| Webserver::from_str(&arg).unwrap())
        .unwrap_or(Webserver::Axum);

-    let alpha = new_instance("localhost:8001", "alpha".to_string()).await?;
-    let beta = new_instance("localhost:8002", "beta".to_string()).await?;
+    let (alpha, beta) = try_join!(
+        new_instance("localhost:8001", "alpha".to_string()),
+        new_instance("localhost:8002", "beta".to_string())
+    )?;
    listen(&alpha, &webserver)?;
    listen(&beta, &webserver)?;
    info!("Local instances started");
--- a/src/config.rs
+++ b/src/config.rs
@ -26,11 +26,14 @@ use bytes::Bytes;
 use derive_builder::Builder;
 use dyn_clone::{clone_trait_object, DynClone};
 use moka::future::Cache;
-use reqwest::Request;
+use once_cell::sync::Lazy;
+use regex::Regex;
+use reqwest::{redirect::Policy, Client, Request};
 use reqwest_middleware::{ClientWithMiddleware, RequestBuilder};
 use rsa::{pkcs8::DecodePrivateKey, RsaPrivateKey};
 use serde::de::DeserializeOwned;
 use std::{
+    net::IpAddr,
    ops::Deref,
    sync::{
        atomic::{AtomicU32, Ordering},
@ -38,6 +41,7 @@ use std::{
    },
    time::Duration,
 };
+use tokio::net::lookup_host;
 use url::Url;

 /// Configuration for this library, with various federation related settings
@ -54,9 +58,14 @@ pub struct FederationConfig<T: Clone> {
    /// [crate::fetch::object_id::ObjectId] for more details.
    #[builder(default = "20")]
    pub(crate) http_fetch_limit: u32,
-    #[builder(default = "reqwest::Client::default().into()")]
-    /// HTTP client used for all outgoing requests. Middleware can be used to add functionality
-    /// like log tracing or retry of failed requests.
+    #[builder(default = "default_client()")]
+    /// HTTP client used for all outgoing requests. When passing a custom client here you should
+    /// also disable redirects and set timeouts.
+    ///
+    /// Middleware can be used to add functionality like log tracing or retry of failed requests.
+    /// Redirects are disabled by default, because automatic redirect URLs can't be validated.
+    /// Instead a single redirect is handled manually. The default client sets a timeout of 10s
+    /// to avoid excessive resource usage when connecting to dead servers.
    pub(crate) client: ClientWithMiddleware,
    /// Run library in debug mode. This allows usage of http and localhost urls. It also sends
    /// outgoing activities synchronously, not in background thread. This helps to make tests
@ -105,6 +114,9 @@ pub struct FederationConfig<T: Clone> {
    pub(crate) queue_retry_count: usize,
 }

+pub(crate) static DOMAIN_REGEX: Lazy<Regex> =
+    Lazy::new(|| Regex::new(r"^[a-zA-Z0-9.-]*$").expect("compile regex"));
+
 impl<T: Clone> FederationConfig<T> {
    /// Returns a new config builder with default values.
    pub fn builder() -> FederationConfigBuilder<T> {
@ -159,17 +171,56 @@ impl<T: Clone> FederationConfig<T> {
            return Ok(());
        }

-        if url.domain().is_none() {
+        let Some(domain) = url.domain() else {
            return Err(Error::UrlVerificationError("Url must have a domain"));
+        };
+        if !DOMAIN_REGEX.is_match(domain) {
+            return Err(Error::UrlVerificationError("Invalid characters in domain"));
        }

-        if url.domain() == Some("localhost") && !self.debug {
+        // Extra checks only for production mode
+        if !self.debug {
+            if url.port().is_some() {
+                return Err(Error::UrlVerificationError("Explicit port is not allowed"));
+            }
+
+            // Resolve domain and see if it points to private IP
+            // TODO: Use is_global() once stabilized
+            //       https://doc.rust-lang.org/std/net/enum.IpAddr.html#method.is_global
+            let invalid_ip =
+                lookup_host((domain.to_owned(), 80))
+                    .await?
+                    .any(|addr| match addr.ip() {
+                        IpAddr::V4(addr) => {
+                            addr.is_private()
+                                || addr.is_link_local()
+                                || addr.is_loopback()
+                                || addr.is_multicast()
+                        }
+                        IpAddr::V6(addr) => {
+                            addr.is_loopback()
+                        || addr.is_multicast()
+                        || ((addr.segments()[0] & 0xfe00) == 0xfc00) // is_unique_local
+                        || ((addr.segments()[0] & 0xffc0) == 0xfe80) // is_unicast_link_local
+                        }
+                    });
+            if invalid_ip {
                return Err(Error::UrlVerificationError(
                    "Localhost is only allowed in debug mode",
                ));
            }
+        }

+        // It is valid but uncommon for domains to end with `.` char. Drop this so it can't be used
+        // to bypass domain blocklist. Avoid cloning url in common case.
+        if domain.ends_with('.') {
+            let mut url = url.clone();
+            let domain = &domain[0..domain.len() - 1];
+            url.set_host(Some(domain))?;
+            self.url_verifier.verify(&url).await?;
+        } else {
            self.url_verifier.verify(url).await?;
+        }

        Ok(())
    }
@ -370,6 +421,17 @@ impl<T: Clone> FederationMiddleware<T> {
    }
 }

+fn default_client() -> ClientWithMiddleware {
+    let timeout = Duration::from_secs(10);
+    Client::builder()
+        .redirect(Policy::none())
+        .timeout(timeout)
+        .connect_timeout(timeout)
+        .build()
+        .unwrap_or_else(|_| Client::default())
+        .into()
+}
+
 #[cfg(test)]
 #[allow(clippy::unwrap_used)]
 mod test {
--- a/src/error.rs
+++ b/src/error.rs
@ -78,6 +78,9 @@ pub enum Error {
    /// Attempted to fetch object but the response's id field doesn't match
    #[error("Attempted to fetch object from {0} but the response's id field doesn't match")]
    FetchWrongId(Url),
+    /// I/O error from OS
+    #[error(transparent)]
+    IoError(#[from] std::io::Error),
    /// Other generic errors
    #[error("{0}")]
    Other(String),
--- a/src/fetch/mod.rs
+++ b/src/fetch/mod.rs
@ -11,7 +11,7 @@ use crate::{
    FEDERATION_CONTENT_TYPE,
 };
 use bytes::Bytes;
-use http::{HeaderValue, StatusCode};
+use http::{header::LOCATION, HeaderValue, StatusCode};
 use serde::de::DeserializeOwned;
 use std::sync::atomic::Ordering;
 use tracing::info;
@ -59,7 +59,7 @@ pub async fn fetch_object_http<T: Clone, Kind: DeserializeOwned>(
        r#"application/ld+json; profile="https://www.w3.org/ns/activitystreams""#, // activitypub standard
        r#"application/activity+json; charset=utf-8"#,                             // mastodon
    ];
-    let res = fetch_object_http_with_accept(url, data, &FETCH_CONTENT_TYPE).await?;
+    let res = fetch_object_http_with_accept(url, data, &FETCH_CONTENT_TYPE, false).await?;

    // Ensure correct content-type to prevent vulnerabilities, with case insensitive comparison.
    let content_type = res
@ -74,6 +74,7 @@ pub async fn fetch_object_http<T: Clone, Kind: DeserializeOwned>(
    // Ensure id field matches final url after redirect
    if res.object_id.as_ref() != Some(&res.url) {
        if let Some(res_object_id) = res.object_id {
+            data.config.verify_url_valid(&res_object_id).await?;
            // If id is different but still on the same domain, attempt to request object
            // again from url in id field.
            if res_object_id.domain() == res.url.domain() {
@ -99,6 +100,7 @@ async fn fetch_object_http_with_accept<T: Clone, Kind: DeserializeOwned>(
    url: &Url,
    data: &Data<T>,
    content_type: &HeaderValue,
+    recursive: bool,
 ) -> Result<FetchObjectResponse<Kind>, Error> {
    let config = &data.config;
    config.verify_url_valid(url).await?;
@ -131,6 +133,19 @@ async fn fetch_object_http_with_accept<T: Clone, Kind: DeserializeOwned>(
        req.send().await?
    };

+    // Allow a single redirect using recursion. Further redirects are ignored.
+    let location = res.headers().get(LOCATION).and_then(|l| l.to_str().ok());
+    if let (Some(location), false) = (location, recursive) {
+        let location = location.parse()?;
+        return Box::pin(fetch_object_http_with_accept(
+            &location,
+            data,
+            content_type,
+            true,
+        ))
+        .await;
+    }
+
    if res.status() == StatusCode::GONE {
        return Err(Error::ObjectDeleted(url.clone()));
    }
--- a/src/fetch/webfinger.rs
+++ b/src/fetch/webfinger.rs
@ -1,5 +1,5 @@
 use crate::{
-    config::Data,
+    config::{Data, DOMAIN_REGEX},
    error::Error,
    fetch::{fetch_object_http_with_accept, object_id::ObjectId},
    traits::{Actor, Object},
@ -54,21 +54,31 @@ where
        .splitn(2, '@')
        .collect_tuple()
        .ok_or(WebFingerError::WrongFormat.into_crate_error())?;
+
+    // For production mode make sure that domain doesn't contain any port or path.
+    if !data.config.debug && !DOMAIN_REGEX.is_match(domain) {
+        return Err(Error::UrlVerificationError("Invalid characters in domain").into());
+    }
+
    let protocol = if data.config.debug { "http" } else { "https" };
    let fetch_url =
        format!("{protocol}://{domain}/.well-known/webfinger?resource=acct:{identifier}");
    debug!("Fetching webfinger url: {}", &fetch_url);

-    let res: Webfinger = fetch_object_http_with_accept(
+    let res = fetch_object_http_with_accept::<_, Webfinger>(
        &Url::parse(&fetch_url).map_err(Error::UrlParse)?,
        data,
        &WEBFINGER_CONTENT_TYPE,
+        false,
    )
-    .await?
-    .object;
+    .await?;
+    if res.url.as_str() != fetch_url {
+        data.config.verify_url_valid(&res.url).await?;
+    }

-    debug_assert_eq!(res.subject, format!("acct:{identifier}"));
+    debug_assert_eq!(res.object.subject, format!("acct:{identifier}"));
    let links: Vec<Url> = res
+        .object
        .links
        .iter()
        .filter(|link| {
Author	SHA1	Message	Date
Felix Ableitner	c8212cac1a	add timeout, comment	2025-01-23 11:08:39 +01:00
Felix Ableitner	7f2c303c36	prevent infinite recursion	2025-01-22 15:09:51 +01:00
Nutomic	f822d45c6a	Merge branch 'main' into more-url-validation	2025-01-21 12:14:15 +00:00
Nutomic	93991ba3b5	Merge branch 'main' into more-url-validation	2025-01-21 12:07:31 +00:00
Felix Ableitner	686da0f03a	Merge branch 'main' into more-url-validation	2025-01-21 13:01:02 +01:00
Felix Ableitner	4991c1f9d9	clippy	2025-01-21 12:52:47 +01:00
Felix Ableitner	a53d874a12	manual redirect handling	2025-01-21 12:36:40 +01:00
Felix Ableitner	bbb4a17263	fix	2025-01-13 15:44:21 +01:00
Felix Ableitner	ac094086cd	clippy	2025-01-13 15:34:06 +01:00
Felix Ableitner	531d4264cd	Remove trailing . from domain	2025-01-13 15:28:39 +01:00
Felix Ableitner	9413bad37a	fix lemmy test	2025-01-13 10:39:19 +01:00
Felix Ableitner	8601c5ef04	clippy	2025-01-13 10:27:38 +01:00
Felix Ableitner	dc82698cd5	more domain validation	2025-01-13 10:24:58 +01:00
Felix Ableitner	4e2c5c1196	clippy	2025-01-13 10:16:03 +01:00
Felix Ableitner	1208deafc8	Dont allow redirect for webfinger	2025-01-13 10:08:51 +01:00
Felix Ableitner	e07a9c0075	Verify url after redirect	2025-01-13 10:05:07 +01:00
Felix Ableitner	47bfde132c	more fix	2025-01-09 12:06:43 +01:00
Felix Ableitner	451d388833	fix	2025-01-09 11:56:29 +01:00
Felix Ableitner	e4ea9abdb7	Add more url validation	2025-01-09 11:45:33 +01:00