Merge branch 'LemmyNet:main' into dev
This commit is contained in:
commit
18fe8773c8
10 changed files with 154 additions and 22 deletions
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "activitypub_federation"
|
||||
version = "0.6.1"
|
||||
version = "0.6.2"
|
||||
edition = "2021"
|
||||
description = "High-level Activitypub framework"
|
||||
keywords = ["activitypub", "activitystreams", "federation", "fediverse"]
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ A high-level framework for [ActivityPub](https://www.w3.org/TR/activitypub/) fed
|
|||
|
||||
The ActivityPub protocol is a decentralized social networking protocol. It allows web servers to exchange data using JSON over HTTP. Data can be fetched on demand, and also delivered directly to inboxes for live updates.
|
||||
|
||||
While Activitypub is not in widespread use yet, it has the potential to form the basis of the next generation of social media. This is because it has a number of major advantages compared to existing platforms and alternative technologies:
|
||||
Activitypub has the potential to form the basis of the next generation of social media. This is because it has a number of major advantages compared to existing platforms and alternative technologies:
|
||||
|
||||
- **Interoperability**: Imagine being able to comment under a Youtube video directly from twitter.com, and having the comment shown under the video on youtube.com. Or following a Subreddit from Facebook. Such functionality is already available on the equivalent Fediverse platforms, thanks to common usage of Activitypub.
|
||||
- **Ease of use**: From a user perspective, decentralized social media works almost identically to existing websites: a website with email and password based login. Unlike pure peer-to-peer networks, it is not necessary to handle private keys or install any local software.
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ use crate::{
|
|||
};
|
||||
use error::Error;
|
||||
use std::{env::args, str::FromStr};
|
||||
use tokio::try_join;
|
||||
use tracing::log::{info, LevelFilter};
|
||||
|
||||
mod activities;
|
||||
|
|
@ -34,8 +35,10 @@ async fn main() -> Result<(), Error> {
|
|||
.map(|arg| Webserver::from_str(&arg).unwrap())
|
||||
.unwrap_or(Webserver::Axum);
|
||||
|
||||
let alpha = new_instance("localhost:8001", "alpha".to_string()).await?;
|
||||
let beta = new_instance("localhost:8002", "beta".to_string()).await?;
|
||||
let (alpha, beta) = try_join!(
|
||||
new_instance("localhost:8001", "alpha".to_string()),
|
||||
new_instance("localhost:8002", "beta".to_string())
|
||||
)?;
|
||||
listen(&alpha, &webserver)?;
|
||||
listen(&beta, &webserver)?;
|
||||
info!("Local instances started");
|
||||
|
|
|
|||
|
|
@ -26,11 +26,14 @@ use bytes::Bytes;
|
|||
use derive_builder::Builder;
|
||||
use dyn_clone::{clone_trait_object, DynClone};
|
||||
use moka::future::Cache;
|
||||
use reqwest::Request;
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use reqwest::{redirect::Policy, Client, Request};
|
||||
use reqwest_middleware::{ClientWithMiddleware, RequestBuilder};
|
||||
use rsa::{pkcs8::DecodePrivateKey, RsaPrivateKey};
|
||||
use serde::de::DeserializeOwned;
|
||||
use std::{
|
||||
net::IpAddr,
|
||||
ops::Deref,
|
||||
sync::{
|
||||
atomic::{AtomicU32, Ordering},
|
||||
|
|
@ -38,6 +41,7 @@ use std::{
|
|||
},
|
||||
time::Duration,
|
||||
};
|
||||
use tokio::net::lookup_host;
|
||||
use url::Url;
|
||||
|
||||
/// Configuration for this library, with various federation related settings
|
||||
|
|
@ -54,9 +58,14 @@ pub struct FederationConfig<T: Clone> {
|
|||
/// [crate::fetch::object_id::ObjectId] for more details.
|
||||
#[builder(default = "20")]
|
||||
pub(crate) http_fetch_limit: u32,
|
||||
#[builder(default = "reqwest::Client::default().into()")]
|
||||
/// HTTP client used for all outgoing requests. Middleware can be used to add functionality
|
||||
/// like log tracing or retry of failed requests.
|
||||
#[builder(default = "default_client()")]
|
||||
/// HTTP client used for all outgoing requests. When passing a custom client here you should
|
||||
/// also disable redirects and set timeouts.
|
||||
///
|
||||
/// Middleware can be used to add functionality like log tracing or retry of failed requests.
|
||||
/// Redirects are disabled by default, because automatic redirect URLs can't be validated.
|
||||
/// Instead a single redirect is handled manually. The default client sets a timeout of 10s
|
||||
/// to avoid excessive resource usage when connecting to dead servers.
|
||||
pub(crate) client: ClientWithMiddleware,
|
||||
/// Run library in debug mode. This allows usage of http and localhost urls. It also sends
|
||||
/// outgoing activities synchronously, not in background thread. This helps to make tests
|
||||
|
|
@ -105,6 +114,9 @@ pub struct FederationConfig<T: Clone> {
|
|||
pub(crate) queue_retry_count: usize,
|
||||
}
|
||||
|
||||
/// Lazily compiled pattern that matches strings made up only of ASCII letters, digits, `.` and
/// `-`. Anything else (for example `:` or `/`) makes the match fail.
///
/// NOTE(review): the `*` quantifier means the empty string also matches — confirm that callers
/// never pass an empty domain.
pub(crate) static DOMAIN_REGEX: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new(r"^[a-zA-Z0-9.-]*$").expect("compile regex"));
|
||||
|
||||
impl<T: Clone> FederationConfig<T> {
|
||||
/// Returns a new config builder with default values.
|
||||
pub fn builder() -> FederationConfigBuilder<T> {
|
||||
|
|
@ -159,17 +171,56 @@ impl<T: Clone> FederationConfig<T> {
|
|||
return Ok(());
|
||||
}
|
||||
|
||||
if url.domain().is_none() {
|
||||
let Some(domain) = url.domain() else {
|
||||
return Err(Error::UrlVerificationError("Url must have a domain"));
|
||||
};
|
||||
if !DOMAIN_REGEX.is_match(domain) {
|
||||
return Err(Error::UrlVerificationError("Invalid characters in domain"));
|
||||
}
|
||||
|
||||
if url.domain() == Some("localhost") && !self.debug {
|
||||
// Extra checks only for production mode
|
||||
if !self.debug {
|
||||
if url.port().is_some() {
|
||||
return Err(Error::UrlVerificationError("Explicit port is not allowed"));
|
||||
}
|
||||
|
||||
// Resolve domain and see if it points to private IP
|
||||
// TODO: Use is_global() once stabilized
|
||||
// https://doc.rust-lang.org/std/net/enum.IpAddr.html#method.is_global
|
||||
let invalid_ip =
|
||||
lookup_host((domain.to_owned(), 80))
|
||||
.await?
|
||||
.any(|addr| match addr.ip() {
|
||||
IpAddr::V4(addr) => {
|
||||
addr.is_private()
|
||||
|| addr.is_link_local()
|
||||
|| addr.is_loopback()
|
||||
|| addr.is_multicast()
|
||||
}
|
||||
IpAddr::V6(addr) => {
|
||||
addr.is_loopback()
|
||||
|| addr.is_multicast()
|
||||
|| ((addr.segments()[0] & 0xfe00) == 0xfc00) // is_unique_local
|
||||
|| ((addr.segments()[0] & 0xffc0) == 0xfe80) // is_unicast_link_local
|
||||
}
|
||||
});
|
||||
if invalid_ip {
|
||||
return Err(Error::UrlVerificationError(
|
||||
"Localhost is only allowed in debug mode",
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// It is valid but uncommon for domains to end with `.` char. Drop this so it can't be used
|
||||
// to bypass domain blocklist. Avoid cloning url in common case.
|
||||
if domain.ends_with('.') {
|
||||
let mut url = url.clone();
|
||||
let domain = &domain[0..domain.len() - 1];
|
||||
url.set_host(Some(domain))?;
|
||||
self.url_verifier.verify(&url).await?;
|
||||
} else {
|
||||
self.url_verifier.verify(url).await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -370,6 +421,17 @@ impl<T: Clone> FederationMiddleware<T> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Builds the HTTP client used when no custom client is supplied to the config builder.
///
/// Automatic redirects are disabled and both the overall request timeout and the connect
/// timeout are set to 10 seconds.
fn default_client() -> ClientWithMiddleware {
|
||||
let timeout = Duration::from_secs(10);
|
||||
Client::builder()
|
||||
.redirect(Policy::none())
|
||||
.timeout(timeout)
|
||||
.connect_timeout(timeout)
|
||||
.build()
|
||||
// NOTE(review): this fallback uses `Client::default()`, which is not built with the redirect
// policy or timeouts configured above — confirm that silently dropping them is intended.
.unwrap_or_else(|_| Client::default())
|
||||
// Convert the plain client into `ClientWithMiddleware`, the return type of this function.
.into()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[allow(clippy::unwrap_used)]
|
||||
mod test {
|
||||
|
|
|
|||
|
|
@ -78,6 +78,9 @@ pub enum Error {
|
|||
/// Attempted to fetch object but the response's id field doesn't match
|
||||
#[error("Attempted to fetch object from {0} but the response's id field doesn't match")]
|
||||
FetchWrongId(Url),
|
||||
/// I/O error from OS
|
||||
#[error(transparent)]
|
||||
IoError(#[from] std::io::Error),
|
||||
/// Other generic errors
|
||||
#[error("{0}")]
|
||||
Other(String),
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ use crate::{
|
|||
FEDERATION_CONTENT_TYPE,
|
||||
};
|
||||
use bytes::Bytes;
|
||||
use http::{HeaderValue, StatusCode};
|
||||
use http::{header::LOCATION, HeaderValue, StatusCode};
|
||||
use serde::de::DeserializeOwned;
|
||||
use std::sync::atomic::Ordering;
|
||||
use tracing::info;
|
||||
|
|
@ -59,7 +59,7 @@ pub async fn fetch_object_http<T: Clone, Kind: DeserializeOwned>(
|
|||
r#"application/ld+json; profile="https://www.w3.org/ns/activitystreams""#, // activitypub standard
|
||||
r#"application/activity+json; charset=utf-8"#, // mastodon
|
||||
];
|
||||
let res = fetch_object_http_with_accept(url, data, &FETCH_CONTENT_TYPE).await?;
|
||||
let res = fetch_object_http_with_accept(url, data, &FETCH_CONTENT_TYPE, false).await?;
|
||||
|
||||
// Ensure correct content-type to prevent vulnerabilities, with case insensitive comparison.
|
||||
let content_type = res
|
||||
|
|
@ -74,6 +74,7 @@ pub async fn fetch_object_http<T: Clone, Kind: DeserializeOwned>(
|
|||
// Ensure id field matches final url after redirect
|
||||
if res.object_id.as_ref() != Some(&res.url) {
|
||||
if let Some(res_object_id) = res.object_id {
|
||||
data.config.verify_url_valid(&res_object_id).await?;
|
||||
// If id is different but still on the same domain, attempt to request object
|
||||
// again from url in id field.
|
||||
if res_object_id.domain() == res.url.domain() {
|
||||
|
|
@ -99,6 +100,7 @@ async fn fetch_object_http_with_accept<T: Clone, Kind: DeserializeOwned>(
|
|||
url: &Url,
|
||||
data: &Data<T>,
|
||||
content_type: &HeaderValue,
|
||||
recursive: bool,
|
||||
) -> Result<FetchObjectResponse<Kind>, Error> {
|
||||
let config = &data.config;
|
||||
config.verify_url_valid(url).await?;
|
||||
|
|
@ -131,6 +133,19 @@ async fn fetch_object_http_with_accept<T: Clone, Kind: DeserializeOwned>(
|
|||
req.send().await?
|
||||
};
|
||||
|
||||
// Allow a single redirect using recursion. Further redirects are ignored.
|
||||
let location = res.headers().get(LOCATION).and_then(|l| l.to_str().ok());
|
||||
if let (Some(location), false) = (location, recursive) {
|
||||
let location = location.parse()?;
|
||||
return Box::pin(fetch_object_http_with_accept(
|
||||
&location,
|
||||
data,
|
||||
content_type,
|
||||
true,
|
||||
))
|
||||
.await;
|
||||
}
|
||||
|
||||
if res.status() == StatusCode::GONE {
|
||||
return Err(Error::ObjectDeleted(url.clone()));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -360,6 +360,7 @@ const _IMPL_DIESEL_NEW_TYPE_FOR_OBJECT_ID: () = {
|
|||
}
|
||||
};
|
||||
|
||||
/// Internal only
|
||||
#[cfg(test)]
|
||||
#[allow(clippy::unwrap_used)]
|
||||
#[allow(missing_docs)]
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
use crate::{
|
||||
config::Data,
|
||||
config::{Data, DOMAIN_REGEX},
|
||||
error::Error,
|
||||
fetch::{fetch_object_http_with_accept, object_id::ObjectId},
|
||||
traits::{Actor, Object},
|
||||
|
|
@ -53,21 +53,31 @@ where
|
|||
.splitn(2, '@')
|
||||
.collect_tuple()
|
||||
.ok_or(WebFingerError::WrongFormat.into_crate_error())?;
|
||||
|
||||
// For production mode make sure that domain doesn't contain any port or path.
|
||||
if !data.config.debug && !DOMAIN_REGEX.is_match(domain) {
|
||||
return Err(Error::UrlVerificationError("Invalid characters in domain").into());
|
||||
}
|
||||
|
||||
let protocol = if data.config.debug { "http" } else { "https" };
|
||||
let fetch_url =
|
||||
format!("{protocol}://{domain}/.well-known/webfinger?resource=acct:{identifier}");
|
||||
debug!("Fetching webfinger url: {}", &fetch_url);
|
||||
|
||||
let res: Webfinger = fetch_object_http_with_accept(
|
||||
let res = fetch_object_http_with_accept::<_, Webfinger>(
|
||||
&Url::parse(&fetch_url).map_err(Error::UrlParse)?,
|
||||
data,
|
||||
&WEBFINGER_CONTENT_TYPE,
|
||||
false,
|
||||
)
|
||||
.await?
|
||||
.object;
|
||||
.await?;
|
||||
if res.url.as_str() != fetch_url {
|
||||
data.config.verify_url_valid(&res.url).await?;
|
||||
}
|
||||
|
||||
debug_assert_eq!(res.subject, format!("acct:{identifier}"));
|
||||
debug_assert_eq!(res.object.subject, format!("acct:{identifier}"));
|
||||
let links: Vec<Url> = res
|
||||
.object
|
||||
.links
|
||||
.iter()
|
||||
.filter(|link| {
|
||||
|
|
|
|||
|
|
@ -276,6 +276,7 @@ pub(crate) fn verify_body_hash(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Internal only
|
||||
#[cfg(test)]
|
||||
#[allow(clippy::unwrap_used)]
|
||||
#[allow(missing_docs)]
|
||||
|
|
@ -379,6 +380,7 @@ pub mod test {
|
|||
assert_eq!(invalid, Err(Error::ActivityBodyDigestInvalid));
|
||||
}
|
||||
|
||||
/// Internal only, return hardcoded keypair for testing
|
||||
pub fn test_keypair() -> Keypair {
|
||||
let rsa = RsaPrivateKey::from_pkcs1_pem(PRIVATE_KEY).unwrap();
|
||||
let pkey = RsaPublicKey::from(&rsa);
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
//! Verify that received data is valid
|
||||
|
||||
use crate::error::Error;
|
||||
use crate::{config::Data, error::Error, fetch::object_id::ObjectId, traits::Object};
|
||||
use serde::Deserialize;
|
||||
use url::Url;
|
||||
|
||||
/// Check that both urls have the same domain. If not, return UrlVerificationError.
|
||||
|
|
@ -36,3 +37,38 @@ pub fn verify_urls_match(a: &Url, b: &Url) -> Result<(), Error> {
|
|||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check that the given ID doesn't match the local domain.
|
||||
///
|
||||
/// It is important to verify this to avoid local objects from being overwritten. In general
|
||||
/// locally created objects should be considered authoritative, while incoming federated data
|
||||
/// is untrusted. Lack of such a check could allow an attacker to rewrite local posts. It could
|
||||
/// also result in an `object.local` field being overwritten with `false` for local objects, resulting in invalid data.
|
||||
///
|
||||
/// ```
|
||||
/// # use activitypub_federation::fetch::object_id::ObjectId;
|
||||
/// # use activitypub_federation::config::FederationConfig;
|
||||
/// # use activitypub_federation::protocol::verification::verify_is_remote_object;
|
||||
/// # use activitypub_federation::traits::tests::{DbConnection, DbUser};
|
||||
/// # tokio::runtime::Runtime::new().unwrap().block_on(async {
|
||||
/// # let config = FederationConfig::builder().domain("example.com").app_data(DbConnection).build().await?;
|
||||
/// # let data = config.to_request_data();
|
||||
/// let id = ObjectId::<DbUser>::parse("https://remote.com/u/name")?;
|
||||
/// assert!(verify_is_remote_object(&id, &data).is_ok());
|
||||
/// # Ok::<(), anyhow::Error>(())
|
||||
/// # }).unwrap();
|
||||
/// ```
|
||||
pub fn verify_is_remote_object<Kind, R: Clone>(
|
||||
id: &ObjectId<Kind>,
|
||||
data: &Data<<Kind as Object>::DataType>,
|
||||
) -> Result<(), Error>
|
||||
where
|
||||
// `R` is the data type associated with `Kind`, i.e. the type wrapped by the `data` argument.
Kind: Object<DataType = R> + Send + 'static,
|
||||
for<'de2> <Kind as Object>::Kind: Deserialize<'de2>,
|
||||
{
|
||||
// Ids that `ObjectId::is_local` reports as local are rejected; everything else passes.
if id.is_local(data) {
|
||||
Err(Error::UrlVerificationError("Object is not remote"))
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue