diff --git a/iroh-net/src/netcheck/reportgen.rs b/iroh-net/src/netcheck/reportgen.rs index c9cbb10610..8cd20b0c40 100644 --- a/iroh-net/src/netcheck/reportgen.rs +++ b/iroh-net/src/netcheck/reportgen.rs @@ -35,6 +35,7 @@ use tokio::{ }; use tokio_util::task::AbortOnDropHandle; use tracing::{debug, debug_span, error, info_span, trace, warn, Instrument, Span}; +use url::Host; use super::NetcheckMetrics; use crate::{ @@ -42,7 +43,7 @@ use crate::{ dns::{DnsResolver, ResolverExt}, netcheck::{self, Report}, ping::{PingError, Pinger}, - relay::{RelayMap, RelayNode, RelayUrl}, + relay::{http::RELAY_PROBE_PATH, RelayMap, RelayNode, RelayUrl}, stun, util::MaybeFuture, }; @@ -466,6 +467,7 @@ impl Actor { .as_ref() .and_then(|l| l.preferred_relay.clone()); + let dns_resolver = self.dns_resolver.clone(); let dm = self.relay_map.clone(); self.outstanding_tasks.captive_task = true; MaybeFuture { @@ -474,7 +476,7 @@ impl Actor { debug!("Captive portal check started after {CAPTIVE_PORTAL_DELAY:?}"); let captive_portal_check = tokio::time::timeout( CAPTIVE_PORTAL_TIMEOUT, - check_captive_portal(&dm, preferred_relay) + check_captive_portal(&dns_resolver, &dm, preferred_relay) .instrument(debug_span!("captive-portal")), ); match captive_portal_check.await { @@ -743,7 +745,7 @@ async fn run_probe( } Probe::Https { ref node, .. } => { debug!("sending probe HTTPS"); - match measure_https_latency(node).await { + match measure_https_latency(&dns_resolver, node, None).await { Ok((latency, ip)) => { result.latency = Some(latency); // We set these IPv4 and IPv6 but they're not really used @@ -853,7 +855,11 @@ async fn run_stun_probe( /// return a "204 No Content" response and checking if that's what we get. /// /// The boolean return is whether we think we have a captive portal. -async fn check_captive_portal(dm: &RelayMap, preferred_relay: Option) -> Result { +async fn check_captive_portal( + dns_resolver: &DnsResolver, + dm: &RelayMap, + preferred_relay: Option, +) -> Result { // If we have a preferred relay node and we can use it for non-STUN requests, try that; // otherwise, pick a random one suitable for non-STUN requests. let preferred_relay = preferred_relay.and_then(|url| match dm.get_node(&url) { @@ -881,9 +887,23 @@ async fn check_captive_portal(dm: &RelayMap, preferred_relay: Option) } }; - let client = reqwest::ClientBuilder::new() - .redirect(reqwest::redirect::Policy::none()) - .build()?; + let mut builder = reqwest::ClientBuilder::new().redirect(reqwest::redirect::Policy::none()); + if let Some(Host::Domain(domain)) = url.host() { + // Use our own resolver rather than getaddrinfo + // + // For some reason reqwest wants SocketAddr rather than IpAddr but then proceeds to + // ignore the port, extracting it from the URL instead. We supply a dummy port. + // + // Ideally we would try to resolve **both** IPv4 and IPv6 rather than purely race + // them. But our resolver doesn't support that yet. + let addrs: Vec<_> = dns_resolver + .lookup_ipv4_ipv6_staggered(domain, DNS_TIMEOUT, DNS_STAGGERING_MS) + .await? + .map(|ipaddr| SocketAddr::new(ipaddr, 80)) + .collect(); + builder = builder.resolve_to_addrs(domain, &addrs); + } + let client = builder.build()?; // Note: the set of valid characters in a challenge and the total // length is limited; see is_challenge_char in bin/iroh-relay for more @@ -1023,33 +1043,72 @@ async fn run_icmp_probe( Ok(report) } +/// Executes an HTTPS probe. +/// +/// If `certs` is provided they will be added to the trusted root certificates, allowing the +/// use of self-signed certificates for servers. Currently this is used for testing. #[allow(clippy::unused_async)] -async fn measure_https_latency(_node: &RelayNode) -> Result<(Duration, IpAddr)> { - bail!("not implemented"); - // TODO: - // - needs relayhttp::Client - // - measurement hooks to measure server processing time - - // metricHTTPSend.Add(1) - // let ctx, cancel := context.WithTimeout(httpstat.WithHTTPStat(ctx, &result), overallProbeTimeout); - // let dc := relayhttp.NewNetcheckClient(c.logf); - // let tlsConn, tcpConn, node := dc.DialRegionTLS(ctx, reg)?; - // if ta, ok := tlsConn.RemoteAddr().(*net.TCPAddr); - // req, err := http.NewRequestWithContext(ctx, "GET", "https://"+node.HostName+"/relay/latency-check", nil); - // resp, err := hc.Do(req); - - // // relays should give us a nominal status code, so anything else is probably - // // an access denied by a MITM proxy (or at the very least a signal not to - // // trust this latency check). - // if resp.StatusCode > 299 { - // return 0, ip, fmt.Errorf("unexpected status code: %d (%s)", resp.StatusCode, resp.Status) - // } - // _, err = io.Copy(io.Discard, io.LimitReader(resp.Body, 8<<10)); - // result.End(c.timeNow()) - - // // TODO: decide best timing heuristic here. - // // Maybe the server should return the tcpinfo_rtt? - // return result.ServerProcessing, ip, nil +async fn measure_https_latency( + dns_resolver: &DnsResolver, + node: &RelayNode, + certs: Option>>, +) -> Result<(Duration, IpAddr)> { + let url = node.url.join(RELAY_PROBE_PATH)?; + + // This should also use same connection establishment as relay client itself, which + // needs to be more configurable so users can do more crazy things: + // https://github.com/n0-computer/iroh/issues/2901 + let mut builder = reqwest::ClientBuilder::new().redirect(reqwest::redirect::Policy::none()); + if let Some(Host::Domain(domain)) = url.host() { + // Use our own resolver rather than getaddrinfo + // + // For some reason reqwest wants SocketAddr rather than IpAddr but then proceeds to + // ignore the port, extracting it from the URL instead. We supply a dummy port. + // + // The relay Client uses `.lookup_ipv4_ipv6` to connect, so use the same function + // but staggered for reliability. Ideally this tries to resolve **both** IPv4 and + // IPv6 though. But our resolver does not have a function for that yet. + let addrs: Vec<_> = dns_resolver + .lookup_ipv4_ipv6_staggered(domain, DNS_TIMEOUT, DNS_STAGGERING_MS) + .await? + .map(|ipaddr| SocketAddr::new(ipaddr, 80)) + .collect(); + builder = builder.resolve_to_addrs(domain, &addrs); + } + if let Some(certs) = certs { + for cert in certs { + let cert = reqwest::Certificate::from_der(&cert)?; + builder = builder.add_root_certificate(cert); + } + } + let client = builder.build()?; + + let start = Instant::now(); + let mut response = client.request(reqwest::Method::GET, url).send().await?; + let latency = start.elapsed(); + if response.status().is_success() { + // Drain the response body to be nice to the server, up to a limit. + const MAX_BODY_SIZE: usize = 8 << 10; // 8 KiB + let mut body_size = 0; + while let Some(chunk) = response.chunk().await? { + body_size += chunk.len(); + if body_size >= MAX_BODY_SIZE { + break; + } + } + + // Only `None` if a different hyper HttpConnector in the request. + let remote_ip = response + .remote_addr() + .context("missing HttpInfo from HttpConnector")? + .ip(); + Ok((latency, remote_ip)) + } else { + Err(anyhow!( + "Error response from server: '{}'", + response.status().canonical_reason().unwrap_or_default() + )) + } } /// Updates a netcheck [`Report`] with a new [`ProbeReport`]. @@ -1118,8 +1177,13 @@ fn update_report(report: &mut Report, probe_report: ProbeReport) { mod tests { use std::net::{Ipv4Addr, Ipv6Addr}; + use testresult::TestResult; + use super::*; - use crate::defaults::staging::{default_eu_relay_node, default_na_relay_node}; + use crate::{ + defaults::staging::{default_eu_relay_node, default_na_relay_node}, + test_utils, + }; #[test] fn test_update_report_stun_working() { @@ -1368,4 +1432,28 @@ mod tests { panic!("Ping error: {err:#}"); } } + + #[tokio::test] + async fn test_measure_https_latency() -> TestResult { + let _logging_guard = iroh_test::logging::setup(); + let (_relay_map, relay_url, server) = test_utils::run_relay_server().await?; + let dns_resolver = crate::dns::resolver(); + warn!(?relay_url, "RELAY_URL"); + let node = RelayNode { + stun_only: false, + stun_port: 0, + url: relay_url.clone(), + }; + let (latency, ip) = + measure_https_latency(dns_resolver, &node, server.certificates()).await?; + + assert!(latency > Duration::ZERO); + + let relay_url_ip = relay_url + .host_str() + .context("host")? + .parse::()?; + assert_eq!(ip, relay_url_ip); + Ok(()) + } } diff --git a/iroh-net/src/relay/http.rs b/iroh-net/src/relay/http.rs index 25474e8567..f291eb15af 100644 --- a/iroh-net/src/relay/http.rs +++ b/iroh-net/src/relay/http.rs @@ -10,17 +10,12 @@ pub(crate) const SUPPORTED_WEBSOCKET_VERSION: &str = "13"; /// (over websockets and a custom upgrade protocol). pub const RELAY_PATH: &str = "/relay"; /// The HTTP path under which the relay allows doing latency queries for testing. -pub const RELAY_PROBE_PATH: &str = "/relay/probe"; +pub const RELAY_PROBE_PATH: &str = "/ping"; /// The legacy HTTP path under which the relay used to accept relaying connections. /// We keep this for backwards compatibility. #[cfg(feature = "iroh-relay")] // legacy paths only used on server-side for backwards compat #[cfg_attr(iroh_docsrs, doc(cfg(feature = "iroh-relay")))] pub(crate) const LEGACY_RELAY_PATH: &str = "/derp"; -/// The legacy HTTP path under which the relay used to allow latency queries. -/// We keep this for backwards compatibility. -#[cfg(feature = "iroh-relay")] // legacy paths only used on server-side for backwards compat -#[cfg_attr(iroh_docsrs, doc(cfg(feature = "iroh-relay")))] -pub(crate) const LEGACY_RELAY_PROBE_PATH: &str = "/derp/probe"; /// The HTTP upgrade protocol used for relaying. #[derive(Debug, Copy, Clone, PartialEq, Eq)] diff --git a/iroh-net/src/relay/server.rs b/iroh-net/src/relay/server.rs index 6fce6f318f..5c0c0fcdee 100644 --- a/iroh-net/src/relay/server.rs +++ b/iroh-net/src/relay/server.rs @@ -8,6 +8,13 @@ //! always attached to a handle and when the handle is dropped the tasks abort. So tasks //! can not outlive their handle. It is also always possible to await for completion of a //! task. Some tasks additionally have a method to do graceful shutdown. +//! +//! The relay server hosts the following services: +//! +//! - HTTPS `/relay`: The main URL endpoint to which clients connect and sends traffic over. +//! - HTTPS `/ping`: Used for netcheck probes. +//! - HTTPS `/generate_204`: Used for netcheck probes. +//! - STUN: UDP port for STUN requests/responses. use std::{fmt, future::Future, net::SocketAddr, pin::Pin, sync::Arc}; @@ -27,10 +34,7 @@ use tokio::{ use tokio_util::task::AbortOnDropHandle; use tracing::{debug, error, info, info_span, instrument, trace, warn, Instrument}; -use crate::{ - relay::http::{LEGACY_RELAY_PROBE_PATH, RELAY_PROBE_PATH}, - stun, -}; +use crate::{relay::http::RELAY_PROBE_PATH, stun}; pub(crate) mod actor; pub(crate) mod client_conn; @@ -182,6 +186,11 @@ pub struct Server { relay_handle: Option, /// The main task running the server. supervisor: AbortOnDropHandle>, + /// The certificate for the server. + /// + /// If the server has manual certificates configured the certificate chain will be + /// available here, this can be used by a client to authenticate the server. + certificates: Option>>, } impl Server { @@ -227,7 +236,13 @@ impl Server { None => None, }; - // Start the Relay server. + // Start the Relay server, but first clone the certs out. + let certificates = config.relay.as_ref().and_then(|relay| { + relay.tls.as_ref().and_then(|tls| match tls.cert { + CertConfig::LetsEncrypt { .. } => None, + CertConfig::Manual { ref certs, .. } => Some(certs.clone()), + }) + }); let (relay_server, http_addr) = match config.relay { Some(relay_config) => { debug!("Starting Relay server"); @@ -243,11 +258,6 @@ impl Server { .headers(headers) .request_handler(Method::GET, "/", Box::new(root_handler)) .request_handler(Method::GET, "/index.html", Box::new(root_handler)) - .request_handler( - Method::GET, - LEGACY_RELAY_PROBE_PATH, - Box::new(probe_handler), - ) // backwards compat .request_handler(Method::GET, RELAY_PROBE_PATH, Box::new(probe_handler)) .request_handler(Method::GET, "/robots.txt", Box::new(robots_handler)); let http_addr = match relay_config.tls { @@ -284,7 +294,7 @@ impl Server { } CertConfig::Manual { private_key, certs } => { let server_config = - server_config.with_single_cert(certs.clone(), private_key)?; + server_config.with_single_cert(certs, private_key)?; let server_config = Arc::new(server_config); let acceptor = tokio_rustls::TlsAcceptor::from(server_config.clone()); @@ -336,6 +346,7 @@ impl Server { https_addr: http_addr.and(relay_addr), relay_handle, supervisor: AbortOnDropHandle::new(task), + certificates, }) } @@ -373,6 +384,11 @@ impl Server { pub fn stun_addr(&self) -> Option { self.stun_addr } + + /// The certificates chain if configured with manual TLS certificates. + pub fn certificates(&self) -> Option>> { + self.certificates.clone() + } } /// Supervisor for the relay server tasks. diff --git a/iroh-net/src/test_utils.rs b/iroh-net/src/test_utils.rs index 0761d0121a..b080a7dace 100644 --- a/iroh-net/src/test_utils.rs +++ b/iroh-net/src/test_utils.rs @@ -44,7 +44,9 @@ pub async fn run_relay_server() -> Result<(RelayMap, RelayUrl, Server)> { pub async fn run_relay_server_with( stun: Option, ) -> Result<(RelayMap, RelayUrl, Server)> { - let cert = rcgen::generate_simple_self_signed(vec!["localhost".to_string()]).unwrap(); + let cert = + rcgen::generate_simple_self_signed(vec!["localhost".to_string(), "127.0.0.1".to_string()]) + .expect("valid"); let rustls_cert = rustls::pki_types::CertificateDer::from(cert.serialize_der().unwrap()); let private_key = rustls::pki_types::PrivatePkcs8KeyDer::from(cert.get_key_pair().serialize_der()); @@ -67,7 +69,7 @@ pub async fn run_relay_server_with( metrics_addr: None, }; let server = Server::spawn(config).await.unwrap(); - let url: RelayUrl = format!("https://localhost:{}", server.https_addr().unwrap().port()) + let url: RelayUrl = format!("https://{}", server.https_addr().expect("configured")) .parse() .unwrap(); let m = RelayMap::from_nodes([RelayNode {