Skip to content

Commit 1698a7c

Browse files
authored
Merge feca9ef into 911d7a6
2 parents 911d7a6 + feca9ef commit 1698a7c

File tree

4 files changed

+154
-53
lines changed

4 files changed

+154
-53
lines changed

iroh-net/src/netcheck/reportgen.rs

Lines changed: 122 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,15 @@ use tokio::{
3535
};
3636
use tokio_util::task::AbortOnDropHandle;
3737
use tracing::{debug, debug_span, error, info_span, trace, warn, Instrument, Span};
38+
use url::Host;
3839

3940
use super::NetcheckMetrics;
4041
use crate::{
4142
defaults::DEFAULT_STUN_PORT,
4243
dns::{DnsResolver, ResolverExt},
4344
netcheck::{self, Report},
4445
ping::{PingError, Pinger},
45-
relay::{RelayMap, RelayNode, RelayUrl},
46+
relay::{http::RELAY_PROBE_PATH, RelayMap, RelayNode, RelayUrl},
4647
stun,
4748
util::MaybeFuture,
4849
};
@@ -466,6 +467,7 @@ impl Actor {
466467
.as_ref()
467468
.and_then(|l| l.preferred_relay.clone());
468469

470+
let dns_resolver = self.dns_resolver.clone();
469471
let dm = self.relay_map.clone();
470472
self.outstanding_tasks.captive_task = true;
471473
MaybeFuture {
@@ -474,7 +476,7 @@ impl Actor {
474476
debug!("Captive portal check started after {CAPTIVE_PORTAL_DELAY:?}");
475477
let captive_portal_check = tokio::time::timeout(
476478
CAPTIVE_PORTAL_TIMEOUT,
477-
check_captive_portal(&dm, preferred_relay)
479+
check_captive_portal(&dns_resolver, &dm, preferred_relay)
478480
.instrument(debug_span!("captive-portal")),
479481
);
480482
match captive_portal_check.await {
@@ -743,7 +745,7 @@ async fn run_probe(
743745
}
744746
Probe::Https { ref node, .. } => {
745747
debug!("sending probe HTTPS");
746-
match measure_https_latency(node).await {
748+
match measure_https_latency(&dns_resolver, node, None).await {
747749
Ok((latency, ip)) => {
748750
result.latency = Some(latency);
749751
// We set these IPv4 and IPv6 but they're not really used
@@ -853,7 +855,11 @@ async fn run_stun_probe(
853855
/// return a "204 No Content" response and checking if that's what we get.
854856
///
855857
/// The boolean return is whether we think we have a captive portal.
856-
async fn check_captive_portal(dm: &RelayMap, preferred_relay: Option<RelayUrl>) -> Result<bool> {
858+
async fn check_captive_portal(
859+
dns_resolver: &DnsResolver,
860+
dm: &RelayMap,
861+
preferred_relay: Option<RelayUrl>,
862+
) -> Result<bool> {
857863
// If we have a preferred relay node and we can use it for non-STUN requests, try that;
858864
// otherwise, pick a random one suitable for non-STUN requests.
859865
let preferred_relay = preferred_relay.and_then(|url| match dm.get_node(&url) {
@@ -881,9 +887,23 @@ async fn check_captive_portal(dm: &RelayMap, preferred_relay: Option<RelayUrl>)
881887
}
882888
};
883889

884-
let client = reqwest::ClientBuilder::new()
885-
.redirect(reqwest::redirect::Policy::none())
886-
.build()?;
890+
let mut builder = reqwest::ClientBuilder::new().redirect(reqwest::redirect::Policy::none());
891+
if let Some(Host::Domain(domain)) = url.host() {
892+
// Use our own resolver rather than getaddrinfo
893+
//
894+
// For some reason reqwest wants SocketAddr rather than IpAddr but then proceeds to
895+
// ignore the port, extracting it from the URL instead. We supply a dummy port.
896+
//
897+
// Ideally we would try to resolve **both** IPv4 and IPv6 rather than purely race
898+
// them. But our resolver doesn't support that yet.
899+
let addrs: Vec<_> = dns_resolver
900+
.lookup_ipv4_ipv6_staggered(domain, DNS_TIMEOUT, DNS_STAGGERING_MS)
901+
.await?
902+
.map(|ipaddr| SocketAddr::new(ipaddr, 80))
903+
.collect();
904+
builder = builder.resolve_to_addrs(domain, &addrs);
905+
}
906+
let client = builder.build()?;
887907

888908
// Note: the set of valid characters in a challenge and the total
889909
// length is limited; see is_challenge_char in bin/iroh-relay for more
@@ -1023,33 +1043,72 @@ async fn run_icmp_probe(
10231043
Ok(report)
10241044
}
10251045

1046+
/// Executes an HTTPS probe.
1047+
///
1048+
/// If `certs` is provided they will be added to the trusted root certificates, allowing the
1049+
/// use of self-signed certificates for servers. Currently this is used for testing.
10261050
#[allow(clippy::unused_async)]
1027-
async fn measure_https_latency(_node: &RelayNode) -> Result<(Duration, IpAddr)> {
1028-
bail!("not implemented");
1029-
// TODO:
1030-
// - needs relayhttp::Client
1031-
// - measurement hooks to measure server processing time
1032-
1033-
// metricHTTPSend.Add(1)
1034-
// let ctx, cancel := context.WithTimeout(httpstat.WithHTTPStat(ctx, &result), overallProbeTimeout);
1035-
// let dc := relayhttp.NewNetcheckClient(c.logf);
1036-
// let tlsConn, tcpConn, node := dc.DialRegionTLS(ctx, reg)?;
1037-
// if ta, ok := tlsConn.RemoteAddr().(*net.TCPAddr);
1038-
// req, err := http.NewRequestWithContext(ctx, "GET", "https://"+node.HostName+"/relay/latency-check", nil);
1039-
// resp, err := hc.Do(req);
1040-
1041-
// // relays should give us a nominal status code, so anything else is probably
1042-
// // an access denied by a MITM proxy (or at the very least a signal not to
1043-
// // trust this latency check).
1044-
// if resp.StatusCode > 299 {
1045-
// return 0, ip, fmt.Errorf("unexpected status code: %d (%s)", resp.StatusCode, resp.Status)
1046-
// }
1047-
// _, err = io.Copy(io.Discard, io.LimitReader(resp.Body, 8<<10));
1048-
// result.End(c.timeNow())
1049-
1050-
// // TODO: decide best timing heuristic here.
1051-
// // Maybe the server should return the tcpinfo_rtt?
1052-
// return result.ServerProcessing, ip, nil
1051+
async fn measure_https_latency(
1052+
dns_resolver: &DnsResolver,
1053+
node: &RelayNode,
1054+
certs: Option<Vec<rustls::pki_types::CertificateDer<'static>>>,
1055+
) -> Result<(Duration, IpAddr)> {
1056+
let url = node.url.join(RELAY_PROBE_PATH)?;
1057+
1058+
// This should also use same connection establishment as relay client itself, which
1059+
// needs to be more configurable so users can do more crazy things:
1060+
// https://github.com/n0-computer/iroh/issues/2901
1061+
let mut builder = reqwest::ClientBuilder::new().redirect(reqwest::redirect::Policy::none());
1062+
if let Some(Host::Domain(domain)) = url.host() {
1063+
// Use our own resolver rather than getaddrinfo
1064+
//
1065+
// For some reason reqwest wants SocketAddr rather than IpAddr but then proceeds to
1066+
// ignore the port, extracting it from the URL instead. We supply a dummy port.
1067+
//
1068+
// The relay Client uses `.lookup_ipv4_ipv6` to connect, so use the same function
1069+
// but staggered for reliability. Ideally this tries to resolve **both** IPv4 and
1070+
// IPv6 though. But our resolver does not have a function for that yet.
1071+
let addrs: Vec<_> = dns_resolver
1072+
.lookup_ipv4_ipv6_staggered(domain, DNS_TIMEOUT, DNS_STAGGERING_MS)
1073+
.await?
1074+
.map(|ipaddr| SocketAddr::new(ipaddr, 80))
1075+
.collect();
1076+
builder = builder.resolve_to_addrs(domain, &addrs);
1077+
}
1078+
if let Some(certs) = certs {
1079+
for cert in certs {
1080+
let cert = reqwest::Certificate::from_der(&cert)?;
1081+
builder = builder.add_root_certificate(cert);
1082+
}
1083+
}
1084+
let client = builder.build()?;
1085+
1086+
let start = Instant::now();
1087+
let mut response = client.request(reqwest::Method::GET, url).send().await?;
1088+
let latency = start.elapsed();
1089+
if response.status().is_success() {
1090+
// Drain the response body to be nice to the server, up to a limit.
1091+
const MAX_BODY_SIZE: usize = 8 << 10; // 8 KiB
1092+
let mut body_size = 0;
1093+
while let Some(chunk) = response.chunk().await? {
1094+
body_size += chunk.len();
1095+
if body_size >= MAX_BODY_SIZE {
1096+
break;
1097+
}
1098+
}
1099+
1100+
// Only `None` if a different hyper HttpConnector in the request.
1101+
let remote_ip = response
1102+
.remote_addr()
1103+
.context("missing HttpInfo from HttpConnector")?
1104+
.ip();
1105+
Ok((latency, remote_ip))
1106+
} else {
1107+
Err(anyhow!(
1108+
"Error response from server: '{}'",
1109+
response.status().canonical_reason().unwrap_or_default()
1110+
))
1111+
}
10531112
}
10541113

10551114
/// Updates a netcheck [`Report`] with a new [`ProbeReport`].
@@ -1118,8 +1177,13 @@ fn update_report(report: &mut Report, probe_report: ProbeReport) {
11181177
mod tests {
11191178
use std::net::{Ipv4Addr, Ipv6Addr};
11201179

1180+
use testresult::TestResult;
1181+
11211182
use super::*;
1122-
use crate::defaults::staging::{default_eu_relay_node, default_na_relay_node};
1183+
use crate::{
1184+
defaults::staging::{default_eu_relay_node, default_na_relay_node},
1185+
test_utils,
1186+
};
11231187

11241188
#[test]
11251189
fn test_update_report_stun_working() {
@@ -1368,4 +1432,28 @@ mod tests {
13681432
panic!("Ping error: {err:#}");
13691433
}
13701434
}
1435+
1436+
#[tokio::test]
1437+
async fn test_measure_https_latency() -> TestResult {
1438+
let _logging_guard = iroh_test::logging::setup();
1439+
let (_relay_map, relay_url, server) = test_utils::run_relay_server().await?;
1440+
let dns_resolver = crate::dns::resolver();
1441+
warn!(?relay_url, "RELAY_URL");
1442+
let node = RelayNode {
1443+
stun_only: false,
1444+
stun_port: 0,
1445+
url: relay_url.clone(),
1446+
};
1447+
let (latency, ip) =
1448+
measure_https_latency(dns_resolver, &node, server.certificates()).await?;
1449+
1450+
assert!(latency > Duration::ZERO);
1451+
1452+
let relay_url_ip = relay_url
1453+
.host_str()
1454+
.context("host")?
1455+
.parse::<std::net::IpAddr>()?;
1456+
assert_eq!(ip, relay_url_ip);
1457+
Ok(())
1458+
}
13711459
}

iroh-net/src/relay/http.rs

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,12 @@ pub(crate) const SUPPORTED_WEBSOCKET_VERSION: &str = "13";
1010
/// (over websockets and a custom upgrade protocol).
1111
pub const RELAY_PATH: &str = "/relay";
1212
/// The HTTP path under which the relay allows doing latency queries for testing.
13-
pub const RELAY_PROBE_PATH: &str = "/relay/probe";
13+
pub const RELAY_PROBE_PATH: &str = "/ping";
1414
/// The legacy HTTP path under which the relay used to accept relaying connections.
1515
/// We keep this for backwards compatibility.
1616
#[cfg(feature = "iroh-relay")] // legacy paths only used on server-side for backwards compat
1717
#[cfg_attr(iroh_docsrs, doc(cfg(feature = "iroh-relay")))]
1818
pub(crate) const LEGACY_RELAY_PATH: &str = "/derp";
19-
/// The legacy HTTP path under which the relay used to allow latency queries.
20-
/// We keep this for backwards compatibility.
21-
#[cfg(feature = "iroh-relay")] // legacy paths only used on server-side for backwards compat
22-
#[cfg_attr(iroh_docsrs, doc(cfg(feature = "iroh-relay")))]
23-
pub(crate) const LEGACY_RELAY_PROBE_PATH: &str = "/derp/probe";
2419

2520
/// The HTTP upgrade protocol used for relaying.
2621
#[derive(Debug, Copy, Clone, PartialEq, Eq)]

iroh-net/src/relay/server.rs

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,13 @@
88
//! always attached to a handle and when the handle is dropped the tasks abort. So tasks
99
//! can not outlive their handle. It is also always possible to await for completion of a
1010
//! task. Some tasks additionally have a method to do graceful shutdown.
11+
//!
12+
//! The relay server hosts the following services:
13+
//!
14+
//! - HTTPS `/relay`: The main URL endpoint to which clients connect and sends traffic over.
15+
//! - HTTPS `/ping`: Used for netcheck probes.
16+
//! - HTTPS `/generate_204`: Used for netcheck probes.
17+
//! - STUN: UDP port for STUN requests/responses.
1118
1219
use std::{fmt, future::Future, net::SocketAddr, pin::Pin, sync::Arc};
1320

@@ -27,10 +34,7 @@ use tokio::{
2734
use tokio_util::task::AbortOnDropHandle;
2835
use tracing::{debug, error, info, info_span, instrument, trace, warn, Instrument};
2936

30-
use crate::{
31-
relay::http::{LEGACY_RELAY_PROBE_PATH, RELAY_PROBE_PATH},
32-
stun,
33-
};
37+
use crate::{relay::http::RELAY_PROBE_PATH, stun};
3438

3539
pub(crate) mod actor;
3640
pub(crate) mod client_conn;
@@ -182,6 +186,11 @@ pub struct Server {
182186
relay_handle: Option<http_server::ServerHandle>,
183187
/// The main task running the server.
184188
supervisor: AbortOnDropHandle<Result<()>>,
189+
/// The certificate for the server.
190+
///
191+
/// If the server has manual certificates configured the certificate chain will be
192+
/// available here, this can be used by a client to authenticate the server.
193+
certificates: Option<Vec<rustls::pki_types::CertificateDer<'static>>>,
185194
}
186195

187196
impl Server {
@@ -227,7 +236,13 @@ impl Server {
227236
None => None,
228237
};
229238

230-
// Start the Relay server.
239+
// Start the Relay server, but first clone the certs out.
240+
let certificates = config.relay.as_ref().and_then(|relay| {
241+
relay.tls.as_ref().and_then(|tls| match tls.cert {
242+
CertConfig::LetsEncrypt { .. } => None,
243+
CertConfig::Manual { ref certs, .. } => Some(certs.clone()),
244+
})
245+
});
231246
let (relay_server, http_addr) = match config.relay {
232247
Some(relay_config) => {
233248
debug!("Starting Relay server");
@@ -243,11 +258,6 @@ impl Server {
243258
.headers(headers)
244259
.request_handler(Method::GET, "/", Box::new(root_handler))
245260
.request_handler(Method::GET, "/index.html", Box::new(root_handler))
246-
.request_handler(
247-
Method::GET,
248-
LEGACY_RELAY_PROBE_PATH,
249-
Box::new(probe_handler),
250-
) // backwards compat
251261
.request_handler(Method::GET, RELAY_PROBE_PATH, Box::new(probe_handler))
252262
.request_handler(Method::GET, "/robots.txt", Box::new(robots_handler));
253263
let http_addr = match relay_config.tls {
@@ -284,7 +294,7 @@ impl Server {
284294
}
285295
CertConfig::Manual { private_key, certs } => {
286296
let server_config =
287-
server_config.with_single_cert(certs.clone(), private_key)?;
297+
server_config.with_single_cert(certs, private_key)?;
288298
let server_config = Arc::new(server_config);
289299
let acceptor =
290300
tokio_rustls::TlsAcceptor::from(server_config.clone());
@@ -336,6 +346,7 @@ impl Server {
336346
https_addr: http_addr.and(relay_addr),
337347
relay_handle,
338348
supervisor: AbortOnDropHandle::new(task),
349+
certificates,
339350
})
340351
}
341352

@@ -373,6 +384,11 @@ impl Server {
373384
pub fn stun_addr(&self) -> Option<SocketAddr> {
374385
self.stun_addr
375386
}
387+
388+
/// The certificates chain if configured with manual TLS certificates.
389+
pub fn certificates(&self) -> Option<Vec<rustls::pki_types::CertificateDer<'static>>> {
390+
self.certificates.clone()
391+
}
376392
}
377393

378394
/// Supervisor for the relay server tasks.

iroh-net/src/test_utils.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,9 @@ pub async fn run_relay_server() -> Result<(RelayMap, RelayUrl, Server)> {
4444
pub async fn run_relay_server_with(
4545
stun: Option<StunConfig>,
4646
) -> Result<(RelayMap, RelayUrl, Server)> {
47-
let cert = rcgen::generate_simple_self_signed(vec!["localhost".to_string()]).unwrap();
47+
let cert =
48+
rcgen::generate_simple_self_signed(vec!["localhost".to_string(), "127.0.0.1".to_string()])
49+
.expect("valid");
4850
let rustls_cert = rustls::pki_types::CertificateDer::from(cert.serialize_der().unwrap());
4951
let private_key =
5052
rustls::pki_types::PrivatePkcs8KeyDer::from(cert.get_key_pair().serialize_der());
@@ -67,7 +69,7 @@ pub async fn run_relay_server_with(
6769
metrics_addr: None,
6870
};
6971
let server = Server::spawn(config).await.unwrap();
70-
let url: RelayUrl = format!("https://localhost:{}", server.https_addr().unwrap().port())
72+
let url: RelayUrl = format!("https://{}", server.https_addr().expect("configured"))
7173
.parse()
7274
.unwrap();
7375
let m = RelayMap::from_nodes([RelayNode {

0 commit comments

Comments
 (0)