Skip to content

Commit 91d44dc

Browse files
authored
feat(iroh-net): Implement the https probe (#2903)
## Description This implements the https probe. There does not need to be separate IPv4 or IPv6 HTTPS probes, because we just go with whatever the HTTPS connection works over. This is what we need to establish a relay connection after all, this also does not care. The only reason for this is to get some kind of latency to be able to chose a relay server. Luckily the https probes have been integrated in the probe plan a long time so nothing to do there. ## Breaking Changes The URLs served by the relay changed: - `/relay/probe` has moved to `/ping` - `/derp/probe` has been removed. Unless you were manually using those URLs you will not notice these changes, nothing in the iroh codebase ever used the changed URLs. ## Notes & open questions Currently this uses reqwest with it's default DNS resolution. This is not great as it is not the same DNS resolution used everywhere else. It is however the same as done in the captive portal check. This is worth at least looking at before merging this. ## Change checklist - [x] Use common DNS resolver. - [x] Update URL - [x] Write docs for URLs of relay server. - [x] Self-review. - [x] Documentation updates following the [style guide](https://rust-lang.github.io/rfcs/1574-more-api-documentation-conventions.html#appendix-a-full-conventions-text), if relevant. - [x] Tests if relevant. - [x] All breaking changes documented.
1 parent d6a32f4 commit 91d44dc

File tree

4 files changed

+154
-53
lines changed

4 files changed

+154
-53
lines changed

iroh-net/src/netcheck/reportgen.rs

Lines changed: 122 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,15 @@ use tokio::{
3535
};
3636
use tokio_util::task::AbortOnDropHandle;
3737
use tracing::{debug, debug_span, error, info_span, trace, warn, Instrument, Span};
38+
use url::Host;
3839

3940
use super::NetcheckMetrics;
4041
use crate::{
4142
defaults::DEFAULT_STUN_PORT,
4243
dns::{DnsResolver, ResolverExt},
4344
netcheck::{self, Report},
4445
ping::{PingError, Pinger},
45-
relay::{RelayMap, RelayNode, RelayUrl},
46+
relay::{http::RELAY_PROBE_PATH, RelayMap, RelayNode, RelayUrl},
4647
stun,
4748
util::MaybeFuture,
4849
};
@@ -466,6 +467,7 @@ impl Actor {
466467
.as_ref()
467468
.and_then(|l| l.preferred_relay.clone());
468469

470+
let dns_resolver = self.dns_resolver.clone();
469471
let dm = self.relay_map.clone();
470472
self.outstanding_tasks.captive_task = true;
471473
MaybeFuture {
@@ -474,7 +476,7 @@ impl Actor {
474476
debug!("Captive portal check started after {CAPTIVE_PORTAL_DELAY:?}");
475477
let captive_portal_check = tokio::time::timeout(
476478
CAPTIVE_PORTAL_TIMEOUT,
477-
check_captive_portal(&dm, preferred_relay)
479+
check_captive_portal(&dns_resolver, &dm, preferred_relay)
478480
.instrument(debug_span!("captive-portal")),
479481
);
480482
match captive_portal_check.await {
@@ -743,7 +745,7 @@ async fn run_probe(
743745
}
744746
Probe::Https { ref node, .. } => {
745747
debug!("sending probe HTTPS");
746-
match measure_https_latency(node).await {
748+
match measure_https_latency(&dns_resolver, node, None).await {
747749
Ok((latency, ip)) => {
748750
result.latency = Some(latency);
749751
// We set these IPv4 and IPv6 but they're not really used
@@ -853,7 +855,11 @@ async fn run_stun_probe(
853855
/// return a "204 No Content" response and checking if that's what we get.
854856
///
855857
/// The boolean return is whether we think we have a captive portal.
856-
async fn check_captive_portal(dm: &RelayMap, preferred_relay: Option<RelayUrl>) -> Result<bool> {
858+
async fn check_captive_portal(
859+
dns_resolver: &DnsResolver,
860+
dm: &RelayMap,
861+
preferred_relay: Option<RelayUrl>,
862+
) -> Result<bool> {
857863
// If we have a preferred relay node and we can use it for non-STUN requests, try that;
858864
// otherwise, pick a random one suitable for non-STUN requests.
859865
let preferred_relay = preferred_relay.and_then(|url| match dm.get_node(&url) {
@@ -881,9 +887,23 @@ async fn check_captive_portal(dm: &RelayMap, preferred_relay: Option<RelayUrl>)
881887
}
882888
};
883889

884-
let client = reqwest::ClientBuilder::new()
885-
.redirect(reqwest::redirect::Policy::none())
886-
.build()?;
890+
let mut builder = reqwest::ClientBuilder::new().redirect(reqwest::redirect::Policy::none());
891+
if let Some(Host::Domain(domain)) = url.host() {
892+
// Use our own resolver rather than getaddrinfo
893+
//
894+
// For some reason reqwest wants SocketAddr rather than IpAddr but then proceeds to
895+
// ignore the port, extracting it from the URL instead. We supply a dummy port.
896+
//
897+
// Ideally we would try to resolve **both** IPv4 and IPv6 rather than purely race
898+
// them. But our resolver doesn't support that yet.
899+
let addrs: Vec<_> = dns_resolver
900+
.lookup_ipv4_ipv6_staggered(domain, DNS_TIMEOUT, DNS_STAGGERING_MS)
901+
.await?
902+
.map(|ipaddr| SocketAddr::new(ipaddr, 80))
903+
.collect();
904+
builder = builder.resolve_to_addrs(domain, &addrs);
905+
}
906+
let client = builder.build()?;
887907

888908
// Note: the set of valid characters in a challenge and the total
889909
// length is limited; see is_challenge_char in bin/iroh-relay for more
@@ -1023,33 +1043,72 @@ async fn run_icmp_probe(
10231043
Ok(report)
10241044
}
10251045

1046+
/// Executes an HTTPS probe.
1047+
///
1048+
/// If `certs` is provided they will be added to the trusted root certificates, allowing the
1049+
/// use of self-signed certificates for servers. Currently this is used for testing.
10261050
#[allow(clippy::unused_async)]
1027-
async fn measure_https_latency(_node: &RelayNode) -> Result<(Duration, IpAddr)> {
1028-
bail!("not implemented");
1029-
// TODO:
1030-
// - needs relayhttp::Client
1031-
// - measurement hooks to measure server processing time
1032-
1033-
// metricHTTPSend.Add(1)
1034-
// let ctx, cancel := context.WithTimeout(httpstat.WithHTTPStat(ctx, &result), overallProbeTimeout);
1035-
// let dc := relayhttp.NewNetcheckClient(c.logf);
1036-
// let tlsConn, tcpConn, node := dc.DialRegionTLS(ctx, reg)?;
1037-
// if ta, ok := tlsConn.RemoteAddr().(*net.TCPAddr);
1038-
// req, err := http.NewRequestWithContext(ctx, "GET", "https://"+node.HostName+"/relay/latency-check", nil);
1039-
// resp, err := hc.Do(req);
1040-
1041-
// // relays should give us a nominal status code, so anything else is probably
1042-
// // an access denied by a MITM proxy (or at the very least a signal not to
1043-
// // trust this latency check).
1044-
// if resp.StatusCode > 299 {
1045-
// return 0, ip, fmt.Errorf("unexpected status code: %d (%s)", resp.StatusCode, resp.Status)
1046-
// }
1047-
// _, err = io.Copy(io.Discard, io.LimitReader(resp.Body, 8<<10));
1048-
// result.End(c.timeNow())
1049-
1050-
// // TODO: decide best timing heuristic here.
1051-
// // Maybe the server should return the tcpinfo_rtt?
1052-
// return result.ServerProcessing, ip, nil
1051+
async fn measure_https_latency(
1052+
dns_resolver: &DnsResolver,
1053+
node: &RelayNode,
1054+
certs: Option<Vec<rustls::pki_types::CertificateDer<'static>>>,
1055+
) -> Result<(Duration, IpAddr)> {
1056+
let url = node.url.join(RELAY_PROBE_PATH)?;
1057+
1058+
// This should also use same connection establishment as relay client itself, which
1059+
// needs to be more configurable so users can do more crazy things:
1060+
// https://github.com/n0-computer/iroh/issues/2901
1061+
let mut builder = reqwest::ClientBuilder::new().redirect(reqwest::redirect::Policy::none());
1062+
if let Some(Host::Domain(domain)) = url.host() {
1063+
// Use our own resolver rather than getaddrinfo
1064+
//
1065+
// For some reason reqwest wants SocketAddr rather than IpAddr but then proceeds to
1066+
// ignore the port, extracting it from the URL instead. We supply a dummy port.
1067+
//
1068+
// The relay Client uses `.lookup_ipv4_ipv6` to connect, so use the same function
1069+
// but staggered for reliability. Ideally this tries to resolve **both** IPv4 and
1070+
// IPv6 though. But our resolver does not have a function for that yet.
1071+
let addrs: Vec<_> = dns_resolver
1072+
.lookup_ipv4_ipv6_staggered(domain, DNS_TIMEOUT, DNS_STAGGERING_MS)
1073+
.await?
1074+
.map(|ipaddr| SocketAddr::new(ipaddr, 80))
1075+
.collect();
1076+
builder = builder.resolve_to_addrs(domain, &addrs);
1077+
}
1078+
if let Some(certs) = certs {
1079+
for cert in certs {
1080+
let cert = reqwest::Certificate::from_der(&cert)?;
1081+
builder = builder.add_root_certificate(cert);
1082+
}
1083+
}
1084+
let client = builder.build()?;
1085+
1086+
let start = Instant::now();
1087+
let mut response = client.request(reqwest::Method::GET, url).send().await?;
1088+
let latency = start.elapsed();
1089+
if response.status().is_success() {
1090+
// Drain the response body to be nice to the server, up to a limit.
1091+
const MAX_BODY_SIZE: usize = 8 << 10; // 8 KiB
1092+
let mut body_size = 0;
1093+
while let Some(chunk) = response.chunk().await? {
1094+
body_size += chunk.len();
1095+
if body_size >= MAX_BODY_SIZE {
1096+
break;
1097+
}
1098+
}
1099+
1100+
// Only `None` if a different hyper HttpConnector in the request.
1101+
let remote_ip = response
1102+
.remote_addr()
1103+
.context("missing HttpInfo from HttpConnector")?
1104+
.ip();
1105+
Ok((latency, remote_ip))
1106+
} else {
1107+
Err(anyhow!(
1108+
"Error response from server: '{}'",
1109+
response.status().canonical_reason().unwrap_or_default()
1110+
))
1111+
}
10531112
}
10541113

10551114
/// Updates a netcheck [`Report`] with a new [`ProbeReport`].
@@ -1118,8 +1177,13 @@ fn update_report(report: &mut Report, probe_report: ProbeReport) {
11181177
mod tests {
11191178
use std::net::{Ipv4Addr, Ipv6Addr};
11201179

1180+
use testresult::TestResult;
1181+
11211182
use super::*;
1122-
use crate::defaults::staging::{default_eu_relay_node, default_na_relay_node};
1183+
use crate::{
1184+
defaults::staging::{default_eu_relay_node, default_na_relay_node},
1185+
test_utils,
1186+
};
11231187

11241188
#[test]
11251189
fn test_update_report_stun_working() {
@@ -1368,4 +1432,28 @@ mod tests {
13681432
panic!("Ping error: {err:#}");
13691433
}
13701434
}
1435+
1436+
#[tokio::test]
1437+
async fn test_measure_https_latency() -> TestResult {
1438+
let _logging_guard = iroh_test::logging::setup();
1439+
let (_relay_map, relay_url, server) = test_utils::run_relay_server().await?;
1440+
let dns_resolver = crate::dns::resolver();
1441+
warn!(?relay_url, "RELAY_URL");
1442+
let node = RelayNode {
1443+
stun_only: false,
1444+
stun_port: 0,
1445+
url: relay_url.clone(),
1446+
};
1447+
let (latency, ip) =
1448+
measure_https_latency(dns_resolver, &node, server.certificates()).await?;
1449+
1450+
assert!(latency > Duration::ZERO);
1451+
1452+
let relay_url_ip = relay_url
1453+
.host_str()
1454+
.context("host")?
1455+
.parse::<std::net::IpAddr>()?;
1456+
assert_eq!(ip, relay_url_ip);
1457+
Ok(())
1458+
}
13711459
}

iroh-net/src/relay/http.rs

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,12 @@ pub(crate) const SUPPORTED_WEBSOCKET_VERSION: &str = "13";
1010
/// (over websockets and a custom upgrade protocol).
1111
pub const RELAY_PATH: &str = "/relay";
1212
/// The HTTP path under which the relay allows doing latency queries for testing.
13-
pub const RELAY_PROBE_PATH: &str = "/relay/probe";
13+
pub const RELAY_PROBE_PATH: &str = "/ping";
1414
/// The legacy HTTP path under which the relay used to accept relaying connections.
1515
/// We keep this for backwards compatibility.
1616
#[cfg(feature = "iroh-relay")] // legacy paths only used on server-side for backwards compat
1717
#[cfg_attr(iroh_docsrs, doc(cfg(feature = "iroh-relay")))]
1818
pub(crate) const LEGACY_RELAY_PATH: &str = "/derp";
19-
/// The legacy HTTP path under which the relay used to allow latency queries.
20-
/// We keep this for backwards compatibility.
21-
#[cfg(feature = "iroh-relay")] // legacy paths only used on server-side for backwards compat
22-
#[cfg_attr(iroh_docsrs, doc(cfg(feature = "iroh-relay")))]
23-
pub(crate) const LEGACY_RELAY_PROBE_PATH: &str = "/derp/probe";
2419

2520
/// The HTTP upgrade protocol used for relaying.
2621
#[derive(Debug, Copy, Clone, PartialEq, Eq)]

iroh-net/src/relay/server.rs

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,13 @@
88
//! always attached to a handle and when the handle is dropped the tasks abort. So tasks
99
//! can not outlive their handle. It is also always possible to await for completion of a
1010
//! task. Some tasks additionally have a method to do graceful shutdown.
11+
//!
12+
//! The relay server hosts the following services:
13+
//!
14+
//! - HTTPS `/relay`: The main URL endpoint to which clients connect and sends traffic over.
15+
//! - HTTPS `/ping`: Used for netcheck probes.
16+
//! - HTTPS `/generate_204`: Used for netcheck probes.
17+
//! - STUN: UDP port for STUN requests/responses.
1118
1219
use std::{fmt, future::Future, net::SocketAddr, pin::Pin, sync::Arc};
1320

@@ -27,10 +34,7 @@ use tokio::{
2734
use tokio_util::task::AbortOnDropHandle;
2835
use tracing::{debug, error, info, info_span, instrument, trace, warn, Instrument};
2936

30-
use crate::{
31-
relay::http::{LEGACY_RELAY_PROBE_PATH, RELAY_PROBE_PATH},
32-
stun,
33-
};
37+
use crate::{relay::http::RELAY_PROBE_PATH, stun};
3438

3539
pub(crate) mod actor;
3640
pub(crate) mod client_conn;
@@ -182,6 +186,11 @@ pub struct Server {
182186
relay_handle: Option<http_server::ServerHandle>,
183187
/// The main task running the server.
184188
supervisor: AbortOnDropHandle<Result<()>>,
189+
/// The certificate for the server.
190+
///
191+
/// If the server has manual certificates configured the certificate chain will be
192+
/// available here, this can be used by a client to authenticate the server.
193+
certificates: Option<Vec<rustls::pki_types::CertificateDer<'static>>>,
185194
}
186195

187196
impl Server {
@@ -227,7 +236,13 @@ impl Server {
227236
None => None,
228237
};
229238

230-
// Start the Relay server.
239+
// Start the Relay server, but first clone the certs out.
240+
let certificates = config.relay.as_ref().and_then(|relay| {
241+
relay.tls.as_ref().and_then(|tls| match tls.cert {
242+
CertConfig::LetsEncrypt { .. } => None,
243+
CertConfig::Manual { ref certs, .. } => Some(certs.clone()),
244+
})
245+
});
231246
let (relay_server, http_addr) = match config.relay {
232247
Some(relay_config) => {
233248
debug!("Starting Relay server");
@@ -243,11 +258,6 @@ impl Server {
243258
.headers(headers)
244259
.request_handler(Method::GET, "/", Box::new(root_handler))
245260
.request_handler(Method::GET, "/index.html", Box::new(root_handler))
246-
.request_handler(
247-
Method::GET,
248-
LEGACY_RELAY_PROBE_PATH,
249-
Box::new(probe_handler),
250-
) // backwards compat
251261
.request_handler(Method::GET, RELAY_PROBE_PATH, Box::new(probe_handler))
252262
.request_handler(Method::GET, "/robots.txt", Box::new(robots_handler));
253263
let http_addr = match relay_config.tls {
@@ -284,7 +294,7 @@ impl Server {
284294
}
285295
CertConfig::Manual { private_key, certs } => {
286296
let server_config =
287-
server_config.with_single_cert(certs.clone(), private_key)?;
297+
server_config.with_single_cert(certs, private_key)?;
288298
let server_config = Arc::new(server_config);
289299
let acceptor =
290300
tokio_rustls::TlsAcceptor::from(server_config.clone());
@@ -336,6 +346,7 @@ impl Server {
336346
https_addr: http_addr.and(relay_addr),
337347
relay_handle,
338348
supervisor: AbortOnDropHandle::new(task),
349+
certificates,
339350
})
340351
}
341352

@@ -373,6 +384,11 @@ impl Server {
373384
pub fn stun_addr(&self) -> Option<SocketAddr> {
374385
self.stun_addr
375386
}
387+
388+
/// The certificates chain if configured with manual TLS certificates.
389+
pub fn certificates(&self) -> Option<Vec<rustls::pki_types::CertificateDer<'static>>> {
390+
self.certificates.clone()
391+
}
376392
}
377393

378394
/// Supervisor for the relay server tasks.

iroh-net/src/test_utils.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,9 @@ pub async fn run_relay_server() -> Result<(RelayMap, RelayUrl, Server)> {
4444
pub async fn run_relay_server_with(
4545
stun: Option<StunConfig>,
4646
) -> Result<(RelayMap, RelayUrl, Server)> {
47-
let cert = rcgen::generate_simple_self_signed(vec!["localhost".to_string()]).unwrap();
47+
let cert =
48+
rcgen::generate_simple_self_signed(vec!["localhost".to_string(), "127.0.0.1".to_string()])
49+
.expect("valid");
4850
let rustls_cert = rustls::pki_types::CertificateDer::from(cert.serialize_der().unwrap());
4951
let private_key =
5052
rustls::pki_types::PrivatePkcs8KeyDer::from(cert.get_key_pair().serialize_der());
@@ -67,7 +69,7 @@ pub async fn run_relay_server_with(
6769
metrics_addr: None,
6870
};
6971
let server = Server::spawn(config).await.unwrap();
70-
let url: RelayUrl = format!("https://localhost:{}", server.https_addr().unwrap().port())
72+
let url: RelayUrl = format!("https://{}", server.https_addr().expect("configured"))
7173
.parse()
7274
.unwrap();
7375
let m = RelayMap::from_nodes([RelayNode {

0 commit comments

Comments
 (0)