Skip to content

Commit 146f423

Browse files
authored
fix(iroh-dns-server): Fixes for packet expiry (#3297)
## Description Based on #3295 I noticed in the debug logs of our staging server that some entries in the expiry table were never deleted, and thus they were processed again and again on each expiry tick. This PR has the following changes: * Bugfix: When upserting a packet, we were not properly clearing the *old* expiry entry! [This line](https://github.com/n0-computer/iroh/blob/f3c9af35542a7f2e00303090d1ca5273f86bc375/iroh-dns-server/src/store/signed_packets.rs#L148) is wrong: It should be `existing.timestamp()`, because that's the entry we want to remove, not the (not-yet-existing) entry for the timestamp of the *new* packet. * Improve type handling and debug formatting around expiry timestamps, to make logs better readable and be less error prone to bad type conversions * When an expiry entry is being processed, and the corresponding packet does not exist anymore, the expiry entry is deleted ## Breaking Changes <!-- Optional, if there are any breaking changes document them, including how to migrate older code. --> ## Notes & open questions We need tests around expiry. This should've been caught earlier. There's currently only a [single test](https://github.com/n0-computer/iroh/blob/f3c9af35542a7f2e00303090d1ca5273f86bc375/iroh-dns-server/src/lib.rs#L190) around eviction, added in the PR that added eviction (#2997). But that test only tests that expiry works at all. There's no test currently that verifies if entries in the expiry table are removed when a packet is upserted, which is the test that would fail without this PR. ## Change checklist <!-- Remove any that are not relevant. --> - [x] Self-review. - [x] Documentation updates following the [style guide](https://rust-lang.github.io/rfcs/1574-more-api-documentation-conventions.html#appendix-a-full-conventions-text), if relevant. - Tests if relevant: Would be relevant, but needs to come later.
1 parent 74b8baa commit 146f423

File tree

4 files changed

+32
-16
lines changed

4 files changed

+32
-16
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

iroh-dns-server/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ dirs-next = "2.0.0"
2727
governor = "0.8"
2828
hickory-server = { version = "0.25.1", features = ["https-ring"] }
2929
http = "1.0.0"
30+
humantime = "2.2.0"
3031
humantime-serde = "1.1.1"
3132
iroh-metrics = { version = "0.34", features = ["service"] }
3233
lru = "0.12.3"

iroh-dns-server/examples/publish.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ async fn main() -> Result<()> {
105105
.with_direct_addresses(args.addr.into_iter().collect())
106106
.with_user_data(args.user_data);
107107
let signed_packet = node_info.to_pkarr_signed_packet(&secret_key, 30)?;
108+
tracing::debug!("signed packet: {signed_packet:?}");
108109
pkarr.publish(&signed_packet).await?;
109110

110111
println!("signed packet published.");

iroh-dns-server/src/store/signed_packets.rs

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,10 @@
1-
use std::{future::Future, path::Path, result, sync::Arc, time::Duration};
1+
use std::{
2+
future::Future,
3+
path::Path,
4+
result,
5+
sync::Arc,
6+
time::{Duration, SystemTime},
7+
};
28

39
use anyhow::{Context, Result};
410
use pkarr::{SignedPacket, Timestamp};
@@ -52,7 +58,7 @@ enum Message {
5258
res: oneshot::Sender<Snapshot>,
5359
},
5460
CheckExpired {
55-
time: [u8; 8],
61+
time: Timestamp,
5662
key: PublicKeyBytes,
5763
},
5864
}
@@ -119,7 +125,6 @@ impl Actor {
119125
let transaction = self.db.begin_write()?;
120126
let mut tables = Tables::new(&transaction)?;
121127
let timeout = tokio::time::sleep(self.options.max_batch_time);
122-
let expired = Timestamp::now() - expiry_us;
123128
tokio::pin!(timeout);
124129
for _ in 0..self.options.max_batch_size {
125130
tokio::select! {
@@ -151,8 +156,8 @@ impl Actor {
151156
res.send(false).ok();
152157
continue;
153158
} else {
154-
// remove the packet from the update time index
155-
tables.update_time.remove(&packet.timestamp().to_bytes(), key.as_bytes())?;
159+
// remove the old packet from the update time index
160+
tables.update_time.remove(&existing.timestamp().to_bytes(), key.as_bytes())?;
156161
true
157162
}
158163
} else {
@@ -185,13 +190,21 @@ impl Actor {
185190
res.send(Snapshot::new(&self.db)?).ok();
186191
}
187192
Message::CheckExpired { key, time } => {
188-
trace!("check expired {} at {}", key, u64::from_be_bytes(time));
193+
trace!("check expired {} at {}", key, fmt_time(time));
189194
if let Some(packet) = get_packet(&tables.signed_packets, &key)? {
195+
let expired = Timestamp::now() - expiry_us;
190196
if packet.timestamp() < expired {
191-
tables.update_time.remove(&time, key.as_bytes())?;
197+
tables.update_time.remove(&time.to_bytes(), key.as_bytes())?;
192198
let _ = tables.signed_packets.remove(key.as_bytes())?;
193199
self.metrics.store_packets_expired.inc();
200+
debug!("removed expired packet {key}");
201+
} else {
202+
debug!("packet {key} is no longer expired, removing obsolete expiry entry");
203+
tables.update_time.remove(&time.to_bytes(), key.as_bytes())?;
194204
}
205+
} else {
206+
debug!("expired packet {key} not found, remove from expiry table");
207+
tables.update_time.remove(&time.to_bytes(), key.as_bytes())?;
195208
}
196209
}
197210
}
@@ -205,6 +218,10 @@ impl Actor {
205218
}
206219
}
207220

221+
fn fmt_time(t: Timestamp) -> String {
222+
humantime::format_rfc3339_micros(SystemTime::from(t)).to_string()
223+
}
224+
208225
/// A struct similar to [`redb::Table`] but for all tables that make up the
209226
/// signed packet store.
210227
pub(super) struct Tables<'a> {
@@ -368,7 +385,7 @@ async fn evict_task_inner(send: mpsc::Sender<Message>, options: Options) -> anyh
368385
anyhow::bail!("failed to get snapshot");
369386
};
370387
let expired = Timestamp::now() - expiry_us;
371-
trace!("evicting packets older than {}", expired);
388+
trace!("evicting packets older than {}", fmt_time(expired));
372389
// if getting the range fails we exit the loop and shut down
373390
// if individual reads fail we log the error and limp on
374391
for item in snapshot.update_time.range(..expired.to_bytes())? {
@@ -379,26 +396,22 @@ async fn evict_task_inner(send: mpsc::Sender<Message>, options: Options) -> anyh
379396
continue;
380397
}
381398
};
382-
let time = time.value();
383-
trace!("evicting expired packets at {}", u64::from_be_bytes(time));
399+
let time = Timestamp::from(time.value());
400+
trace!("evicting expired packets at {}", fmt_time(time));
384401
for item in keys {
385402
let key = match item {
386403
Ok(v) => v,
387404
Err(e) => {
388405
error!(
389406
"failed to read update_time item at {}: {:?}",
390-
u64::from_be_bytes(time),
407+
fmt_time(time),
391408
e
392409
);
393410
continue;
394411
}
395412
};
396413
let key = PublicKeyBytes::new(key.value());
397-
debug!(
398-
"evicting expired packet {} {}",
399-
u64::from_be_bytes(time),
400-
key
401-
);
414+
debug!("evicting expired packet {} {}", fmt_time(time), key);
402415
send.send(Message::CheckExpired { time, key }).await?;
403416
}
404417
}

0 commit comments

Comments
 (0)