Skip to content

Commit f25b0b7

Browse files
committed
perf: use GSO
1 parent 1ce5455 commit f25b0b7

File tree

10 files changed

+188
-52
lines changed

10 files changed

+188
-52
lines changed

neqo-bin/src/client/mod.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,8 @@ impl<'a, H: Handler> Runner<'a, H> {
413413
args,
414414
timeout: None,
415415
recv_buf: vec![0; neqo_udp::RECV_BUF_SIZE],
416-
send_buf: Vec::new(),
416+
// TODO
417+
send_buf: Vec::with_capacity(neqo_udp::RECV_BUF_SIZE),
417418
}
418419
}
419420

neqo-bin/src/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ impl Default for QuicParameters {
146146
max_streams_uni: 16,
147147
idle_timeout: 30,
148148
congestion_control: CongestionControlAlgorithm::NewReno,
149-
no_pacing: false,
149+
no_pacing: true,
150150
no_pmtud: false,
151151
preferred_address_v4: None,
152152
preferred_address_v6: None,

neqo-bin/src/server/mod.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,8 @@ impl ServerRunner {
227227
timeout: None,
228228
sockets,
229229
recv_buf: vec![0; neqo_udp::RECV_BUF_SIZE],
230-
send_buf: vec![],
230+
// TODO
231+
send_buf: Vec::with_capacity(neqo_udp::RECV_BUF_SIZE),
231232
}
232233
}
233234

neqo-common/src/codec.rs

+30-24
Original file line numberDiff line numberDiff line change
@@ -196,12 +196,16 @@ impl<'a, 'b> PartialEq<Decoder<'b>> for Decoder<'a> {
196196
/// Encoder is good for building data structures.
197197
#[derive(PartialEq, Eq)]
198198
pub struct Encoder<'a> {
199+
start: usize,
199200
buf: &'a mut Vec<u8>,
200201
}
201202

202203
impl<'a> Encoder<'a> {
203204
pub fn new(buf: &'a mut Vec<u8>) -> Self {
204-
Self { buf }
205+
Self {
206+
start: buf.len(),
207+
buf,
208+
}
205209
}
206210

207211
/// Static helper function for previewing the results of encoding without doing it.
@@ -234,20 +238,20 @@ impl<'a> Encoder<'a> {
234238
/// been written to the buffer.
235239
#[must_use]
236240
pub fn len(&self) -> usize {
237-
self.buf.len()
241+
self.buf.len() - self.start
238242
}
239243

240244
/// Returns true if the encoder buffer contains no elements.
241245
#[must_use]
242246
pub fn is_empty(&self) -> bool {
243-
self.buf.is_empty()
247+
self.start == self.buf.len()
244248
}
245249

246250
/// Create a view of the current contents of the buffer.
247251
/// Note: for a view of a slice, use `Decoder::new(&enc[s..e])`
248252
#[must_use]
249253
pub fn as_decoder(&self) -> Decoder {
250-
Decoder::new(self.buf)
254+
Decoder::new(&self.buf[self.start..])
251255
}
252256

253257
/// Don't use this except in testing.
@@ -272,7 +276,7 @@ impl<'a> Encoder<'a> {
272276

273277
#[cfg(test)]
274278
fn to_hex(&self) -> String {
275-
crate::hex(&self.buf)
279+
crate::hex(&self.buf[self.start..])
276280
}
277281

278282
/// Generic encode routine for arbitrary data.
@@ -402,19 +406,19 @@ impl<'a> Encoder<'a> {
402406

403407
/// Truncate the encoder to the given size.
404408
pub fn truncate(&mut self, len: usize) {
405-
self.buf.truncate(len);
409+
self.buf.truncate(len + self.start);
406410
}
407411

408412
/// Pad the buffer to `len` with bytes set to `v`.
409413
pub fn pad_to(&mut self, len: usize, v: u8) {
410-
if len > self.buf.len() {
411-
self.buf.resize(len, v);
414+
if len > self.buf.len() - self.start {
415+
self.buf.resize(len + self.start, v);
412416
}
413417
}
414418

415419
#[must_use]
416420
pub fn to_vec(&self) -> Vec<u8> {
417-
self.buf.clone()
421+
self.buf[self.start..].to_vec()
418422
}
419423
}
420424

@@ -426,29 +430,31 @@ impl<'a> Debug for Encoder<'a> {
426430

427431
impl<'a> AsRef<[u8]> for Encoder<'a> {
428432
fn as_ref(&self) -> &[u8] {
429-
self.buf
433+
&self.buf[self.start..]
430434
}
431435
}
432436

433437
impl<'a> AsMut<[u8]> for Encoder<'a> {
434438
fn as_mut(&mut self) -> &mut [u8] {
435-
self.buf
436-
}
437-
}
438-
439-
impl<'a> From<Encoder<'a>> for &'a [u8] {
440-
#[must_use]
441-
fn from(encoder: Encoder<'a>) -> &'a [u8] {
442-
encoder.buf
439+
&mut self.buf[self.start..]
443440
}
444441
}
445442

446-
impl<'a> From<Encoder<'a>> for &'a mut Vec<u8> {
447-
#[must_use]
448-
fn from(buf: Encoder<'a>) -> &'a mut Vec<u8> {
449-
buf.buf
450-
}
451-
}
443+
// TODO: Needed? Non-intuiv at least.
444+
// impl<'a> From<Encoder<'a>> for &'a [u8] {
445+
// #[must_use]
446+
// fn from(encoder: Encoder<'a>) -> &'a [u8] {
447+
// encoder.buf
448+
// }
449+
// }
450+
451+
// TODO: Needed? Non-intuiv at least.
452+
// impl<'a> From<Encoder<'a>> for &'a mut Vec<u8> {
453+
// #[must_use]
454+
// fn from(buf: Encoder<'a>) -> &'a mut Vec<u8> {
455+
// buf.buf
456+
// }
457+
// }
452458

453459
#[cfg(test)]
454460
mod tests {

neqo-common/src/datagram.rs

+4
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ impl<D> Datagram<D> {
4646
pub const fn segment_size(&self) -> usize {
4747
self.segment_size
4848
}
49+
50+
pub fn set_segment_size(&mut self, segment_size: usize) {
51+
self.segment_size = segment_size;
52+
}
4953
}
5054

5155
impl<D: AsRef<[u8]>> Datagram<D> {

neqo-transport/src/connection/mod.rs

+89-17
Original file line numberDiff line numberDiff line change
@@ -2343,28 +2343,104 @@ impl Connection {
23432343
self.stats.borrow_mut().frame_tx.connection_close += 1;
23442344
}
23452345

2346+
// TODO: There is a limit to the number of segments supported. E.g. you
2347+
// can't pass 200 segments to the Linux Kernel.
2348+
fn output_path<'a>(
2349+
&mut self,
2350+
path: &PathRef,
2351+
now: Instant,
2352+
closing_frame: &Option<ClosingFrame>,
2353+
out: &'a mut Vec<u8>,
2354+
) -> Res<SendOption<'a>> {
2355+
assert_eq!(out.len(), 0);
2356+
2357+
qinfo!("\n===== output_path");
2358+
let mut initial_capacity = out.capacity();
2359+
// TODO
2360+
if initial_capacity == 0 {
2361+
initial_capacity = usize::MAX;
2362+
}
2363+
qinfo!("initial_capacity: {initial_capacity}");
2364+
let mut segment_size = None;
2365+
2366+
loop {
2367+
let limit = segment_size.map(|s| min(s, initial_capacity - out.len()));
2368+
if limit < segment_size {
2369+
qinfo!("no more space for another segment");
2370+
break;
2371+
}
2372+
qinfo!("limit: {limit:?}, out len: {}", out.len());
2373+
// Determine how we are sending packets (PTO, etc..).
2374+
let profile = self
2375+
.loss_recovery
2376+
.send_profile_2(limit, &path.borrow(), now);
2377+
qinfo!("output_path send_profile {:?}", profile);
2378+
qdebug!([self], "output_path send_profile {:?}", profile);
2379+
2380+
let start = out.len();
2381+
if let Some(pace) = self.output_path_segment(profile, path, now, closing_frame, out)? {
2382+
qinfo!("SendOption::No");
2383+
if segment_size.is_none() {
2384+
return Ok(SendOption::No(pace));
2385+
}
2386+
break;
2387+
}
2388+
qinfo!("SendOption::Yes");
2389+
2390+
if segment_size.is_none() {
2391+
segment_size = Some(out.len());
2392+
if out.len() < path.borrow().plpmtu() / 2 {
2393+
qinfo!("Segment is smaller than half of the mtu: {}", out.len());
2394+
break;
2395+
}
2396+
}
2397+
2398+
if out.len() % segment_size.expect("TODO") != 0 {
2399+
qinfo!("Segment is smaller than previous");
2400+
break;
2401+
}
2402+
2403+
if path.borrow().pmtud().needs_probe() {
2404+
qinfo!("about to probe, thus larger than previous segment");
2405+
break;
2406+
}
2407+
2408+
assert!(out.len() - start <= segment_size.unwrap());
2409+
}
2410+
2411+
let mut dgram = path
2412+
.borrow_mut()
2413+
.datagram(out, &mut self.stats.borrow_mut());
2414+
dgram.set_segment_size(segment_size.expect("TODO"));
2415+
2416+
qinfo!(
2417+
"Sending datagram: {} size {} segments",
2418+
dgram.len(),
2419+
dgram.num_segments()
2420+
);
2421+
2422+
return Ok(SendOption::Yes(dgram));
2423+
}
2424+
23462425
/// Build a datagram, possibly from multiple packets (for different PN
23472426
/// spaces) and each containing 1+ frames.
23482427
#[allow(clippy::too_many_lines)] // Yeah, that's just the way it is.
2349-
fn output_path<'a>(
2428+
fn output_path_segment<'a>(
23502429
&mut self,
2430+
profile: SendProfile,
23512431
path: &PathRef,
23522432
now: Instant,
23532433
closing_frame: &Option<ClosingFrame>,
23542434
out: &'a mut Vec<u8>,
2355-
) -> Res<SendOption<'a>> {
2435+
// TODO: Option is misleading here.
2436+
) -> Res<Option<bool>> {
23562437
let mut initial_sent = None;
23572438
let mut needs_padding = false;
23582439
let grease_quic_bit = self.can_grease_quic_bit();
23592440
let version = self.version();
23602441

2361-
// Determine how we are sending packets (PTO, etc..).
2362-
let profile = self.loss_recovery.send_profile(&path.borrow(), now);
2363-
qdebug!([self], "output_path send_profile {:?}", profile);
2364-
23652442
// Frames for different epochs must go in different packets, but then these
23662443
// packets can go in a single datagram
2367-
assert_eq!(out.len(), 0);
23682444
let mut encoder = Encoder::new(out);
23692445
for space in PacketNumberSpace::iter() {
23702446
// Ensure we have tx crypto state for this epoch, or skip it.
@@ -2492,30 +2568,26 @@ impl Connection {
24922568

24932569
if encoder.is_empty() {
24942570
qdebug!("TX blocked, profile={:?}", profile);
2495-
Ok(SendOption::No(profile.paced()))
2571+
Ok(Some(profile.paced()))
24962572
} else {
24972573
// Perform additional padding for Initial packets as necessary.
2498-
let packets: &mut Vec<u8> = encoder.into();
24992574
if let Some(mut initial) = initial_sent.take() {
25002575
if needs_padding {
25012576
qdebug!(
25022577
[self],
25032578
"pad Initial from {} to PLPMTU {}",
2504-
packets.len(),
2579+
encoder.len(),
25052580
profile.limit()
25062581
);
2507-
initial.track_padding(profile.limit() - packets.len());
2582+
initial.track_padding(profile.limit() - encoder.len());
25082583
// These zeros aren't padding frames, they are an invalid all-zero coalesced
25092584
// packet, which is why we don't increase `frame_tx.padding` count here.
2510-
packets.resize(profile.limit(), 0);
2585+
encoder.pad_to(profile.limit(), 0);
25112586
}
25122587
self.loss_recovery.on_packet_sent(path, initial);
25132588
}
2514-
path.borrow_mut().add_sent(packets.len());
2515-
Ok(SendOption::Yes(
2516-
path.borrow_mut()
2517-
.datagram(packets, &mut self.stats.borrow_mut()),
2518-
))
2589+
path.borrow_mut().add_sent(encoder.len());
2590+
Ok(None)
25192591
}
25202592
}
25212593

neqo-transport/src/packet/mod.rs

+7-3
Original file line numberDiff line numberDiff line change
@@ -499,8 +499,10 @@ impl<'a> PacketBuilder<'a> {
499499
Ok(aead.encrypt(0, encoder.as_ref(), &[], &mut buf)?.to_vec())
500500
})?;
501501
encoder.encode(&tag);
502-
let complete: &[u8] = encoder.into();
503-
Ok(&complete[start..])
502+
503+
// TODO: Tripple check that this is correct. Ideally improve Encoder instead of this hack here.
504+
let encoder_len = encoder.len();
505+
Ok(&out[out.len() - encoder_len + start..])
504506
}
505507

506508
/// Make a Version Negotiation packet.
@@ -534,7 +536,9 @@ impl<'a> PacketBuilder<'a> {
534536
grease[3] = (client_version.wrapping_add(0x10) & 0xf0) as u8 | 0x0a;
535537
encoder.encode(&grease[..4]);
536538

537-
encoder.into()
539+
// TODO
540+
let encoder_len = encoder.len();
541+
&out[(out.len() - encoder_len)..]
538542
}
539543
}
540544

neqo-transport/src/recovery/mod.rs

+19-2
Original file line numberDiff line numberDiff line change
@@ -885,13 +885,30 @@ impl LossRecovery {
885885
lost_packets
886886
}
887887

888+
#[cfg(test)]
889+
pub fn send_profile(&mut self, path: &Path, now: Instant) -> SendProfile {
890+
self.send_profile_2(None, path, now)
891+
}
892+
888893
/// Check how packets should be sent, based on whether there is a PTO,
889894
/// what the current congestion window is, and what the pacer says.
890895
#[allow(clippy::option_if_let_else)]
891-
pub fn send_profile(&mut self, path: &Path, now: Instant) -> SendProfile {
896+
pub fn send_profile_2(
897+
&mut self,
898+
// TODO: Maybe `segment_limit` is better? Because it might be the last
899+
// in the vector, smaller than the overall segment size.
900+
segment_size: Option<usize>,
901+
path: &Path,
902+
now: Instant,
903+
) -> SendProfile {
892904
qtrace!([self], "get send profile {:?}", now);
893905
let sender = path.sender();
894-
let mtu = path.plpmtu();
906+
let mut mtu = path.plpmtu();
907+
// TODO: Cleanup?
908+
if let Some(segment_size) = segment_size {
909+
mtu = min(mtu, segment_size);
910+
}
911+
895912
if let Some(profile) = self
896913
.pto_state
897914
.as_mut()

0 commit comments

Comments
 (0)