Skip to content

Commit 31235a2

Browse files
authored
Upgrade nom dependency to 7.1.3 (#41)
* upgrade nom to 6.2.2
* upgrade nom to 7.1.3
* cargo clippy
* fix formatting and clippy lint
* bump major version because of Display trait implementation
1 parent b34caac commit 31235a2

12 files changed

+111
-88
lines changed

Cargo.toml

+2-2
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "warc"
3-
version = "0.3.3"
3+
version = "0.4.0"
44
description = "A Rust library for reading and writing WARC files."
55
readme = "README.md"
66
repository = "https://github.com/jedireza/warc"
@@ -12,7 +12,7 @@ edition = "2018"
1212

1313
[dependencies]
1414
chrono = "0.4.11"
15-
nom = "5.1.1"
15+
nom = "7.1.3"
1616
url = "2"
1717
uuid = { version = "0.8.1", features = ["v4"] }
1818

examples/read_file.rs

+3-3
Original file line number | Diff line number | Diff line change
@@ -10,9 +10,9 @@ fn main() -> Result<(), std::io::Error> {
1010
match record {
1111
Err(err) => println!("ERROR: {}\r\n", err),
1212
Ok(record) => {
13-
println!("{}: {}", WarcHeader::RecordID.to_string(), record.warc_id(),);
14-
println!("{}: {}", WarcHeader::Date.to_string(), record.date(),);
15-
println!("");
13+
println!("{}: {}", WarcHeader::RecordID, record.warc_id(),);
14+
println!("{}: {}", WarcHeader::Date, record.date(),);
15+
println!();
1616
}
1717
}
1818
}

examples/read_filtered.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ fn main() -> std::io::Result<()> {
1616

1717
let filtered_file_names: Vec<_> = args.map(|s| s.to_string_lossy().to_string()).collect();
1818
if filtered_file_names.is_empty() {
19-
return Err(usage_err!("one or more filtered file names not supplied"))?;
19+
Err(usage_err!("one or more filtered file names not supplied"))?;
2020
}
2121

2222
let mut file = WarcReader::from_path_gzip(warc_name)?;

examples/read_gzip.rs

+3-3
Original file line number | Diff line number | Diff line change
@@ -10,9 +10,9 @@ fn main() -> Result<(), std::io::Error> {
1010
match record {
1111
Err(err) => println!("ERROR: {}\r\n", err),
1212
Ok(record) => {
13-
println!("{}: {}", WarcHeader::RecordID.to_string(), record.warc_id());
14-
println!("{}: {}", WarcHeader::Date.to_string(), record.date());
15-
println!("");
13+
println!("{}: {}", WarcHeader::RecordID, record.warc_id());
14+
println!("{}: {}", WarcHeader::Date, record.date());
15+
println!();
1616
}
1717
}
1818
}

examples/read_raw.rs

+3-3
Original file line number | Diff line number | Diff line change
@@ -12,15 +12,15 @@ fn main() -> Result<(), std::io::Error> {
1212
Ok((headers, _)) => {
1313
println!(
1414
"{}: {}",
15-
WarcHeader::RecordID.to_string(),
15+
WarcHeader::RecordID,
1616
String::from_utf8_lossy(headers.as_ref().get(&WarcHeader::RecordID).unwrap())
1717
);
1818
println!(
1919
"{}: {}",
20-
WarcHeader::Date.to_string(),
20+
WarcHeader::Date,
2121
String::from_utf8_lossy(headers.as_ref().get(&WarcHeader::Date).unwrap())
2222
);
23-
println!("");
23+
println!();
2424
}
2525
}
2626
}

src/error.rs

+2-2
Original file line number | Diff line number | Diff line change
@@ -26,9 +26,9 @@ impl fmt::Display for Error {
2626
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2727
match self {
2828
Error::ParseHeaders(_) => write!(f, "Error parsing headers."),
29-
Error::MissingHeader(ref h) => write!(f, "Missing required header: {}", h.to_string()),
29+
Error::MissingHeader(ref h) => write!(f, "Missing required header: {}", h),
3030
Error::MalformedHeader(ref h, ref r) => {
31-
write!(f, "Malformed header: {}: {}", h.to_string(), r)
31+
write!(f, "Malformed header: {}: {}", h, r)
3232
}
3333
Error::ReadData(_) => write!(f, "Error reading data source."),
3434
Error::ReadOverflow => write!(f, "Read further than expected."),

src/parser.rs

+29-15
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,10 @@ fn version(input: &[u8]) -> IResult<&[u8], &str> {
1414

1515
let version_str = match str::from_utf8(version) {
1616
Err(_) => {
17-
return Err(nom::Err::Error((input, ErrorKind::Verify)));
17+
return Err(nom::Err::Error(nom::error::Error::new(
18+
input,
19+
ErrorKind::Verify,
20+
)));
1821
}
1922
Ok(version) => version,
2023
};
@@ -23,8 +26,7 @@ fn version(input: &[u8]) -> IResult<&[u8], &str> {
2326
}
2427

2528
fn is_header_token_char(chr: u8) -> bool {
26-
match chr {
27-
0..=31
29+
!matches!(chr, 0..=31
2830
| 128..=255
2931
| b'('
3032
| b')'
@@ -43,9 +45,7 @@ fn is_header_token_char(chr: u8) -> bool {
4345
| b'{'
4446
| b'}'
4547
| b' '
46-
| b'\\' => false,
47-
_ => true,
48-
}
48+
| b'\\')
4949
}
5050

5151
fn header(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
@@ -63,6 +63,7 @@ fn header(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
6363

6464
/// Parse a WARC header block.
6565
// TODO: evaluate the use of `ErrorKind::Verify` here.
66+
#[allow(clippy::type_complexity)]
6667
pub fn headers(input: &[u8]) -> IResult<&[u8], (&str, Vec<(&str, &[u8])>, usize)> {
6768
let (input, version) = version(input)?;
6869
let (input, headers) = many1(header)(input)?;
@@ -73,22 +74,31 @@ pub fn headers(input: &[u8]) -> IResult<&[u8], (&str, Vec<(&str, &[u8])>, usize)
7374
for header in headers {
7475
let token_str = match str::from_utf8(header.0) {
7576
Err(_) => {
76-
return Err(nom::Err::Error((input, ErrorKind::Verify)));
77+
return Err(nom::Err::Error(nom::error::Error::new(
78+
input,
79+
ErrorKind::Verify,
80+
)));
7781
}
7882
Ok(token) => token,
7983
};
8084

81-
if content_length == None && token_str.to_lowercase() == "content-length" {
85+
if content_length.is_none() && token_str.to_lowercase() == "content-length" {
8286
let value_str = match str::from_utf8(header.1) {
8387
Err(_) => {
84-
return Err(nom::Err::Error((input, ErrorKind::Verify)));
88+
return Err(nom::Err::Error(nom::error::Error::new(
89+
input,
90+
ErrorKind::Verify,
91+
)));
8592
}
8693
Ok(value) => value,
8794
};
8895

8996
match value_str.parse::<usize>() {
9097
Err(_) => {
91-
return Err(nom::Err::Error((input, ErrorKind::Verify)));
98+
return Err(nom::Err::Error(nom::error::Error::new(
99+
input,
100+
ErrorKind::Verify,
101+
)));
92102
}
93103
Ok(len) => {
94104
content_length = Some(len);
@@ -101,14 +111,15 @@ pub fn headers(input: &[u8]) -> IResult<&[u8], (&str, Vec<(&str, &[u8])>, usize)
101111

102112
// TODO: Technically if we didn't find a `content-length` header, the record is invalid. Should
103113
// we be returning an error here instead?
104-
if content_length == None {
114+
if content_length.is_none() {
105115
content_length = Some(0);
106116
}
107117

108118
Ok((input, (version, warc_headers, content_length.unwrap())))
109119
}
110120

111121
/// Parse an entire WARC record.
122+
#[allow(clippy::type_complexity)]
112123
pub fn record(input: &[u8]) -> IResult<&[u8], (&str, Vec<(&str, &[u8])>, &[u8])> {
113124
let (input, (headers, _)) = tuple((headers, line_ending))(input)?;
114125
let (input, (body, _, _)) = tuple((take(headers.2), line_ending, line_ending))(input)?;
@@ -125,13 +136,13 @@ mod tests {
125136

126137
#[test]
127138
fn version_parsing() {
128-
assert_eq!(version(&b"WARC/0.0\r\n"[..]), Ok((&b""[..], &"0.0"[..])));
139+
assert_eq!(version(&b"WARC/0.0\r\n"[..]), Ok((&b""[..], "0.0")));
129140

130-
assert_eq!(version(&b"WARC/1.0\r\n"[..]), Ok((&b""[..], &"1.0"[..])));
141+
assert_eq!(version(&b"WARC/1.0\r\n"[..]), Ok((&b""[..], "1.0")));
131142

132143
assert_eq!(
133144
version(&b"WARC/2.0-alpha\r\n"[..]),
134-
Ok((&b""[..], &"2.0-alpha"[..]))
145+
Ok((&b""[..], "2.0-alpha"))
135146
);
136147
}
137148

@@ -168,7 +179,10 @@ mod tests {
168179

169180
assert_eq!(
170181
headers(&raw_invalid[..]),
171-
Err(Err::Error((&b"\r\n"[..], ErrorKind::Verify)))
182+
Err(Err::Error(nom::error::Error::new(
183+
&b"\r\n"[..],
184+
ErrorKind::Verify
185+
)))
172186
);
173187

174188
let raw = b"\

src/record.rs

+4-5
Original file line number | Diff line number | Diff line change
@@ -51,9 +51,8 @@ mod streaming_trait {
5151
impl<'t, T: Read + 't> Read for StreamingBody<'t, T> {
5252
fn read(&mut self, data: &mut [u8]) -> std::io::Result<usize> {
5353
let max_read = std::cmp::min(data.len(), *self.1 as usize);
54-
self.0.read(&mut data[..max_read as usize]).map(|n| {
54+
self.0.read(&mut data[..max_read]).inspect(|&n| {
5555
*self.1 -= n as u64;
56-
n
5756
})
5857
}
5958
}
@@ -156,7 +155,7 @@ impl std::fmt::Display for RawRecordHeader {
156155
fn fmt(&self, w: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
157156
writeln!(w, "WARC/{}", self.version)?;
158157
for (key, value) in self.as_ref().iter() {
159-
writeln!(w, "{}: {}", key.to_string(), String::from_utf8_lossy(value))?;
158+
writeln!(w, "{}: {}", key, String::from_utf8_lossy(value))?;
160159
}
161160
writeln!(w)?;
162161

@@ -263,7 +262,7 @@ impl<T: BodyKind> Record<T> {
263262
/// The current implementation generates random values based on UUID version 4.
264263
///
265264
pub fn generate_record_id() -> String {
266-
format!("<{}>", Uuid::new_v4().to_urn().to_string())
265+
format!("<{}>", Uuid::new_v4().to_urn())
267266
}
268267

269268
fn parse_content_length(len: &str) -> Result<u64, WarcError> {
@@ -1058,7 +1057,7 @@ mod raw_tests {
10581057

10591058
let output = headers.to_string();
10601059

1061-
let expected_lines = vec![
1060+
let expected_lines = [
10621061
"WARC/1.0",
10631062
"warc-type: dunno",
10641063
"warc-date: 2024-01-01T00:00:00Z",

src/record_type.rs

+5-3
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,6 @@
11
#![allow(missing_docs)]
2+
3+
use std::fmt::Display;
24
#[derive(Clone, Debug, PartialEq)]
35
pub enum RecordType {
46
WarcInfo,
@@ -12,8 +14,8 @@ pub enum RecordType {
1214
Unknown(String),
1315
}
1416

15-
impl ToString for RecordType {
16-
fn to_string(&self) -> String {
17+
impl Display for RecordType {
18+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1719
let stringified = match *self {
1820
RecordType::WarcInfo => "warcinfo",
1921
RecordType::Response => "response",
@@ -25,7 +27,7 @@ impl ToString for RecordType {
2527
RecordType::Continuation => "continuation",
2628
RecordType::Unknown(ref val) => val.as_ref(),
2729
};
28-
stringified.to_string()
30+
f.write_str(stringified)
2931
}
3032
}
3133

src/truncated_type.rs

+5-3
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,6 @@
11
#![allow(missing_docs)]
2+
3+
use std::fmt::Display;
24
#[derive(Clone, Debug, PartialEq)]
35
pub enum TruncatedType {
46
Length,
@@ -8,16 +10,16 @@ pub enum TruncatedType {
810
Unknown(String),
911
}
1012

11-
impl ToString for TruncatedType {
12-
fn to_string(&self) -> String {
13+
impl Display for TruncatedType {
14+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1315
let stringified = match *self {
1416
TruncatedType::Length => "length",
1517
TruncatedType::Time => "time",
1618
TruncatedType::Disconnect => "disconnect",
1719
TruncatedType::Unspecified => "unspecified",
1820
TruncatedType::Unknown(ref val) => val.as_ref(),
1921
};
20-
stringified.to_string()
22+
f.write_str(stringified)
2123
}
2224
}
2325

0 commit comments

Comments
 (0)