Skip to content

Commit 57ff533

Browse files
authored
Show the link we failed on when parsing index pages (#9118)
For #8172, show the link we failed on. This should, for example, give a hint on permission-denied pages such as the one in #8172 (comment).
1 parent b37170d commit 57ff533

File tree

2 files changed

+8
-8
lines changed

2 files changed

+8
-8
lines changed

crates/uv-client/src/html.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use std::str::FromStr;
22

3-
use tl::HTMLTag;
3+
use tl::{HTMLTag, Parser};
44
use tracing::{instrument, warn};
55
use url::Url;
66

@@ -44,7 +44,7 @@ impl SimpleHtml {
4444
.iter()
4545
.filter_map(|node| node.as_tag())
4646
.filter(|link| link.name().as_bytes() == b"a")
47-
.map(|link| Self::parse_anchor(link))
47+
.map(|link| Self::parse_anchor(link, dom.parser()))
4848
.collect::<Result<Vec<_>, _>>()?;
4949
// While it has not been positively observed, we sort the files
5050
// to ensure we have a defined ordering. Otherwise, if we rely on
@@ -70,14 +70,14 @@ impl SimpleHtml {
7070
}
7171

7272
/// Parse a [`File`] from an `<a>` tag.
73-
fn parse_anchor(link: &HTMLTag) -> Result<File, Error> {
73+
fn parse_anchor(link: &HTMLTag, parser: &Parser) -> Result<File, Error> {
7474
// Extract the href.
7575
let href = link
7676
.attributes()
7777
.get("href")
7878
.flatten()
7979
.filter(|bytes| !bytes.as_bytes().is_empty())
80-
.ok_or(Error::MissingHref)?;
80+
.ok_or(Error::MissingHref(link.inner_text(parser).to_string()))?;
8181
let href = std::str::from_utf8(href.as_bytes())?;
8282

8383
// Extract the hash, which should be in the fragment.
@@ -187,8 +187,8 @@ pub enum Error {
187187
#[error(transparent)]
188188
HtmlParse(#[from] tl::ParseError),
189189

190-
#[error("Missing href attribute on anchor link")]
191-
MissingHref,
190+
#[error("Missing href attribute on anchor link: `{0}`")]
191+
MissingHref(String),
192192

193193
#[error("Expected distribution filename as last path component of URL: {0}")]
194194
MissingFilename(String),

crates/uv-client/src/html/tests.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,7 @@ fn parse_missing_href() {
419419
";
420420
let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap();
421421
let result = SimpleHtml::parse(text, &base).unwrap_err();
422-
insta::assert_snapshot!(result, @"Missing href attribute on anchor link");
422+
insta::assert_snapshot!(result, @"Missing href attribute on anchor link: `Jinja2-3.1.2-py3-none-any.whl`");
423423
}
424424

425425
#[test]
@@ -436,7 +436,7 @@ fn parse_empty_href() {
436436
"#;
437437
let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap();
438438
let result = SimpleHtml::parse(text, &base).unwrap_err();
439-
insta::assert_snapshot!(result, @"Missing href attribute on anchor link");
439+
insta::assert_snapshot!(result, @"Missing href attribute on anchor link: `Jinja2-3.1.2-py3-none-any.whl`");
440440
}
441441

442442
#[test]

0 commit comments

Comments
 (0)