Skip to content

Commit 228e99b

Browse files
ehuss authored and Dylan-DPC committed
Fix even more print page links. (#963)
1 parent 4b569ed commit 228e99b

File tree

5 files changed: +222 -36 lines changed

src/renderer/html_handlebars/hbs_renderer.rs

+2 -4
Original file line number | Diff line number | Diff line change
@@ -33,12 +33,10 @@ impl HtmlHandlebars {
3333
let content = ch.content.clone();
3434
let content = utils::render_markdown(&content, ctx.html_config.curly_quotes);
3535

36-
let string_path = ch.path.parent().unwrap().display().to_string();
37-
38-
let fixed_content = utils::render_markdown_with_base(
36+
let fixed_content = utils::render_markdown_with_path(
3937
&ch.content,
4038
ctx.html_config.curly_quotes,
41-
&string_path,
39+
Some(&ch.path),
4240
);
4341
print_content.push_str(&fixed_content);
4442

src/utils/mod.rs

+64 -10
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,8 @@ use regex::Regex;
88
use pulldown_cmark::{html, CowStr, Event, Options, Parser, Tag};
99

1010
use std::borrow::Cow;
11+
use std::fmt::Write;
12+
use std::path::Path;
1113

1214
pub use self::string::take_lines;
1315

@@ -65,20 +67,47 @@ pub fn id_from_content(content: &str) -> String {
6567
normalize_id(trimmed)
6668
}
6769

68-
fn adjust_links<'a>(event: Event<'a>, with_base: &str) -> Event<'a> {
70+
/// Fix links to the correct location.
71+
///
72+
/// This adjusts links, such as turning `.md` extensions to `.html`.
73+
///
74+
/// `path` is the path to the page being rendered relative to the root of the
75+
/// book. This is used for the `print.html` page so that links on the print
76+
/// page go to the original location. Normal page rendering sets `path` to
77+
/// None. Ideally, print page links would link to anchors on the print page,
78+
/// but that is very difficult.
79+
fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
6980
lazy_static! {
7081
static ref SCHEME_LINK: Regex = Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap();
7182
static ref MD_LINK: Regex = Regex::new(r"(?P<link>.*)\.md(?P<anchor>#.*)?").unwrap();
7283
}
7384

74-
fn fix<'a>(dest: CowStr<'a>, base: &str) -> CowStr<'a> {
85+
fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
86+
if dest.starts_with('#') {
87+
// Fragment-only link.
88+
if let Some(path) = path {
89+
let mut base = path.display().to_string();
90+
if base.ends_with(".md") {
91+
base.replace_range(base.len() - 3.., ".html");
92+
}
93+
return format!("{}{}", base, dest).into();
94+
} else {
95+
return dest;
96+
}
97+
}
7598
// Don't modify links with schemes like `https`.
7699
if !SCHEME_LINK.is_match(&dest) {
77100
// This is a relative link, adjust it as necessary.
78101
let mut fixed_link = String::new();
79-
if !base.is_empty() {
80-
fixed_link.push_str(base);
81-
fixed_link.push_str("/");
102+
if let Some(path) = path {
103+
let base = path
104+
.parent()
105+
.expect("path can't be empty")
106+
.to_str()
107+
.expect("utf-8 paths only");
108+
if !base.is_empty() {
109+
write!(fixed_link, "{}/", base).unwrap();
110+
}
82111
}
83112

84113
if let Some(caps) = MD_LINK.captures(&dest) {
@@ -95,20 +124,45 @@ fn adjust_links<'a>(event: Event<'a>, with_base: &str) -> Event<'a> {
95124
dest
96125
}
97126

127+
fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
128+
// This is a terrible hack, but should be reasonably reliable. Nobody
129+
// should ever parse a tag with a regex. However, there isn't anything
130+
// in Rust that I know of that is suitable for handling partial html
131+
// fragments like those generated by pulldown_cmark.
132+
//
133+
// There are dozens of HTML tags/attributes that contain paths, so
134+
// feel free to add more tags if desired; these are the only ones I
135+
// care about right now.
136+
lazy_static! {
137+
static ref HTML_LINK: Regex =
138+
Regex::new(r#"(<(?:a|img) [^>]*?(?:src|href)=")([^"]+?)""#).unwrap();
139+
}
140+
141+
HTML_LINK
142+
.replace_all(&html, |caps: &regex::Captures<'_>| {
143+
let fixed = fix(caps[2].into(), path);
144+
format!("{}{}\"", &caps[1], fixed)
145+
})
146+
.into_owned()
147+
.into()
148+
}
149+
98150
match event {
99151
Event::Start(Tag::Link(link_type, dest, title)) => {
100-
Event::Start(Tag::Link(link_type, fix(dest, with_base), title))
152+
Event::Start(Tag::Link(link_type, fix(dest, path), title))
101153
}
102154
Event::Start(Tag::Image(link_type, dest, title)) => {
103-
Event::Start(Tag::Image(link_type, fix(dest, with_base), title))
155+
Event::Start(Tag::Image(link_type, fix(dest, path), title))
104156
}
157+
Event::Html(html) => Event::Html(fix_html(html, path)),
158+
Event::InlineHtml(html) => Event::InlineHtml(fix_html(html, path)),
105159
_ => event,
106160
}
107161
}
108162

109163
/// Wrapper around the pulldown-cmark parser for rendering markdown to HTML.
110164
pub fn render_markdown(text: &str, curly_quotes: bool) -> String {
111-
render_markdown_with_base(text, curly_quotes, "")
165+
render_markdown_with_path(text, curly_quotes, None)
112166
}
113167

114168
pub fn new_cmark_parser(text: &str) -> Parser<'_> {
@@ -120,13 +174,13 @@ pub fn new_cmark_parser(text: &str) -> Parser<'_> {
120174
Parser::new_ext(text, opts)
121175
}
122176

123-
pub fn render_markdown_with_base(text: &str, curly_quotes: bool, base: &str) -> String {
177+
pub fn render_markdown_with_path(text: &str, curly_quotes: bool, path: Option<&Path>) -> String {
124178
let mut s = String::with_capacity(text.len() * 3 / 2);
125179
let p = new_cmark_parser(text);
126180
let mut converter = EventQuoteConverter::new(curly_quotes);
127181
let events = p
128182
.map(clean_codeblock_headers)
129-
.map(|event| adjust_links(event, base))
183+
.map(|event| adjust_links(event, path))
130184
.map(|event| converter.convert(event));
131185

132186
html::push_html(&mut s, events);

tests/dummy_book/src/second/nested.md

+8
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,14 @@
33
When we link to [the first section](../first/nested.md), it should work on
44
both the print page and the non-print page.
55

6+
A [fragment link](#some-section) should work.
7+
68
Link [outside](../../std/foo/bar.html).
79

810
![Some image](../images/picture.png)
11+
12+
<a href="../first/markdown.md">HTML Link</a>
13+
14+
<img src="../images/picture.png" alt="raw html">
15+
16+
## Some section

tests/rendered_output.rs

+3
Original file line number | Diff line number | Diff line change
@@ -124,6 +124,9 @@ fn check_correct_relative_links_in_print_page() {
124124
r##"<a href="second/../first/nested.html">the first section</a>,"##,
125125
r##"<a href="second/../../std/foo/bar.html">outside</a>"##,
126126
r##"<img src="second/../images/picture.png" alt="Some image" />"##,
127+
r##"<a href="second/nested.html#some-section">fragment link</a>"##,
128+
r##"<a href="second/../first/markdown.html">HTML Link</a>"##,
129+
r##"<img src="second/../images/picture.png" alt="raw html">"##,
127130
],
128131
);
129132
}

0 commit comments

Comments
 (0)