@@ -8,6 +8,8 @@ use regex::Regex;
8
8
use pulldown_cmark:: { html, CowStr , Event , Options , Parser , Tag } ;
9
9
10
10
use std:: borrow:: Cow ;
11
+ use std:: fmt:: Write ;
12
+ use std:: path:: Path ;
11
13
12
14
pub use self :: string:: take_lines;
13
15
@@ -65,20 +67,47 @@ pub fn id_from_content(content: &str) -> String {
65
67
normalize_id ( trimmed)
66
68
}
67
69
68
- fn adjust_links < ' a > ( event : Event < ' a > , with_base : & str ) -> Event < ' a > {
70
+ /// Fix links to the correct location.
71
+ ///
72
+ /// This adjusts links, such as turning `.md` extensions to `.html`.
73
+ ///
74
+ /// `path` is the path to the page being rendered relative to the root of the
75
+ /// book. This is used for the `print.html` page so that links on the print
76
+ /// page go to the original location. Normal page rendering sets `path` to
77
+ /// None. Ideally, print page links would link to anchors on the print page,
78
+ /// but that is very difficult.
79
+ fn adjust_links < ' a > ( event : Event < ' a > , path : Option < & Path > ) -> Event < ' a > {
69
80
lazy_static ! {
70
81
static ref SCHEME_LINK : Regex = Regex :: new( r"^[a-z][a-z0-9+.-]*:" ) . unwrap( ) ;
71
82
static ref MD_LINK : Regex = Regex :: new( r"(?P<link>.*)\.md(?P<anchor>#.*)?" ) . unwrap( ) ;
72
83
}
73
84
74
- fn fix < ' a > ( dest : CowStr < ' a > , base : & str ) -> CowStr < ' a > {
85
+ fn fix < ' a > ( dest : CowStr < ' a > , path : Option < & Path > ) -> CowStr < ' a > {
86
+ if dest. starts_with ( '#' ) {
87
+ // Fragment-only link.
88
+ if let Some ( path) = path {
89
+ let mut base = path. display ( ) . to_string ( ) ;
90
+ if base. ends_with ( ".md" ) {
91
+ base. replace_range ( base. len ( ) - 3 .., ".html" ) ;
92
+ }
93
+ return format ! ( "{}{}" , base, dest) . into ( ) ;
94
+ } else {
95
+ return dest;
96
+ }
97
+ }
75
98
// Don't modify links with schemes like `https`.
76
99
if !SCHEME_LINK . is_match ( & dest) {
77
100
// This is a relative link, adjust it as necessary.
78
101
let mut fixed_link = String :: new ( ) ;
79
- if !base. is_empty ( ) {
80
- fixed_link. push_str ( base) ;
81
- fixed_link. push_str ( "/" ) ;
102
+ if let Some ( path) = path {
103
+ let base = path
104
+ . parent ( )
105
+ . expect ( "path can't be empty" )
106
+ . to_str ( )
107
+ . expect ( "utf-8 paths only" ) ;
108
+ if !base. is_empty ( ) {
109
+ write ! ( fixed_link, "{}/" , base) . unwrap ( ) ;
110
+ }
82
111
}
83
112
84
113
if let Some ( caps) = MD_LINK . captures ( & dest) {
@@ -95,20 +124,45 @@ fn adjust_links<'a>(event: Event<'a>, with_base: &str) -> Event<'a> {
95
124
dest
96
125
}
97
126
127
/// Rewrites link targets inside a raw HTML fragment.
///
/// Every `src="..."` or `href="..."` attribute value found inside an `<a ...>`
/// or `<img ...>` tag is passed through `fix` together with `path`, and the
/// fragment is returned with those values substituted. Only double-quoted,
/// non-empty attribute values are matched by the pattern below; all other
/// text in the fragment is left as it was.
+ fn fix_html < ' a > ( html : CowStr < ' a > , path : Option < & Path > ) -> CowStr < ' a > {
128
+ // This is a terrible hack, but should be reasonably reliable. Nobody
129
+ // should ever parse a tag with a regex. However, there isn't anything
130
+ // in Rust that I know of that is suitable for handling partial html
131
+ // fragments like those generated by pulldown_cmark.
132
+ //
133
+ // There are dozens of HTML tags/attributes that contain paths, so
134
+ // feel free to add more tags if desired; these are the only ones I
135
+ // care about right now.
136
+ lazy_static ! {
137
// Capture group 1: the tag text from `<a `/`<img ` up to and including the
// opening quote of the attribute. Capture group 2: the attribute value.
// The closing quote is matched but not captured.
+ static ref HTML_LINK : Regex =
138
+ Regex :: new( r#"(<(?:a|img) [^>]*?(?:src|href)=")([^"]+?)""# ) . unwrap( ) ;
139
+ }
140
+
141
+ HTML_LINK
142
+ . replace_all ( & html, |caps : & regex:: Captures < ' _ > | {
143
// Run the attribute value through the same link fixer used for markdown links.
+ let fixed = fix ( caps[ 2 ] . into ( ) , path) ;
144
// Re-emit the captured prefix, the fixed value, and the closing quote that
// the match consumed.
// NOTE(review): as rendered here the literal ends with `\" ` (a quote followed
// by a space), which would add a space after the closing quote -- confirm
// whether that space is intended or an artifact of how this diff was rendered.
+ format ! ( "{}{}\" " , & caps[ 1 ] , fixed)
145
+ } )
146
+ . into_owned ( )
147
+ . into ( )
148
+ }
149
+
98
150
match event {
99
151
Event :: Start ( Tag :: Link ( link_type, dest, title) ) => {
100
- Event :: Start ( Tag :: Link ( link_type, fix ( dest, with_base ) , title) )
152
+ Event :: Start ( Tag :: Link ( link_type, fix ( dest, path ) , title) )
101
153
}
102
154
Event :: Start ( Tag :: Image ( link_type, dest, title) ) => {
103
- Event :: Start ( Tag :: Image ( link_type, fix ( dest, with_base ) , title) )
155
+ Event :: Start ( Tag :: Image ( link_type, fix ( dest, path ) , title) )
104
156
}
157
+ Event :: Html ( html) => Event :: Html ( fix_html ( html, path) ) ,
158
+ Event :: InlineHtml ( html) => Event :: InlineHtml ( fix_html ( html, path) ) ,
105
159
_ => event,
106
160
}
107
161
}
108
162
109
163
/// Wrapper around the pulldown-cmark parser for rendering markdown to HTML.
///
/// Convenience entry point that forwards `text` and `curly_quotes` unchanged
/// to the more general renderer. After this change no page path is supplied
/// (`None` is passed), which is the normal page rendering case as opposed to
/// the print page.
110
164
pub fn render_markdown ( text : & str , curly_quotes : bool ) -> String {
111
- render_markdown_with_base ( text, curly_quotes, "" )
165
+ render_markdown_with_path ( text, curly_quotes, None )
112
166
}
113
167
114
168
pub fn new_cmark_parser ( text : & str ) -> Parser < ' _ > {
@@ -120,13 +174,13 @@ pub fn new_cmark_parser(text: &str) -> Parser<'_> {
120
174
Parser :: new_ext ( text, opts)
121
175
}
122
176
123
- pub fn render_markdown_with_base ( text : & str , curly_quotes : bool , base : & str ) -> String {
177
+ pub fn render_markdown_with_path ( text : & str , curly_quotes : bool , path : Option < & Path > ) -> String {
124
178
let mut s = String :: with_capacity ( text. len ( ) * 3 / 2 ) ;
125
179
let p = new_cmark_parser ( text) ;
126
180
let mut converter = EventQuoteConverter :: new ( curly_quotes) ;
127
181
let events = p
128
182
. map ( clean_codeblock_headers)
129
- . map ( |event| adjust_links ( event, base ) )
183
+ . map ( |event| adjust_links ( event, path ) )
130
184
. map ( |event| converter. convert ( event) ) ;
131
185
132
186
html:: push_html ( & mut s, events) ;
0 commit comments