@@ -140,44 +140,28 @@ impl<'s> ScriptSource<'s> {
140
140
content : input,
141
141
} ;
142
142
143
- // See rust-lang/rust's compiler/rustc_lexer/src/lib.rs's `strip_shebang`
144
- // Shebang must start with `#!` literally, without any preceding whitespace.
145
- // For simplicity we consider any line starting with `#!` a shebang,
146
- // regardless of restrictions put on shebangs by specific platforms.
147
- if let Some ( rest) = source. content . strip_prefix ( "#!" ) {
148
- // Ok, this is a shebang but if the next non-whitespace token is `[`,
149
- // then it may be valid Rust code, so consider it Rust code.
150
- //
151
- // NOTE: rustc considers line and block comments to be whitespace but to avoid
152
- // any more awareness of Rust grammar, we are excluding it.
153
- if rest. trim_start ( ) . starts_with ( '[' ) {
154
- return Ok ( source) ;
155
- }
156
-
157
- // No other choice than to consider this a shebang.
158
- let newline_end = source
159
- . content
160
- . find ( '\n' )
161
- . map ( |pos| pos + 1 )
162
- . unwrap_or ( source. content . len ( ) ) ;
163
- let ( shebang, content) = source. content . split_at ( newline_end) ;
143
+ if let Some ( shebang_end) = strip_shebang ( source. content ) {
144
+ let ( shebang, content) = source. content . split_at ( shebang_end) ;
164
145
source. shebang = Some ( shebang) ;
165
146
source. content = content;
166
147
}
167
148
168
149
const FENCE_CHAR : char = '-' ;
169
150
170
- let mut trimmed_content = source. content ;
171
- while !trimmed_content . is_empty ( ) {
172
- let c = trimmed_content ;
173
- let c = c . trim_start_matches ( [ ' ' , '\t ' ] ) ;
174
- let c = c . trim_start_matches ( [ '\r' , '\n' ] ) ;
175
- if c == trimmed_content {
151
+ let mut rest = source. content ;
152
+ while !rest . is_empty ( ) {
153
+ let without_spaces = rest . trim_start_matches ( [ ' ' , '\t' ] ) ;
154
+ let without_nl = without_spaces . trim_start_matches ( [ '\r ' , '\n ' ] ) ;
155
+ if without_nl == rest {
156
+ // nothing trimmed
176
157
break ;
158
+ } else if without_nl == without_spaces {
159
+ // frontmatter must come after a newline
160
+ return Ok ( source) ;
177
161
}
178
- trimmed_content = c ;
162
+ rest = without_nl ;
179
163
}
180
- let fence_end = trimmed_content
164
+ let fence_end = rest
181
165
. char_indices ( )
182
166
. find_map ( |( i, c) | ( c != FENCE_CHAR ) . then_some ( i) )
183
167
. unwrap_or ( source. content . len ( ) ) ;
@@ -190,20 +174,21 @@ impl<'s> ScriptSource<'s> {
190
174
"found {fence_end} `{FENCE_CHAR}` in rust frontmatter, expected at least 3"
191
175
)
192
176
}
193
- _ => trimmed_content . split_at ( fence_end) ,
177
+ _ => rest . split_at ( fence_end) ,
194
178
} ;
179
+ let nl_fence_pattern = format ! ( "\n {fence_pattern}" ) ;
195
180
let ( info, content) = rest. split_once ( "\n " ) . unwrap_or ( ( rest, "" ) ) ;
196
181
let info = info. trim ( ) ;
197
182
if !info. is_empty ( ) {
198
183
source. info = Some ( info) ;
199
184
}
200
185
source. content = content;
201
186
202
- let Some ( ( frontmatter , content ) ) = source. content . split_once ( fence_pattern ) else {
187
+ let Some ( frontmatter_nl ) = source. content . find ( & nl_fence_pattern ) else {
203
188
anyhow:: bail!( "no closing `{fence_pattern}` found for frontmatter" ) ;
204
189
} ;
205
- source. frontmatter = Some ( frontmatter ) ;
206
- source. content = content;
190
+ source. frontmatter = Some ( & source . content [ ..frontmatter_nl + 1 ] ) ;
191
+ source. content = & source . content [ frontmatter_nl + nl_fence_pattern . len ( ) .. ] ;
207
192
208
193
let ( line, content) = source
209
194
. content
@@ -235,6 +220,26 @@ impl<'s> ScriptSource<'s> {
235
220
}
236
221
}
237
222
223
/// Locates a leading shebang line in `input`.
///
/// Returns the byte offset just past the shebang line (including its
/// terminating `\n` when present, otherwise the full length of `input`), or
/// `None` when `input` does not begin with a shebang.
///
/// Modeled on `strip_shebang` in rust-lang/rust's
/// `compiler/rustc_lexer/src/lib.rs`. The `#!` marker must be the very first
/// bytes of the input, with no preceding whitespace. For simplicity, any
/// first line starting with `#!` counts as a shebang, regardless of the
/// restrictions individual platforms place on shebangs.
fn strip_shebang(input: &str) -> Option<usize> {
    let after_marker = input.strip_prefix("#!")?;

    // `#!` followed (after optional whitespace) by `[` may be valid Rust
    // code, so it is treated as Rust code rather than as a shebang.
    //
    // NOTE: rustc also treats line and block comments as whitespace here; that
    // is deliberately not mirrored to avoid more awareness of Rust grammar.
    if after_marker.trim_start().starts_with('[') {
        return None;
    }

    // Anything else has to be a shebang: it spans the whole first line.
    let shebang_end = match input.find('\n') {
        Some(newline) => newline + 1,
        None => input.len(),
    };
    Some(shebang_end)
}
242
+
238
243
#[ cfg( test) ]
239
244
mod test_expand {
240
245
use snapbox:: assert_data_eq;
@@ -466,6 +471,86 @@ fn main() {}
466
471
) ;
467
472
}
468
473
474
// An indented `---` line following the shebang is not treated as a frontmatter
// fence: the expected snapshot has `frontmatter: None` and the whole indented
// text stays in `content`, with only the shebang line split off.
+ #[ test]
475
+ fn split_indent ( ) {
476
+ assert_source (
477
+ r#"#!/usr/bin/env cargo
478
+ ---
479
+ [dependencies]
480
+ time="0.1.25"
481
+ ----
482
+
483
+ fn main() {}
484
+ "# ,
485
+ str![ [ r##"
486
+ shebang: "#!/usr/bin/env cargo\n"
487
+ info: None
488
+ frontmatter: None
489
+ content: " ---\n [dependencies]\n time=\"0.1.25\"\n ----\n\nfn main() {}\n"
490
+
491
+ "## ] ] ,
492
+ ) ;
493
+ }
494
+
495
// A frontmatter opened with a five-dash fence may contain shorter `---` lines:
// the expected snapshot keeps both `---` lines as the frontmatter body and
// resumes `content` after the closing five-dash fence.
+ #[ test]
496
+ fn split_escaped ( ) {
497
+ assert_source (
498
+ r#"#!/usr/bin/env cargo
499
+ -----
500
+ ---
501
+ ---
502
+ -----
503
+
504
+ fn main() {}
505
+ "# ,
506
+ str![ [ r##"
507
+ shebang: "#!/usr/bin/env cargo\n"
508
+ info: None
509
+ frontmatter: "---\n---\n"
510
+ content: "\nfn main() {}\n"
511
+
512
+ "## ] ] ,
513
+ ) ;
514
+ }
515
+
516
// The reverse of escaping: a three-dash opening fence with longer `-----` lines
// inside. Parsing is expected to fail with a "trailing content on closing
// fence" error reporting `--`.
// NOTE(review): presumably the first `-----` line is matched as the `---`
// closing fence, leaving `--` behind -- confirm against the parser.
+ #[ test]
517
+ fn split_invalid_escaped ( ) {
518
+ assert_err (
519
+ ScriptSource :: parse (
520
+ r#"#!/usr/bin/env cargo
521
+ ---
522
+ -----
523
+ -----
524
+ ---
525
+
526
+ fn main() {}
527
+ "# ,
528
+ ) ,
529
+ str![ "unexpected trailing content on closing fence: `--`" ] ,
530
+ ) ;
531
+ }
532
+
533
// Dashes that appear mid-line inside the frontmatter body (`Hello---`) must not
// close the frontmatter: the expected snapshot keeps `Hello---\nWorld\n` as the
// frontmatter, and only the `---` at the start of a line ends it.
+ #[ test]
534
+ fn split_dashes_in_body ( ) {
535
+ assert_source (
536
+ r#"#!/usr/bin/env cargo
537
+ ---
538
+ Hello---
539
+ World
540
+ ---
541
+
542
+ fn main() {}
543
+ "# ,
544
+ str![ [ r##"
545
+ shebang: "#!/usr/bin/env cargo\n"
546
+ info: None
547
+ frontmatter: "Hello---\nWorld\n"
548
+ content: "\nfn main() {}\n"
549
+
550
+ "## ] ] ,
551
+ ) ;
552
+ }
553
+
469
554
#[ test]
470
555
fn split_mismatched_dashes ( ) {
471
556
assert_err (
0 commit comments