Skip to content

Commit eeb7ac5

Browse files
authored
Merge pull request #1034 from Feder1co5oave/new_autolink
[commonmark]+[gfm] make autolinks compliant
2 parents 84c330b + 49982e8 commit eeb7ac5

File tree

7 files changed

+387
-20
lines changed

7 files changed

+387
-20
lines changed

lib/marked.js

Lines changed: 31 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,12 @@ block.list = replace(block.list)
5050
block._tag = '(?!(?:'
5151
+ 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code'
5252
+ '|var|samp|kbd|sub|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo'
53-
+ '|span|br|wbr|ins|del|img)\\b)\\w+(?!:/|[^\\w\\s@]*@)\\b';
53+
+ '|span|br|wbr|ins|del|img)\\b)\\w+(?!:|[^\\w\\s@]*@)\\b';
5454

5555
block.html = replace(block.html)
5656
('comment', /<!--[\s\S]*?-->/)
5757
('closed', /<(tag)[\s\S]+?<\/\1>/)
58-
('closing', /<tag(?:"[^"]*"|'[^']*'|[^'">])*?>/)
58+
('closing', /<tag(?:"[^"]*"|'[^']*'|\s[^'"\/>]*)*?\/?>/)
5959
(/tag/g, block._tag)
6060
();
6161

@@ -460,9 +460,9 @@ Lexer.prototype.token = function(src, top) {
460460

461461
var inline = {
462462
escape: /^\\([\\`*{}\[\]()#+\-.!_>])/,
463-
autolink: /^<([^ <>]+(@|:\/)[^ <>]+)>/,
463+
autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/,
464464
url: noop,
465-
tag: /^<!--[\s\S]*?-->|^<\/?\w+(?:"[^"]*"|'[^']*'|[^<'">])*?>/,
465+
tag: /^<!--[\s\S]*?-->|^<\/?[a-zA-Z0-9\-]+(?:"[^"]*"|'[^']*'|\s[^<'">\/]*)*?\/?>/,
466466
link: /^!?\[(inside)\]\(href\)/,
467467
reflink: /^!?\[(inside)\]\s*\[([^\]]*)\]/,
468468
nolink: /^!?\[((?:\[[^\]]*\]|\\[\[\]]|[^\[\]])*)\]/,
@@ -474,6 +474,14 @@ var inline = {
474474
text: /^[\s\S]+?(?=[\\<!\[_*`]| {2,}\n|$)/
475475
};
476476

477+
inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/;
478+
inline._email = /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/;
479+
480+
inline.autolink = replace(inline.autolink)
481+
('scheme', inline._scheme)
482+
('email', inline._email)
483+
()
484+
477485
inline._inside = /(?:\[[^\]]*\]|\\[\[\]]|[^\[\]]|\](?=[^\[]*\]))*/;
478486
inline._href = /\s*<?([\s\S]*?)>?(?:\s+['"]([\s\S]*?)['"])?\s*/;
479487

@@ -507,11 +515,14 @@ inline.pedantic = merge({}, inline.normal, {
507515

508516
inline.gfm = merge({}, inline.normal, {
509517
escape: replace(inline.escape)('])', '~|])')(),
510-
url: /^(https?:\/\/[^\s<]+[^<.,:;"')\]\s])/,
518+
url: replace(/^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/)
519+
('email', inline._email)
520+
(),
521+
_backpedal: /(?:[^?!.,:;*_~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_~)]+(?!$))+/,
511522
del: /^~~(?=\S)([\s\S]*?\S)~~/,
512523
text: replace(inline.text)
513524
(']|', '~]|')
514-
('|', '|https?://|')
525+
('|', '|https?://|ftp://|www\\.|[a-zA-Z0-9.!#$%&\'*+/=?^_`{\\|}~-]+@|')
515526
()
516527
});
517528

@@ -589,12 +600,8 @@ InlineLexer.prototype.output = function(src) {
589600
if (cap = this.rules.autolink.exec(src)) {
590601
src = src.substring(cap[0].length);
591602
if (cap[2] === '@') {
592-
text = escape(
593-
cap[1].charAt(6) === ':'
594-
? this.mangle(cap[1].substring(7))
595-
: this.mangle(cap[1])
596-
);
597-
href = this.mangle('mailto:') + text;
603+
text = escape(this.mangle(cap[1]));
604+
href = 'mailto:' + text;
598605
} else {
599606
text = escape(cap[1]);
600607
href = text;
@@ -605,9 +612,19 @@ InlineLexer.prototype.output = function(src) {
605612

606613
// url (gfm)
607614
if (!this.inLink && (cap = this.rules.url.exec(src))) {
615+
cap[0] = this.rules._backpedal.exec(cap[0])[0];
608616
src = src.substring(cap[0].length);
609-
text = escape(cap[1]);
610-
href = text;
617+
if (cap[2] === '@') {
618+
text = escape(cap[0]);
619+
href = 'mailto:' + text;
620+
} else {
621+
text = escape(cap[0]);
622+
if (cap[1] === 'www.') {
623+
href = 'http://' + text;
624+
} else {
625+
href = text;
626+
}
627+
}
611628
out += this.renderer.link(href, null, text);
612629
continue;
613630
}

test/index.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,7 @@ function fix() {
351351
fs.readdirSync(path.resolve(__dirname, 'original')).forEach(function(file) {
352352
var text = fs.readFileSync(path.resolve(__dirname, 'original', file));
353353

354-
if (file === 'hard_wrapped_paragraphs_with_list_like_lines.md') {
354+
if (path.extname(file) === '.md') {
355355
text = '---\ngfm: false\n---\n' + text;
356356
}
357357

test/new/cm_autolinks.html

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
<p>Here are some valid autolinks:</p>
2+
3+
<h3 id="example-565">Example 565</h3>
4+
5+
<p><a href="http://foo.bar.baz">http://foo.bar.baz</a></p>
6+
7+
<h3 id="example-566">Example 566</h3>
8+
9+
<p><a href="http://foo.bar.baz/test?q=hello&amp;id=22&amp;boolean">http://foo.bar.baz/test?q=hello&amp;id=22&amp;boolean</a></p>
10+
11+
<h3 id="example-567">Example 567</h3>
12+
13+
<p><a href="irc://foo.bar:2233/baz">irc://foo.bar:2233/baz</a></p>
14+
15+
<h3 id="example-568">Example 568</h3>
16+
17+
<p>Uppercase is also fine:</p>
18+
19+
<p><a href="MAILTO:FOO@BAR.BAZ">MAILTO:FOO@BAR.BAZ</a></p>
20+
21+
<p>Note that many strings that count as absolute URIs for purposes of this spec are not valid URIs, because their schemes are not registered or because of other problems with their syntax:</p>
22+
23+
<h3 id="example-569">Example 569</h3>
24+
25+
<p><a href="a+b+c:d">a+b+c:d</a></p>
26+
27+
<h3 id="example-570">Example 570</h3>
28+
29+
<p><a href="made-up-scheme://foo,bar">made-up-scheme://foo,bar</a></p>
30+
31+
<h3 id="example-571">Example 571</h3>
32+
33+
<p><a href="http://../">http://../</a></p>
34+
35+
<h3 id="example-572">Example 572</h3>
36+
37+
<p><a href="localhost:5001/foo">localhost:5001/foo</a></p>
38+
39+
<h3 id="example-573">Example 573</h3>
40+
41+
<p>Spaces are not allowed in autolinks:</p>
42+
43+
<p>&lt;http://foo.bar/baz bim&gt;</p>
44+
45+
<h3 id="example-574">Example 574</h3>
46+
47+
<p>Backslash-escapes do not work inside autolinks:</p>
48+
49+
<p><a href="http://example.com/%5C%5B%5C">http://example.com/\[\</a></p>
50+
51+
<p>Examples of email autolinks:</p>
52+
53+
<h3 id="example-575">Example 575</h3>
54+
55+
<p><a href="mailto:foo@bar.example.com">foo@bar.example.com</a></p>
56+
57+
<h3 id="example-576">Example 576</h3>
58+
59+
<p><a href="mailto:foo+special@Bar.baz-bar0.com">foo+special@Bar.baz-bar0.com</a></p>
60+
61+
<h3 id="example-577">Example 577</h3>
62+
63+
<p>Backslash-escapes do not work inside email autolinks:</p>
64+
65+
<p>&lt;foo+@bar.example.com&gt;</p>
66+
67+
<p>These are not autolinks:</p>
68+
69+
<h3 id="example-578">Example 578</h3>
70+
71+
<p>&lt;&gt;</p>
72+
73+
<h3 id="example-579">Example 579</h3>
74+
75+
<p>&lt; http://foo.bar &gt;</p>
76+
77+
<h3 id="example-580">Example 580</h3>
78+
79+
<p>&lt;m:abc&gt;</p>
80+
81+
<h3 id="example-581">Example 581</h3>
82+
83+
<p>&lt;foo.bar.baz&gt;</p>
84+
85+
<h3 id="example-582">Example 582</h3>
86+
87+
<p>http://example.com</p>
88+
89+
<h3 id="example-583">Example 583</h3>
90+
91+

test/new/cm_autolinks.md

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
---
2+
gfm: false
3+
mangle: false
4+
---
5+
6+
Here are some valid autolinks:
7+
8+
### Example 565
9+
10+
<http://foo.bar.baz>
11+
12+
### Example 566
13+
14+
<http://foo.bar.baz/test?q=hello&id=22&boolean>
15+
16+
### Example 567
17+
18+
<irc://foo.bar:2233/baz>
19+
20+
### Example 568
21+
22+
Uppercase is also fine:
23+
24+
<MAILTO:FOO@BAR.BAZ>
25+
26+
Note that many strings that count as absolute URIs for purposes of this spec are not valid URIs, because their schemes are not registered or because of other problems with their syntax:
27+
28+
### Example 569
29+
30+
<a+b+c:d>
31+
32+
### Example 570
33+
34+
<made-up-scheme://foo,bar>
35+
36+
### Example 571
37+
38+
<http://../>
39+
40+
### Example 572
41+
42+
<localhost:5001/foo>
43+
44+
### Example 573
45+
46+
Spaces are not allowed in autolinks:
47+
48+
<http://foo.bar/baz bim>
49+
50+
### Example 574
51+
52+
Backslash-escapes do not work inside autolinks:
53+
54+
<http://example.com/\[\>
55+
56+
Examples of email autolinks:
57+
58+
### Example 575
59+
60+
<foo@bar.example.com>
61+
62+
### Example 576
63+
64+
<foo+special@Bar.baz-bar0.com>
65+
66+
### Example 577
67+
68+
Backslash-escapes do not work inside email autolinks:
69+
70+
<foo\+@bar.example.com>
71+
72+
These are not autolinks:
73+
74+
### Example 578
75+
76+
<>
77+
78+
### Example 579
79+
80+
< http://foo.bar >
81+
82+
### Example 580
83+
84+
<m:abc>
85+
86+
### Example 581
87+
88+
<foo.bar.baz>
89+
90+
### Example 582
91+
92+
http://example.com
93+
94+
### Example 583
95+
96+

test/new/gfm_links.html

Lines changed: 83 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,83 @@
1-
<p>This should be a link:
2-
<a href="http://example.com/hello-world">http://example.com/hello-world</a>.</p>
1+
<p>link with . <a href="http://example.com/hello-world">http://example.com/hello-world</a>.</p>
2+
3+
<p>link with ! <a href="http://example.com/hello-world">http://example.com/hello-world</a>!</p>
4+
5+
<p>link with : <a href="http://example.com/hello-world">http://example.com/hello-world</a>:</p>
6+
7+
<p>link with , <a href="http://example.com/hello-world">http://example.com/hello-world</a>,</p>
8+
9+
<p>link with ; <a href="http://example.com/hello-world">http://example.com/hello-world</a>;</p>
10+
11+
<p>link with ) <a href="http://example.com/hello-world">http://example.com/hello-world</a>)</p>
12+
13+
<p>link with nothing <a href="http://example.com/hello-world">http://example.com/hello-world</a></p>
14+
15+
<h3 id="example-597">Example 597</h3>
16+
17+
<p>The scheme http will be inserted automatically:</p>
18+
19+
<p><a href="http://www.commonmark.org">www.commonmark.org</a></p>
20+
<h3 id="example-598">Example 598</h3>
21+
22+
<p>After a valid domain, zero or more non-space non-&lt; characters may follow:</p>
23+
24+
<p>Visit <a href="http://www.commonmark.org/help">www.commonmark.org/help</a> for more information.</p>
25+
26+
<h3 id="example-599">Example 599</h3>
27+
28+
<p>Trailing punctuation (specifically, ?, !, ., ,, :, *, _, and ~) will not be considered part of the autolink, though they may be included in the interior of the link:</p>
29+
30+
<p>Visit <a href="http://www.commonmark.org">www.commonmark.org</a>.</p>
31+
32+
<p>Visit <a href="http://www.commonmark.org/a.b">www.commonmark.org/a.b</a>.</p>
33+
34+
<h3 id="example-600">Example 600</h3>
35+
36+
<p><a href="http://www.google.com/search?q=Markup+(business)">www.google.com/search?q=Markup+(business)</a></p>
37+
38+
<p>(<a href="http://www.google.com/search?q=Markup+(business)">www.google.com/search?q=Markup+(business)</a>)</p>
39+
40+
<h3 id="example-601">Example 601</h3>
41+
42+
<p><a href="http://www.google.com/search?q=(business))+ok">www.google.com/search?q=(business))+ok</a></p>
43+
44+
<h3 id="example-602">Example 602</h3>
45+
46+
<p><a href="http://www.google.com/search?q=commonmark&amp;hl=en">www.google.com/search?q=commonmark&amp;hl=en</a></p>
47+
48+
<p><a href="http://www.google.com/search?q=commonmark">www.google.com/search?q=commonmark</a>&amp;</p>
49+
50+
<h3 id="example-603">Example 603</h3>
51+
52+
<p>&lt; immediately ends an autolink.</p>
53+
54+
<p><a href="http://www.commonmark.org/he">www.commonmark.org/he</a>&lt;lp</p>
55+
56+
<h3 id="example-604">Example 604</h3>
57+
58+
<p><a href="http://commonmark.org">http://commonmark.org</a></p>
59+
60+
<p>(Visit <a href="https://encrypted.google.com/search?q=Markup+(business)">https://encrypted.google.com/search?q=Markup+(business)</a>)</p>
61+
62+
<p>Anonymous FTP is available at <a href="ftp://foo.bar.baz">ftp://foo.bar.baz</a>.</p>
63+
64+
<p>Extended email autolinks:</p>
65+
66+
<h3 id="example-605">Example 605</h3>
67+
68+
<p><a href="mailto:foo@bar.baz">foo@bar.baz</a></p>
69+
70+
<h3 id="example-606">Example 606</h3>
71+
72+
<p>hello@mail+xyz.example isn&#39;t valid, but <a href="mailto:hello+xyz@mail.example">hello+xyz@mail.example</a> is.</p>
73+
74+
<h3 id="example-607">Example 607</h3>
75+
76+
<p><a href="mailto:a.b-c_d@a.b">a.b-c_d@a.b</a></p>
77+
78+
<p><a href="mailto:a.b-c_d@a.b">a.b-c_d@a.b</a>.</p>
79+
80+
81+
82+
<pre><code>a.b-c_d@a.b_
83+
</code></pre>

0 commit comments

Comments
 (0)