Skip to content

[commonmark]+[gfm] make autolinks compliant #1034

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jan 24, 2018
45 changes: 31 additions & 14 deletions lib/marked.js
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ block.list = replace(block.list)
block._tag = '(?!(?:'
+ 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code'
+ '|var|samp|kbd|sub|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo'
+ '|span|br|wbr|ins|del|img)\\b)\\w+(?!:/|[^\\w\\s@]*@)\\b';
+ '|span|br|wbr|ins|del|img)\\b)\\w+(?!:|[^\\w\\s@]*@)\\b';

block.html = replace(block.html)
('comment', /<!--[\s\S]*?-->/)
('closed', /<(tag)[\s\S]+?<\/\1>/)
('closing', /<tag(?:"[^"]*"|'[^']*'|[^'">])*?>/)
('closing', /<tag(?:"[^"]*"|'[^']*'|\s[^'"\/>]*)*?\/?>/)
(/tag/g, block._tag)
();

Expand Down Expand Up @@ -460,9 +460,9 @@ Lexer.prototype.token = function(src, top) {

var inline = {
escape: /^\\([\\`*{}\[\]()#+\-.!_>])/,
autolink: /^<([^ <>]+(@|:\/)[^ <>]+)>/,
autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/,
url: noop,
tag: /^<!--[\s\S]*?-->|^<\/?\w+(?:"[^"]*"|'[^']*'|[^<'">])*?>/,
tag: /^<!--[\s\S]*?-->|^<\/?[a-zA-Z0-9\-]+(?:"[^"]*"|'[^']*'|\s[^<'">\/]*)*?\/?>/,
link: /^!?\[(inside)\]\(href\)/,
reflink: /^!?\[(inside)\]\s*\[([^\]]*)\]/,
nolink: /^!?\[((?:\[[^\]]*\]|\\[\[\]]|[^\[\]])*)\]/,
Expand All @@ -474,6 +474,14 @@ var inline = {
text: /^[\s\S]+?(?=[\\<!\[_*`]| {2,}\n|$)/
};

inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/;
inline._email = /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/;

inline.autolink = replace(inline.autolink)
('scheme', inline._scheme)
('email', inline._email)
()

inline._inside = /(?:\[[^\]]*\]|\\[\[\]]|[^\[\]]|\](?=[^\[]*\]))*/;
inline._href = /\s*<?([\s\S]*?)>?(?:\s+['"]([\s\S]*?)['"])?\s*/;

Expand Down Expand Up @@ -507,11 +515,14 @@ inline.pedantic = merge({}, inline.normal, {

inline.gfm = merge({}, inline.normal, {
escape: replace(inline.escape)('])', '~|])')(),
url: /^(https?:\/\/[^\s<]+[^<.,:;"')\]\s])/,
url: replace(/^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/)
('email', inline._email)
(),
_backpedal: /(?:[^?!.,:;*_~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_~)]+(?!$))+/,
del: /^~~(?=\S)([\s\S]*?\S)~~/,
text: replace(inline.text)
(']|', '~]|')
('|', '|https?://|')
('|', '|https?://|ftp://|www\\.|[a-zA-Z0-9.!#$%&\'*+/=?^_`{\\|}~-]+@|')
()
});

Expand Down Expand Up @@ -589,12 +600,8 @@ InlineLexer.prototype.output = function(src) {
if (cap = this.rules.autolink.exec(src)) {
src = src.substring(cap[0].length);
if (cap[2] === '@') {
text = escape(
cap[1].charAt(6) === ':'
? this.mangle(cap[1].substring(7))
: this.mangle(cap[1])
);
href = this.mangle('mailto:') + text;
text = escape(this.mangle(cap[1]));
href = 'mailto:' + text;
} else {
text = escape(cap[1]);
href = text;
Expand All @@ -605,9 +612,19 @@ InlineLexer.prototype.output = function(src) {

// url (gfm)
if (!this.inLink && (cap = this.rules.url.exec(src))) {
cap[0] = this.rules._backpedal.exec(cap[0])[0];
src = src.substring(cap[0].length);
text = escape(cap[1]);
href = text;
if (cap[2] === '@') {
text = escape(cap[0]);
href = 'mailto:' + text;
} else {
text = escape(cap[0]);
if (cap[1] === 'www.') {
href = 'http://' + text;
} else {
href = text;
}
}
out += this.renderer.link(href, null, text);
continue;
}
Expand Down
2 changes: 1 addition & 1 deletion test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ function fix() {
fs.readdirSync(path.resolve(__dirname, 'original')).forEach(function(file) {
var text = fs.readFileSync(path.resolve(__dirname, 'original', file));

if (file === 'hard_wrapped_paragraphs_with_list_like_lines.md') {
if (path.extname(file) === '.md') {
text = '---\ngfm: false\n---\n' + text;
}

Expand Down
91 changes: 91 additions & 0 deletions test/new/cm_autolinks.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
<p>Here are some valid autolinks:</p>

<h3 id="example-565">Example 565</h3>

<p><a href="http://foo.bar.baz">http://foo.bar.baz</a></p>

<h3 id="example-566">Example 566</h3>

<p><a href="http://foo.bar.baz/test?q=hello&amp;id=22&amp;boolean">http://foo.bar.baz/test?q=hello&amp;id=22&amp;boolean</a></p>

<h3 id="example-567">Example 567</h3>

<p><a href="irc://foo.bar:2233/baz">irc://foo.bar:2233/baz</a></p>

<h3 id="example-568">Example 568</h3>

<p>Uppercase is also fine:</p>

<p><a href="MAILTO:FOO@BAR.BAZ">MAILTO:FOO@BAR.BAZ</a></p>

<p>Note that many strings that count as absolute URIs for purposes of this spec are not valid URIs, because their schemes are not registered or because of other problems with their syntax:</p>

<h3 id="example-569">Example 569</h3>

<p><a href="a+b+c:d">a+b+c:d</a></p>

<h3 id="example-570">Example 570</h3>

<p><a href="made-up-scheme://foo,bar">made-up-scheme://foo,bar</a></p>

<h3 id="example-571">Example 571</h3>

<p><a href="http://../">http://../</a></p>

<h3 id="example-572">Example 572</h3>

<p><a href="localhost:5001/foo">localhost:5001/foo</a></p>

<h3 id="example-573">Example 573</h3>

<p>Spaces are not allowed in autolinks:</p>

<p>&lt;http://foo.bar/baz bim&gt;</p>

<h3 id="example-574">Example 574</h3>

<p>Backslash-escapes do not work inside autolinks:</p>

<p><a href="http://example.com/%5C%5B%5C">http://example.com/\[\</a></p>

<p>Examples of email autolinks:</p>

<h3 id="example-575">Example 575</h3>

<p><a href="mailto:foo@bar.example.com">foo@bar.example.com</a></p>

<h3 id="example-576">Example 576</h3>

<p><a href="mailto:foo+special@Bar.baz-bar0.com">foo+special@Bar.baz-bar0.com</a></p>

<h3 id="example-577">Example 577</h3>

<p>Backslash-escapes do not work inside email autolinks:</p>

<p>&lt;foo+@bar.example.com&gt;</p>

<p>These are not autolinks:</p>

<h3 id="example-578">Example 578</h3>

<p>&lt;&gt;</p>

<h3 id="example-579">Example 579</h3>

<p>&lt; http://foo.bar &gt;</p>

<h3 id="example-580">Example 580</h3>

<p>&lt;m:abc&gt;</p>

<h3 id="example-581">Example 581</h3>

<p>&lt;foo.bar.baz&gt;</p>

<h3 id="example-582">Example 582</h3>

<p>http://example.com</p>

<h3 id="example-583">Example 583</h3>

<p>foo@bar.example.com</p>
96 changes: 96 additions & 0 deletions test/new/cm_autolinks.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
---
gfm: false
mangle: false
---

Here are some valid autolinks:

### Example 565

<http://foo.bar.baz>

### Example 566

<http://foo.bar.baz/test?q=hello&id=22&boolean>

### Example 567

<irc://foo.bar:2233/baz>

### Example 568

Uppercase is also fine:

<MAILTO:FOO@BAR.BAZ>

Note that many strings that count as absolute URIs for purposes of this spec are not valid URIs, because their schemes are not registered or because of other problems with their syntax:

### Example 569

<a+b+c:d>

### Example 570

<made-up-scheme://foo,bar>

### Example 571

<http://../>

### Example 572

<localhost:5001/foo>

### Example 573

Spaces are not allowed in autolinks:

<http://foo.bar/baz bim>

### Example 574

Backslash-escapes do not work inside autolinks:

<http://example.com/\[\>

Examples of email autolinks:

### Example 575

<foo@bar.example.com>

### Example 576

<foo+special@Bar.baz-bar0.com>

### Example 577

Backslash-escapes do not work inside email autolinks:

<foo\+@bar.example.com>

These are not autolinks:

### Example 578

<>

### Example 579

< http://foo.bar >

### Example 580

<m:abc>

### Example 581

<foo.bar.baz>

### Example 582

http://example.com

### Example 583

foo@bar.example.com
85 changes: 83 additions & 2 deletions test/new/gfm_links.html
Original file line number Diff line number Diff line change
@@ -1,2 +1,83 @@
<p>This should be a link:
<a href="http://example.com/hello-world">http://example.com/hello-world</a>.</p>
<p>link with . <a href="http://example.com/hello-world">http://example.com/hello-world</a>.</p>

<p>link with ! <a href="http://example.com/hello-world">http://example.com/hello-world</a>!</p>

<p>link with : <a href="http://example.com/hello-world">http://example.com/hello-world</a>:</p>

<p>link with , <a href="http://example.com/hello-world">http://example.com/hello-world</a>,</p>

<p>link with ; <a href="http://example.com/hello-world">http://example.com/hello-world</a>;</p>

<p>link with ) <a href="http://example.com/hello-world">http://example.com/hello-world</a>)</p>

<p>link with nothing <a href="http://example.com/hello-world">http://example.com/hello-world</a></p>

<h3 id="example-597">Example 597</h3>

<p>The scheme http will be inserted automatically:</p>

<p><a href="http://www.commonmark.org">www.commonmark.org</a></p>
<h3 id="example-598">Example 598</h3>

<p>After a valid domain, zero or more non-space non-&lt; characters may follow:</p>

<p>Visit <a href="http://www.commonmark.org/help">www.commonmark.org/help</a> for more information.</p>

<h3 id="example-599">Example 599</h3>

<p>Trailing punctuation (specifically, ?, !, ., ,, :, *, _, and ~) will not be considered part of the autolink, though they may be included in the interior of the link:</p>

<p>Visit <a href="http://www.commonmark.org">www.commonmark.org</a>.</p>

<p>Visit <a href="http://www.commonmark.org/a.b">www.commonmark.org/a.b</a>.</p>

<h3 id="example-600">Example 600</h3>

<p><a href="http://www.google.com/search?q=Markup+(business)">www.google.com/search?q=Markup+(business)</a></p>

<p>(<a href="http://www.google.com/search?q=Markup+(business)">www.google.com/search?q=Markup+(business)</a>)</p>

<h3 id="example-601">Example 601</h3>

<p><a href="http://www.google.com/search?q=(business))+ok">www.google.com/search?q=(business))+ok</a></p>

<h3 id="example-602">Example 602</h3>

<p><a href="http://www.google.com/search?q=commonmark&amp;hl=en">www.google.com/search?q=commonmark&amp;hl=en</a></p>

<p><a href="http://www.google.com/search?q=commonmark">www.google.com/search?q=commonmark</a>&amp;</p>

<h3 id="example-603">Example 603</h3>

<p>&lt; immediately ends an autolink.</p>

<p><a href="http://www.commonmark.org/he">www.commonmark.org/he</a>&lt;lp</p>

<h3 id="example-604">Example 604</h3>

<p><a href="http://commonmark.org">http://commonmark.org</a></p>

<p>(Visit <a href="https://encrypted.google.com/search?q=Markup+(business)">https://encrypted.google.com/search?q=Markup+(business)</a>)</p>

<p>Anonymous FTP is available at <a href="ftp://foo.bar.baz">ftp://foo.bar.baz</a>.</p>

<p>Extended email autolinks:</p>

<h3 id="example-605">Example 605</h3>

<p><a href="mailto:foo@bar.baz">foo@bar.baz</a></p>

<h3 id="example-606">Example 606</h3>

<p>hello@mail+xyz.example isn&#39;t valid, but <a href="mailto:hello+xyz@mail.example">hello+xyz@mail.example</a> is.</p>

<h3 id="example-607">Example 607</h3>

<p><a href="mailto:a.b-c_d@a.b">a.b-c_d@a.b</a></p>

<p><a href="mailto:a.b-c_d@a.b">a.b-c_d@a.b</a>.</p>

<p>a.b-c_d@a.b-</p>

<pre><code>a.b-c_d@a.b_
</code></pre>
Loading