-
Notifications
You must be signed in to change notification settings - Fork 3.4k
security: finish fixing unsafe heading regex #1226
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
0e07a9f
990b452
5736014
943d995
0cfe39e
29be5f5
4d5cfc7
dd26af8
24d4a5e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,6 +4,47 @@ | |
* https://github.com/markedjs/marked | ||
*/ | ||
|
||
var NEW_TEXT = true; | ||
|
||
var doLog = true; | ||
function log(msg) { | ||
if (doLog) { | ||
console.log(msg); | ||
} | ||
} | ||
|
||
// Return str with all trailing {c | all but c} removed | ||
// allButC: Default false | ||
function rtrim(str, c, allButC) { | ||
if (typeof allButC === 'undefined') { | ||
allButC = false; | ||
} else { | ||
allButC = true; | ||
} | ||
var mustMatchC = !allButC; | ||
|
||
if (str.length === 0) { | ||
return ''; | ||
} | ||
|
||
// ix+1 of leftmost that fits description | ||
// i.e. the length of the string we should return | ||
var curr = str.length; | ||
|
||
while (curr > 0) { | ||
var currChar = str.charAt(curr - 1); | ||
if (mustMatchC && currChar === c) { | ||
curr--; | ||
} else if (!mustMatchC && currChar !== c) { | ||
curr--; | ||
} else { | ||
break; | ||
} | ||
} | ||
|
||
return str.substr(0, curr); | ||
} | ||
|
||
;(function(root) { | ||
'use strict'; | ||
|
||
|
@@ -16,7 +57,8 @@ var block = { | |
code: /^( {4}[^\n]+\n*)+/, | ||
fences: noop, | ||
hr: /^ {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\* *){3,})(?:\n+|$)/, | ||
heading: /^ *(#{1,6}) *([^\n]+?) *(?:#+ *)?(?:\n+|$)/, | ||
// cap[2] might be ' HEADING # ' and must be trimmed appropriately. | ||
heading: /^ {0,3}(#{1,6})(?:[^\S\n](.*))?(?:\n+|$)/, | ||
nptable: noop, | ||
blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/, | ||
list: /^( *)(bull) [\s\S]+?(?:hr|def|\n{2,}(?! )(?!\1bull )\n*|\s*$)/, | ||
|
@@ -92,8 +134,7 @@ block.normal = merge({}, block); | |
|
||
block.gfm = merge({}, block.normal, { | ||
fences: /^ *(`{3,}|~{3,})[ \.]*(\S+)? *\n([\s\S]*?)\n? *\1 *(?:\n+|$)/, | ||
paragraph: /^/, | ||
heading: /^ *(#{1,6}) +([^\n]+?) *#* *(?:\n+|$)/ | ||
paragraph: /^/ | ||
}); | ||
|
||
block.gfm.paragraph = edit(block.paragraph) | ||
|
@@ -116,6 +157,7 @@ block.tables = merge({}, block.gfm, { | |
*/ | ||
|
||
block.pedantic = merge({}, block.normal, { | ||
heading: /^ *(#{1,6})(.*)(?:\n+|$)/, | ||
html: edit( | ||
'^ *(?:comment *(?:\\n|\\s*$)' | ||
+ '|<(tag)[\\s\\S]+?</\\1> *(?:\\n{2,}|\\s*$)' // closed tag | ||
|
@@ -215,7 +257,7 @@ Lexer.prototype.token = function(src, top) { | |
this.tokens.push({ | ||
type: 'code', | ||
text: !this.options.pedantic | ||
? cap.replace(/\n+$/, '') | ||
? rtrim(cap, '\n') | ||
: cap | ||
}); | ||
continue; | ||
|
@@ -235,10 +277,19 @@ Lexer.prototype.token = function(src, top) { | |
// heading | ||
if (cap = this.rules.heading.exec(src)) { | ||
src = src.substring(cap[0].length); | ||
// cap[2] might be ' HEADING # ' | ||
item = (cap[2] || '').trim(); | ||
if (this.options.pedantic) { | ||
item = rtrim(item, '#'); | ||
} else { | ||
// CM requires a space before additional #s | ||
item = item.replace(/(\s|^)#+$/, ''); | ||
} | ||
item = item.trim(); | ||
this.tokens.push({ | ||
type: 'heading', | ||
depth: cap[1].length, | ||
text: cap[2] | ||
text: item | ||
}); | ||
continue; | ||
} | ||
|
@@ -510,6 +561,14 @@ var inline = { | |
text: /^[\s\S]+?(?=[\\<!\[`*]|\b_| {2,}\n|$)/ | ||
}; | ||
|
||
if (NEW_TEXT) { | ||
// TODO: If we replace ' {2,}\n' with ' \n' and address trailing whitespace, | ||
// we break the definition of GFM inline.breaks further down (affects the gfm_break test). | ||
// Furthermore, we still have trouble with the email pattern substituted in: /|[...]+@/, which | ||
// is vulnerable to REDOS just like /| {2,}\n/ was | ||
inline.text = /[\s\S](?:[\\<!\[`*]|\b_| \n|$)/; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looking for other thoughts here.
and the We could do a linear pass for each of these options, but @UziTech likes regexes. This particular case seems pretty friendly to a linear pass IMO. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Where is that email regex in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm fine with doing more work in the lexer if it is going to speed things up. Would the fix be to look for an There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I sketched an approach in 24d4a5e. I used my typical tactic of "mimic Before I debug it all the way and deal with TODOs like this, I'd like to hear opinions about this approach strategy. As a proof of concept, the PoC from the Sonatype folks is fine after 24d4a5e.
It shows linear growth as string becomes very large, to be expected with any O(n) algorithm. Without 24d4a53 it takes more than 10 seconds to evaluate just the first case. |
||
} | ||
|
||
inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g; | ||
|
||
inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/; | ||
|
@@ -776,11 +835,22 @@ InlineLexer.prototype.output = function(src) { | |
} | ||
|
||
// text | ||
log(`lexer: Matching text: ${this.rules.text.source}\n <${src}>`); | ||
if (cap = this.rules.text.exec(src)) { | ||
src = src.substring(cap[0].length); | ||
out += this.renderer.text(escape(this.smartypants(cap[0]))); | ||
continue; | ||
if (NEW_TEXT) { | ||
log(`lexer: Match: ${cap} ${cap.index}`); | ||
var textLen = cap.index + 1; | ||
// text is not in cap[0], so extract text before advancing src. | ||
out += this.renderer.text(escape(this.smartypants(src.substr(0, textLen)))); | ||
src = src.substring(textLen); | ||
continue; | ||
} else { | ||
src = src.substring(cap[0].length); | ||
out += this.renderer.text(escape(this.smartypants(cap[0]))); | ||
continue; | ||
} | ||
} | ||
log(`lexer: Mismatch`); | ||
|
||
if (src) { | ||
throw new Error('Infinite loop on byte: ' + src.charCodeAt(0)); | ||
|
@@ -1274,7 +1344,7 @@ function resolveUrl(base, href) { | |
if (/^[^:]+:\/*[^/]*$/.test(base)) { | ||
baseUrls[' ' + base] = base + '/'; | ||
} else { | ||
baseUrls[' ' + base] = base.replace(/[^/]*$/, ''); | ||
baseUrls[' ' + base] = rtrim(base, '/', true); | ||
} | ||
} | ||
base = baseUrls[' ' + base]; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
--- | ||
gfm: false | ||
pedantic: true | ||
--- | ||
#header | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should there be unit tests for this function?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We would need to export the function. I think that would constitute testing the implementation.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree with @UziTech.