
security: finish fixing unsafe heading regex #1226

Closed · wants to merge 9 commits
Changes from 6 commits
88 changes: 79 additions & 9 deletions lib/marked.js
@@ -4,6 +4,47 @@
* https://github.com/markedjs/marked
*/

var NEW_TEXT = true;

var doLog = true;
function log(msg) {
if (doLog) {
console.log(msg);
}
}

// Return str with all trailing {c | all but c} removed
// allButC: Default false
function rtrim(str, c, allButC) {
Member

Should there be unit tests for this function?

Member

We would need to export the function. I think that would constitute testing the implementation.

Contributor Author

I agree with @UziTech.

if (typeof allButC === 'undefined') {
allButC = false;
} else {
allButC = true;
}
var mustMatchC = !allButC;

if (str.length === 0) {
return '';
}

// ix+1 of leftmost that fits description
// i.e. the length of the string we should return
var curr = str.length;

while (curr > 0) {
var currChar = str.charAt(curr - 1);
if (mustMatchC && currChar === c) {
curr--;
} else if (!mustMatchC && currChar !== c) {
curr--;
} else {
break;
}
}

return str.substr(0, curr);
}
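
The thread above asks whether `rtrim` should have unit tests; the maintainers leaned against exporting it. For reference, if it were exported, a test could look roughly like the sketch below (hypothetical, not part of the diff, and assuming a Jest-style test runner):

```js
// Hypothetical sketch: unit tests for rtrim, assuming it were exported
// and a Jest-style test runner were available.
describe('rtrim', () => {
  it('removes trailing occurrences of c by default', () => {
    expect(rtrim('abc\n\n\n', '\n')).toBe('abc');
    expect(rtrim('### heading ###', '#')).toBe('### heading ');
  });

  it('removes trailing characters other than c when allButC is set', () => {
    // This is the resolveUrl use case: keep everything up to the last slash.
    expect(rtrim('http://example.com/a/b.js', '/', true)).toBe('http://example.com/a/');
  });

  it('handles empty input and input with nothing to trim', () => {
    expect(rtrim('', '#')).toBe('');
    expect(rtrim('abc', '#')).toBe('abc');
  });
});
```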

;(function(root) {
'use strict';

@@ -16,7 +57,8 @@ var block = {
code: /^( {4}[^\n]+\n*)+/,
fences: noop,
hr: /^ {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\* *){3,})(?:\n+|$)/,
heading: /^ *(#{1,6}) *([^\n]+?) *(?:#+ *)?(?:\n+|$)/,
// cap[2] might be ' HEADING # ' and must be trimmed appropriately.
heading: /^ {0,3}(#{1,6})(?:[^\S\n](.*))?(?:\n+|$)/,
nptable: noop,
blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/,
list: /^( *)(bull) [\s\S]+?(?:hr|def|\n{2,}(?! )(?!\1bull )\n*|\s*$)/,
@@ -92,8 +134,7 @@ block.normal = merge({}, block);

block.gfm = merge({}, block.normal, {
fences: /^ *(`{3,}|~{3,})[ \.]*(\S+)? *\n([\s\S]*?)\n? *\1 *(?:\n+|$)/,
paragraph: /^/,
heading: /^ *(#{1,6}) +([^\n]+?) *#* *(?:\n+|$)/
paragraph: /^/
});

block.gfm.paragraph = edit(block.paragraph)
@@ -116,6 +157,7 @@ block.tables = merge({}, block.gfm, {
*/

block.pedantic = merge({}, block.normal, {
heading: /^ *(#{1,6})(.*)(?:\n+|$)/,
html: edit(
'^ *(?:comment *(?:\\n|\\s*$)'
+ '|<(tag)[\\s\\S]+?</\\1> *(?:\\n{2,}|\\s*$)' // closed tag
@@ -215,7 +257,7 @@ Lexer.prototype.token = function(src, top) {
this.tokens.push({
type: 'code',
text: !this.options.pedantic
? cap.replace(/\n+$/, '')
? rtrim(cap, '\n')
: cap
});
continue;
@@ -235,10 +277,19 @@ Lexer.prototype.token = function(src, top) {
// heading
if (cap = this.rules.heading.exec(src)) {
src = src.substring(cap[0].length);
// cap[2] might be ' HEADING # '
item = (cap[2] || '').trim();
if (this.options.pedantic) {
item = rtrim(item, '#');
} else {
// CM requires a space before additional #s
item = item.replace(/(\s|^)#+$/, '');
}
item = item.trim();
this.tokens.push({
type: 'heading',
depth: cap[1].length,
text: cap[2]
text: item
});
continue;
}
@@ -510,6 +561,14 @@ var inline = {
text: /^[\s\S]+?(?=[\\<!\[`*]|\b_| {2,}\n|$)/
};

if (NEW_TEXT) {
// TODO: If we replace ' {2,}\n' with ' \n' and address trailing whitespace,
// we break the definition of GFM inline.breaks further down (affects the gfm_break test).
// Furthermore, we still have trouble with the email pattern substituted in: /|[...]+@/, which
// is vulnerable to REDOS just like /| {2,}\n/ was
inline.text = /[\s\S](?:[\\<!\[`*]|\b_| \n|$)/;
Contributor Author

Looking for other thoughts here.

  1. I think we can work with the definition of line breaks ( {2,}\n vs. \n with possibly-preceding spaces).
  2. I'm not sure how to deal with the email problem. The full regex is this:

[\s\S](?:[\\<!\[`*~]|https?:\/\/|ftp:\/\/|www\.|[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@|\b_| \n|$)

and the [a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@ alternative is problematic without deliberately seeking out emails char-by-char.

We could do a linear pass for each of these options, but @UziTech likes regexes. This particular case seems pretty friendly to a linear pass IMO.
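
As an illustration of that linear pass, the email alternative could be handled by scanning for an `@` and walking back over the allowed local-part characters, rather than letting the regex engine backtrack. A rough sketch (names are hypothetical, not from this PR):

```js
// Hypothetical sketch: linear scan for the email-like alternative in inline.text.
// Returns the index where an email-like token starts, or -1 if there is none.
var emailLocalChars = /[a-zA-Z0-9.!#$%&'*+\/=?_`{|}~-]/;

function offsetOfEmailLike(str) {
  for (var at = str.indexOf('@'); at !== -1; at = str.indexOf('@', at + 1)) {
    var start = at;
    // Walk left over characters allowed before the '@'.
    while (start > 0 && emailLocalChars.test(str.charAt(start - 1))) {
      start--;
    }
    if (start < at) {
      return start; // at least one local-part character precedes the '@'
    }
  }
  return -1;
}
```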

Member

Where is that email regex in marked.js? I don't see it

Contributor Author

Member

I'm fine with doing more work in the lexer if it is going to speed things up.

Would the fix be to look for an @ symbol before looking for those other chars, or checking for the rest of the regex if an @ isn't found?

Contributor Author

I sketched an approach in 24d4a5e. I used my typical tactic of "mimic RegExp.exec" to reduce the visible impact. The email is handled here amidst all the other offsetOfX routines. The rulesets are loaded with functions like this and this.

Before I debug it all the way and deal with TODOs like this, I'd like to hear opinions about this approach.

As a proof of concept, the PoC from the Sonatype folks is fine after 24d4a5e.

$ ./poc-sonatype.js  > /dev/null
String Length = 100005 | Execution time (seconds) 0.013
String Length = 1000005 | Execution time (seconds) 0.075
String Length = 10000005 | Execution time (seconds) 0.764

It shows linear growth as the string becomes very large, which is to be expected with any O(n) algorithm. Without 24d4a5e it takes more than 10 seconds to evaluate just the first case.
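
To make the "mimic RegExp.exec" idea concrete: the scanners could return their hit positions and a small wrapper could package the earliest one into an exec-shaped result (an array with `index` set), so call sites that read `cap.index` keep working. A rough sketch with hypothetical names, not the actual code from 24d4a5e:

```js
// Hypothetical sketch: run several offsetOfX-style scanners and return the
// earliest hit shaped like a RegExp.exec result ([matchedChar] with .index).
function execLikeText(src, offsetFns) {
  var best = -1;
  for (var i = 0; i < offsetFns.length; i++) {
    var at = offsetFns[i](src);
    if (at !== -1 && (best === -1 || at < best)) {
      best = at;
    }
  }
  if (best === -1) {
    return null;
  }
  var match = [src.charAt(best)]; // arrays look enough like exec results
  match.index = best;
  match.input = src;
  return match;
}
```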

}

inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g;

inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/;
@@ -776,11 +835,22 @@ InlineLexer.prototype.output = function(src) {
}

// text
log(`lexer: Matching text: ${this.rules.text.source}\n <${src}>`);
if (cap = this.rules.text.exec(src)) {
src = src.substring(cap[0].length);
out += this.renderer.text(escape(this.smartypants(cap[0])));
continue;
if (NEW_TEXT) {
log(`lexer: Match: ${cap} ${cap.index}`);
var textLen = cap.index + 1;
// text is not in cap[0], so extract text before advancing src.
out += this.renderer.text(escape(this.smartypants(src.substr(0, textLen))));
src = src.substring(textLen);
continue;
} else {
src = src.substring(cap[0].length);
out += this.renderer.text(escape(this.smartypants(cap[0])));
continue;
}
}
log(`lexer: Mismatch`);

if (src) {
throw new Error('Infinite loop on byte: ' + src.charCodeAt(0));
Expand Down Expand Up @@ -1274,7 +1344,7 @@ function resolveUrl(base, href) {
if (/^[^:]+:\/*[^/]*$/.test(base)) {
baseUrls[' ' + base] = base + '/';
} else {
baseUrls[' ' + base] = base.replace(/[^/]*$/, '');
baseUrls[' ' + base] = rtrim(base, '/', true);
}
}
base = baseUrls[' ' + base];
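
Taken together, the heading changes mean trailing `#`s are only stripped when preceded by whitespace (per CommonMark), pedantic mode strips them unconditionally, and the non-pedantic regex now requires whitespace between the opening `#`s and the heading text. A standalone restatement of the trimming step, mirroring the lexer code above (illustrative only, reusing the `rtrim` helper from this diff):

```js
// Illustrative restatement of the new heading-text trimming (not marked's API).
function trimHeadingText(captured, pedantic) {
  var item = (captured || '').trim();      // e.g. ' HEADING # ' -> 'HEADING #'
  if (pedantic) {
    item = rtrim(item, '#');               // strip any trailing #s
  } else {
    item = item.replace(/(\s|^)#+$/, '');  // CommonMark: trailing #s need a space before them
  }
  return item.trim();
}

trimHeadingText(' Heading ## ', false);  // -> 'Heading'
trimHeadingText(' Heading## ', false);   // -> 'Heading##' (no space before the #s)
trimHeadingText(' Heading## ', true);    // -> 'Heading'   (pedantic strips them anyway)
```

This is also why the test change below switches nogfm_hashtag.md to `pedantic: true`: `#header` (no space after the `#`) only lexes as a heading under the pedantic heading regex now.
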
2 changes: 1 addition & 1 deletion test/new/nogfm_hashtag.md
@@ -1,5 +1,5 @@
---
gfm: false
pedantic: true
---
#header
