Skip to content

Commit e0005d8

Browse files
fix: speed up parsing long lists (#2302)
Co-authored-by: Tony Brix <[email protected]>
1 parent a06cec4 commit e0005d8

File tree

8 files changed

+151
-166
lines changed

8 files changed

+151
-166
lines changed

lib/marked.cjs

+36 -43
Original file line number | Diff line number | Diff line change
@@ -533,7 +533,7 @@ var Tokenizer = /*#__PURE__*/function () {
533533
var cap = this.rules.block.list.exec(src);
534534

535535
if (cap) {
536-
var raw, istask, ischecked, indent, i, blankLine, endsWithBlankLine, line, lines, itemContents;
536+
var raw, istask, ischecked, indent, i, blankLine, endsWithBlankLine, line, nextLine, rawLine, itemContents;
537537
var bull = cap[1].trim();
538538
var isordered = bull.length > 1;
539539
var list = {
@@ -551,83 +551,77 @@ var Tokenizer = /*#__PURE__*/function () {
551551
} // Get next list item
552552

553553

554-
var itemRegex = new RegExp("^( {0,3}" + bull + ")((?: [^\\n]*| *)(?:\\n[^\\n]*)*(?:\\n|$))"); // Get each top-level item
554+
var itemRegex = new RegExp("^( {0,3}" + bull + ")((?: [^\\n]*)?(?:\\n|$))"); // Check if current bullet point can start a new List Item
555555

556556
while (src) {
557-
if (this.rules.block.hr.test(src)) {
558-
// End list if we encounter an HR (possibly move into itemRegex?)
557+
if (!(cap = itemRegex.exec(src))) {
559558
break;
560559
}
561560

562-
if (!(cap = itemRegex.exec(src))) {
561+
if (this.rules.block.hr.test(src)) {
562+
// End list if bullet was actually HR (possibly move into itemRegex?)
563563
break;
564564
}
565565

566-
lines = cap[2].split('\n');
566+
raw = cap[0];
567+
src = src.substring(raw.length);
568+
line = cap[2].split('\n', 1)[0];
569+
nextLine = src.split('\n', 1)[0];
567570

568571
if (this.options.pedantic) {
569572
indent = 2;
570-
itemContents = lines[0].trimLeft();
573+
itemContents = line.trimLeft();
571574
} else {
572575
indent = cap[2].search(/[^ ]/); // Find first non-space char
573576

574-
indent = cap[1].length + (indent > 4 ? 1 : indent); // intented code blocks after 4 spaces; indent is always 1
577+
indent = indent > 4 ? 1 : indent; // Treat indented code blocks (> 4 spaces) as having only 1 indent
575578

576-
itemContents = lines[0].slice(indent - cap[1].length);
579+
itemContents = line.slice(indent);
580+
indent += cap[1].length;
577581
}
578582

579583
blankLine = false;
580-
raw = cap[0];
581584

582-
if (!lines[0] && /^ *$/.test(lines[1])) {
583-
// items begin with at most one blank line
584-
raw = cap[1] + lines.slice(0, 2).join('\n') + '\n';
585+
if (!line && /^ *$/.test(nextLine)) {
586+
// Items begin with at most one blank line
587+
raw += nextLine + '\n';
588+
src = src.substring(nextLine.length + 1);
585589
list.loose = true;
586-
lines = [];
587590
}
588591

589-
var nextBulletRegex = new RegExp("^ {0," + Math.min(3, indent - 1) + "}(?:[*+-]|\\d{1,9}[.)])");
592+
var nextBulletRegex = new RegExp("^ {0," + Math.min(3, indent - 1) + "}(?:[*+-]|\\d{1,9}[.)])"); // Check if following lines should be included in List Item
590593

591-
for (i = 1; i < lines.length; i++) {
592-
line = lines[i];
594+
while (src && !list.loose) {
595+
rawLine = src.split('\n', 1)[0];
596+
line = rawLine; // Re-align to follow commonmark nesting rules
593597

594598
if (this.options.pedantic) {
595-
// Re-align to follow commonmark nesting rules
596599
line = line.replace(/^ {1,4}(?=( {4})*[^ ])/g, ' ');
597600
} // End list item if found start of new bullet
598601

599602

600603
if (nextBulletRegex.test(line)) {
601-
raw = cap[1] + lines.slice(0, i).join('\n') + '\n';
602604
break;
603-
} // Until we encounter a blank line, item contents do not need indentation
604-
605-
606-
if (!blankLine) {
607-
if (!line.trim()) {
608-
// Check if current line is empty
609-
blankLine = true;
610-
} // Dedent if possible
611-
612-
613-
if (line.search(/[^ ]/) >= indent) {
614-
itemContents += '\n' + line.slice(indent);
615-
} else {
616-
itemContents += '\n' + line;
617-
}
618-
619-
continue;
620-
} // Dedent this line
621-
605+
}
622606

623607
if (line.search(/[^ ]/) >= indent || !line.trim()) {
608+
// Dedent if possible
624609
itemContents += '\n' + line.slice(indent);
625-
continue;
610+
} else if (!blankLine) {
611+
// Until blank line, item doesn't need indentation
612+
itemContents += '\n' + line;
626613
} else {
627-
// Line was not properly indented; end of this item
628-
raw = cap[1] + lines.slice(0, i).join('\n') + '\n';
614+
// Otherwise, improper indentation ends this item
629615
break;
630616
}
617+
618+
if (!blankLine && !line.trim()) {
619+
// Check if current line is blank
620+
blankLine = true;
621+
}
622+
623+
raw += rawLine + '\n';
624+
src = src.substring(rawLine.length + 1);
631625
}
632626

633627
if (!list.loose) {
@@ -658,7 +652,6 @@ var Tokenizer = /*#__PURE__*/function () {
658652
text: itemContents
659653
});
660654
list.raw += raw;
661-
src = src.slice(raw.length);
662655
} // Do not consume newlines at end of final item. Alternatively, make itemRegex *start* with any newlines to simplify/speed up endsWithBlankLine logic
663656

664657

@@ -671,7 +664,7 @@ var Tokenizer = /*#__PURE__*/function () {
671664
this.lexer.state.top = false;
672665
list.items[i].tokens = this.lexer.blockTokens(list.items[i].text, []);
673666

674-
if (list.items[i].tokens.some(function (t) {
667+
if (!list.loose && list.items[i].tokens.some(function (t) {
675668
return t.type === 'space';
676669
})) {
677670
list.loose = true;

lib/marked.esm.js

+35 -39
Original file line number | Diff line number | Diff line change
@@ -452,7 +452,7 @@ class Tokenizer {
452452
let cap = this.rules.block.list.exec(src);
453453
if (cap) {
454454
let raw, istask, ischecked, indent, i, blankLine, endsWithBlankLine,
455-
line, lines, itemContents;
455+
line, nextLine, rawLine, itemContents;
456456

457457
let bull = cap[1].trim();
458458
const isordered = bull.length > 1;
@@ -473,76 +473,73 @@ class Tokenizer {
473473
}
474474

475475
// Get next list item
476-
const itemRegex = new RegExp(`^( {0,3}${bull})((?: [^\\n]*| *)(?:\\n[^\\n]*)*(?:\\n|$))`);
476+
const itemRegex = new RegExp(`^( {0,3}${bull})((?: [^\\n]*)?(?:\\n|$))`);
477477

478-
// Get each top-level item
478+
// Check if current bullet point can start a new List Item
479479
while (src) {
480-
if (this.rules.block.hr.test(src)) { // End list if we encounter an HR (possibly move into itemRegex?)
480+
if (!(cap = itemRegex.exec(src))) {
481481
break;
482482
}
483483

484-
if (!(cap = itemRegex.exec(src))) {
484+
if (this.rules.block.hr.test(src)) { // End list if bullet was actually HR (possibly move into itemRegex?)
485485
break;
486486
}
487487

488-
lines = cap[2].split('\n');
488+
raw = cap[0];
489+
src = src.substring(raw.length);
490+
491+
line = cap[2].split('\n', 1)[0];
492+
nextLine = src.split('\n', 1)[0];
489493

490494
if (this.options.pedantic) {
491495
indent = 2;
492-
itemContents = lines[0].trimLeft();
496+
itemContents = line.trimLeft();
493497
} else {
494498
indent = cap[2].search(/[^ ]/); // Find first non-space char
495-
indent = cap[1].length + (indent > 4 ? 1 : indent); // intented code blocks after 4 spaces; indent is always 1
496-
itemContents = lines[0].slice(indent - cap[1].length);
499+
indent = indent > 4 ? 1 : indent; // Treat indented code blocks (> 4 spaces) as having only 1 indent
500+
itemContents = line.slice(indent);
501+
indent += cap[1].length;
497502
}
498503

499504
blankLine = false;
500-
raw = cap[0];
501505

502-
if (!lines[0] && /^ *$/.test(lines[1])) { // items begin with at most one blank line
503-
raw = cap[1] + lines.slice(0, 2).join('\n') + '\n';
506+
if (!line && /^ *$/.test(nextLine)) { // Items begin with at most one blank line
507+
raw += nextLine + '\n';
508+
src = src.substring(nextLine.length + 1);
504509
list.loose = true;
505-
lines = [];
506510
}
507511

508512
const nextBulletRegex = new RegExp(`^ {0,${Math.min(3, indent - 1)}}(?:[*+-]|\\d{1,9}[.)])`);
509513

510-
for (i = 1; i < lines.length; i++) {
511-
line = lines[i];
514+
// Check if following lines should be included in List Item
515+
while (src && !list.loose) {
516+
rawLine = src.split('\n', 1)[0];
517+
line = rawLine;
512518

513-
if (this.options.pedantic) { // Re-align to follow commonmark nesting rules
519+
// Re-align to follow commonmark nesting rules
520+
if (this.options.pedantic) {
514521
line = line.replace(/^ {1,4}(?=( {4})*[^ ])/g, ' ');
515522
}
516523

517524
// End list item if found start of new bullet
518525
if (nextBulletRegex.test(line)) {
519-
raw = cap[1] + lines.slice(0, i).join('\n') + '\n';
520526
break;
521527
}
522528

523-
// Until we encounter a blank line, item contents do not need indentation
524-
if (!blankLine) {
525-
if (!line.trim()) { // Check if current line is empty
526-
blankLine = true;
527-
}
528-
529-
// Dedent if possible
530-
if (line.search(/[^ ]/) >= indent) {
531-
itemContents += '\n' + line.slice(indent);
532-
} else {
533-
itemContents += '\n' + line;
534-
}
535-
continue;
536-
}
537-
538-
// Dedent this line
539-
if (line.search(/[^ ]/) >= indent || !line.trim()) {
529+
if (line.search(/[^ ]/) >= indent || !line.trim()) { // Dedent if possible
540530
itemContents += '\n' + line.slice(indent);
541-
continue;
542-
} else { // Line was not properly indented; end of this item
543-
raw = cap[1] + lines.slice(0, i).join('\n') + '\n';
531+
} else if (!blankLine) { // Until blank line, item doesn't need indentation
532+
itemContents += '\n' + line;
533+
} else { // Otherwise, improper indentation ends this item
544534
break;
545535
}
536+
537+
if (!blankLine && !line.trim()) { // Check if current line is blank
538+
blankLine = true;
539+
}
540+
541+
raw += rawLine + '\n';
542+
src = src.substring(rawLine.length + 1);
546543
}
547544

548545
if (!list.loose) {
@@ -573,7 +570,6 @@ class Tokenizer {
573570
});
574571

575572
list.raw += raw;
576-
src = src.slice(raw.length);
577573
}
578574

579575
// Do not consume newlines at end of final item. Alternatively, make itemRegex *start* with any newlines to simplify/speed up endsWithBlankLine logic
@@ -587,7 +583,7 @@ class Tokenizer {
587583
for (i = 0; i < l; i++) {
588584
this.lexer.state.top = false;
589585
list.items[i].tokens = this.lexer.blockTokens(list.items[i].text, []);
590-
if (list.items[i].tokens.some(t => t.type === 'space')) {
586+
if (!list.loose && list.items[i].tokens.some(t => t.type === 'space')) {
591587
list.loose = true;
592588
list.items[i].loose = true;
593589
}

0 commit comments

Comments
 (0)