4
4
* https://github.com/markedjs/marked
5
5
*/
6
6
7
- var NEW_TEXT = false ;
7
+ var NEW_TEXT = true ;
8
8
9
9
var doLog = false ;
10
10
function log ( msg ) {
@@ -526,15 +526,146 @@ var inline = {
526
526
code : / ^ ( ` + ) \s * ( [ \s \S ] * ?[ ^ ` ] ? ) \s * \1(? ! ` ) / ,
527
527
br : / ^ { 2 , } \n (? ! \s * $ ) / ,
528
528
del : noop ,
529
- text : / ^ [ \s \S ] + ?(? = [ \\ < ! \[ ` * ] | \b _ | { 2 , } \n | $ ) /
529
+ text : / ^ [ \s \S ] + ?(? = [ \\ < ! \[ ` * ] | \b _ | { 2 , } \n | $ ) / // TODO Vulnerable
530
530
} ;
531
531
// Helper for the offsetOf routines.
// Returns the offset of the first match of regex in str, or -1 when regex
// does not match anywhere in str.
function offsetOfRegex(str, regex) {
  // search() always scans from offset 0 and does not depend on regex.lastIndex,
  // so a stateful (/g or /y) regex gives the same answer on every call.
  var offset = str.search(regex);
  if (offset !== -1) {
    log(`offsetOfRegex: str ${str} matches regex ${regex.source}`);
  }
  return offset;
}
541
+
// Returns earliest offset of "special characters": a backslash, '<', '!',
// '[', a backtick or '*'. Returns -1 if str contains none of them.
function offsetOfSpecialChars(str) {
  return offsetOfRegex(str, /[\\<!\[`*]/);
}
546
+
// Returns earliest offset of a command to italicize: an underscore that sits
// on a word boundary (\b_). Returns -1 if there is none.
function offsetOfItalics(str) {
  return offsetOfRegex(str, /\b_/);
}
551
+
// Returns earliest offset of a run of spaces followed by a newline, i.e. a
// linear-time implementation of / {minSpaces,}\n/.
//
// str        string to scan
// minSpaces  optional minimum length of the space run; defaults to 2, which
//            gives / {2,}\n/. Passing 0 gives / *\n/.
//
// Returns the offset at which the space run begins (or the offset of the
// newline itself when the run is empty), or -1 if there is no match.
function offsetOfSpacesThenNewline(str, minSpaces) {
  var requiredSpaces = typeof minSpaces === 'number' ? minSpaces : 2;
  var spaceRunBegins = -1;
  var nSpaces = 0;
  for (var i = 0; i < str.length; i++) {
    var ch = str.charAt(i);
    if (ch === ' ') {
      if (nSpaces === 0) {
        spaceRunBegins = i;
      }
      nSpaces++;
    } else if (ch === '\n' && requiredSpaces <= nSpaces) {
      // With an empty run (only possible when requiredSpaces is 0) the match
      // starts at the newline itself.
      return nSpaces === 0 ? i : spaceRunBegins;
    } else {
      // Any other character ends the current run of spaces.
      nSpaces = 0;
    }
  }
  return -1;
}
571
+
// Returns earliest offset of an http protocol prefix ("http://" or
// "https://"), or -1 if str contains neither.
function offsetOfHTTP(str) {
  return offsetOfRegex(str, /https?:\/\//);
}
576
+
// Returns earliest offset of an ftp protocol prefix ("ftp://"), or -1 if str
// does not contain one.
function offsetOfFTP(str) {
  return offsetOfRegex(str, /ftp:\/\//);
}
581
+
// Returns earliest offset of a www URL prefix ("www."), or -1 if str does not
// contain one.
function offsetOfWWW(str) {
  return offsetOfRegex(str, /www\./);
}
586
+
// Returns earliest offset of an email (username + @): the offset at which a
// run of one or more username characters that ends directly before an '@'
// begins. Returns -1 if no '@' is preceded by a username character.
function offsetOfEmail(str) {
  // Same character set as the username part of the GFM text regex this
  // replaces, including '^'.
  var emailUsernameChar = /^[a-zA-Z0-9.!#$%&'*+\/=?^_`{\|}~-]$/;

  // Look for email-like things at every '@'.
  var atSymbolIx = str.indexOf('@');
  while (atSymbolIx !== -1) {
    // Found an @, work backwards through valid username chars until we run out.
    var i = atSymbolIx;
    while (0 < i && emailUsernameChar.test(str.charAt(i - 1))) {
      i--;
    }
    // If we found any, this looks like an email.
    if (i < atSymbolIx) {
      return i;
    }
    // Resume after this '@'; searching from the same index again would find
    // the same '@' forever.
    atSymbolIx = str.indexOf('@', atSymbolIx + 1);
  }

  return -1;
}
606
+
// Returns earliest offset of a text break in str, based on an array of textBreakFinders functions
// textBreakFinders should be a subset of the offsetOfX functions
// Imitates RegExp.exec
//
// str               remaining source text to scan
// textBreakFinders  functions taking a string and returning the offset of
//                   their kind of text break, or -1 when there is none
//
// Returns null for an empty str. Otherwise returns an object whose [0] is the
// leading text of str up to the earliest break (the whole of str when no
// finder reports a break) and whose index is the length of that text.
//
// NOTE(review): InlineLexer.prototype.output's NEW_TEXT branch uses
// cap.index + 1 as the text length, which is one character more than
// result[0] holds - confirm which side should change.
// NOTE(review): finders only see str without its first character, so a
// context-sensitive finder such as the \b_ check cannot see the character
// before offset 1 - TODO confirm this matches the lookahead it replaces.
function offsetOfTextBreak(str, textBreakFinders) {
  // Clean code means doing several O(n) operations.
  // A more complex state machine (like a linear-time regex) might test all options
  // in parallel more efficiently, but I don't know how to write one.
  log(`Looking for tb in \'${str}\'`);
  if (str.length === 0) {
    return null;
  }
  var strToSearch = str.slice(1); // Must be at least one character of text before the break.

  // Find the earliest instance of each kind of text break.
  var textBreaks = textBreakFinders.map(function (f) {
    return f(strToSearch);
  });
  log(`textBreaks: ${textBreaks}`);

  // Pick earliest among them; -1 means "this kind of break does not occur".
  var validTextBreaks = textBreaks.filter(function (brk) {
    return 0 <= brk;
  });

  var earliestBreakOffset;
  if (validTextBreaks.length) {
    // Math.min takes its candidates as separate arguments, hence apply().
    // +1 because strToSearch is missing 1st char of str
    earliestBreakOffset = Math.min.apply(null, validTextBreaks) + 1;
  } else {
    // No text breaks? Then the whole string is text.
    earliestBreakOffset = str.length;
  }

  // Mimic RegExp 'exec' for compatibility.
  var result = {};
  result[0] = str.slice(0, earliestBreakOffset);
  result.index = earliestBreakOffset;
  log(`Returning: earliestBreakOffset ${earliestBreakOffset} result ${JSON.stringify(result)}`);
  return result;
}
653
+
// Locates the earliest text break using the break rules of the plain Inline
// Lexer: special characters, italics markers, and 2+ spaces before a newline.
// Imitates RegExp.exec
function offsetOfTextBreakInline(str) {
  var inlineBreakFinders = [
    offsetOfSpecialChars,
    offsetOfItalics,
    offsetOfSpacesThenNewline
  ];
  return offsetOfTextBreak(str, inlineBreakFinders);
}
659
+
// Returns earliest offset of a '~', or -1 if str contains none.
// GFM treats '~' as a special character because '~~' opens strikethrough.
function offsetOfTilde(str) {
  return str.indexOf('~');
}

// Find earliest text break according to the rules of the Inline GFM Lexer.
// Imitates RegExp.exec
//
// On top of the plain inline rules this also breaks on '~' (which the GFM
// text regex added to its special-character class via .replace(']|', '~]|')),
// on http/ftp/www URL prefixes and on email usernames.
function offsetOfTextBreakInlineGFM(str) {
  return offsetOfTextBreak(str, [
    offsetOfSpecialChars,
    offsetOfTilde,
    offsetOfItalics,
    offsetOfSpacesThenNewline,
    offsetOfHTTP,
    offsetOfFTP,
    offsetOfWWW,
    offsetOfEmail
  ]);
}
665
+
666
+ // Override vulnerable but readable regex
532
667
if ( NEW_TEXT ) {
533
- // TODO: If we replace ' {2,}\n' with ' \n' and address trailing whitespace,
534
- // we break the definition of GFM inline.breaks further down (affects the gfm_break test).
535
- // Furthermore, we still have trouble with the email pattern substituted in: /|[...]+@/, which
536
- // is vulnerable to REDOS just like /| {2,}\n/ was
537
- inline . text = / [ \s \S ] (?: [ \\ < ! \[ ` * ] | \b _ | { 2 } \n | $ ) / ;
668
+ inline . text = { exec : offsetOfTextBreakInline } ;
538
669
}
539
670
540
671
inline . _escapes = / \\ ( [ ! " # $ % & ' ( ) * + , \- . / : ; < = > ? @ \[ \] \\ ^ _ ` { | } ~ ] ) / g;
@@ -599,10 +730,7 @@ inline.gfm = merge({}, inline.normal, {
599
730
. getRegex ( ) ,
600
731
_backpedal : / (?: [ ^ ? ! . , : ; * _ ~ ( ) & ] + | \( [ ^ ) ] * \) | & (? ! [ a - z A - Z 0 - 9 ] + ; $ ) | [ ? ! . , : ; * _ ~ ) ] + (? ! $ ) ) + / ,
601
732
del : / ^ ~ ~ (? = \S ) ( [ \s \S ] * ?\S ) ~ ~ / ,
602
- text : edit ( inline . text )
603
- . replace ( ']|' , '~]|' )
604
- . replace ( '|' , '|https?://|ftp://|www\\.|[a-zA-Z0-9.!#$%&\'*+/=?^_`{\\|}~-]+@|' )
605
- . getRegex ( )
733
+ text : { exec : offsetOfTextBreakInlineGFM } // TODO Missing: .replace(']|', '~]|')
606
734
} ) ;
607
735
608
736
/**
@@ -611,7 +739,7 @@ inline.gfm = merge({}, inline.normal, {
611
739
612
740
inline . breaks = merge ( { } , inline . gfm , {
613
741
br : edit ( inline . br ) . replace ( '{2,}' , '*' ) . getRegex ( ) ,
614
- text : edit ( inline . gfm . text ) . replace ( '{2,}' , '*' ) . getRegex ( )
742
+ text : { exec : offsetOfTextBreakInlineGFM } // TODO Missing: inline.gfm.text.replace('{2,}', '*')
615
743
} ) ;
616
744
617
745
/**
@@ -803,16 +931,22 @@ InlineLexer.prototype.output = function(src) {
803
931
}
804
932
805
933
// text
806
- log ( `lexer: Matching text: ${ this . rules . text . source } \n <${ src } >` ) ;
934
+ // log(`lexer: Matching text: ${this.rules.text.source}\n <${src}>`);
807
935
if ( cap = this . rules . text . exec ( src ) ) {
808
936
if ( NEW_TEXT ) {
809
- log ( `lexer: Match: ${ cap } ${ cap . index } ` ) ;
937
+ log ( `lexer: Match: ${ JSON . stringify ( cap ) } ${ cap . index } ` ) ;
810
938
var textLen = cap . index + 1 ;
811
939
// text is not in cap[0], so extract text before advancing src.
812
940
out += this . renderer . text ( escape ( this . smartypants ( src . substr ( 0 , textLen ) ) ) ) ;
813
941
src = src . substring ( textLen ) ;
814
942
continue ;
815
943
} else {
944
+ var offInline = offsetOfTextBreakInline ( src ) ;
945
+ var offInlineGFM = offsetOfTextBreakInlineGFM ( src ) ;
946
+ console . log ( `cap ${ JSON . stringify ( cap ) } ` ) ;
947
+ console . log ( `offInline ${ JSON . stringify ( offInline ) } ` ) ;
948
+ console . log ( `offInlineGFM ${ JSON . stringify ( offInlineGFM ) } ` ) ;
949
+ console . log ( `regex ${ cap [ 0 ] . length } offInline ${ offInline [ 0 ] . length } offInlineGFM ${ offInlineGFM [ 0 ] . length } ` ) ;
816
950
src = src . substring ( cap [ 0 ] . length ) ;
817
951
out += this . renderer . text ( escape ( this . smartypants ( cap [ 0 ] ) ) ) ;
818
952
continue ;
@@ -1530,6 +1664,8 @@ marked.defaults = marked.getDefaults();
1530
1664
* Expose
1531
1665
*/
1532
1666
1667
+ marked ( ' # # ####A' ) ;
1668
+
1533
1669
marked . Parser = Parser ;
1534
1670
marked . parser = Parser . parse ;
1535
1671
0 commit comments