@@ -124,11 +124,10 @@ class BaseParser
124
124
}
125
125
126
126
module Private
127
- INSTRUCTION_END = /#{ NAME } (\s +.*?)?\? >/um
128
127
TAG_PATTERN = /((?>#{ QNAME_STR } ))\s */um
129
128
CLOSE_PATTERN = /(#{ QNAME_STR } )\s *>/um
130
129
ATTLISTDECL_END = /\s +#{ NAME } (?:#{ ATTDEF } )*\s *>/um
131
- NAME_PATTERN = /\s * #{ NAME } /um
130
+ NAME_PATTERN = /#{ NAME } /um
132
131
GEDECL_PATTERN = "\\ s+#{ NAME } \\ s+#{ ENTITYDEF } \\ s*>"
133
132
PEDECL_PATTERN = "\\ s+(%)\\ s+#{ NAME } \\ s+#{ PEDEF } \\ s*>"
134
133
ENTITYDECL_PATTERN = /(?:#{ GEDECL_PATTERN } )|(?:#{ PEDECL_PATTERN } )/um
@@ -242,7 +241,7 @@ def pull_event
242
241
if @document_status == nil
243
242
start_position = @source . position
244
243
if @source . match ( "<?" , true )
245
- return process_instruction ( start_position )
244
+ return process_instruction
246
245
elsif @source . match ( "<!" , true )
247
246
if @source . match ( "--" , true )
248
247
md = @source . match ( /(.*?)-->/um , true )
@@ -442,7 +441,7 @@ def pull_event
442
441
raise REXML ::ParseException . new ( "Declarations can only occur " +
443
442
"in the doctype declaration." , @source )
444
443
elsif @source . match ( "?" , true )
445
- return process_instruction ( start_position )
444
+ return process_instruction
446
445
else
447
446
# Get the next tag
448
447
md = @source . match ( Private ::TAG_PATTERN , true )
@@ -588,14 +587,14 @@ def need_source_encoding_update?(xml_declaration_encoding)
588
587
# Reads a name from the current position of @source and returns it.
#
# The name is matched with Private::NAME_PATTERN; the second argument
# (+true+) is passed through to @source.match (presumably "consume the
# matched text" -- confirm against the Source class).
#
# base_error_message:: prefix used for the exception message when no
#                      name can be read.
#
# Returns the full text of the match (md[0]).
#
# Raises REXML::ParseException with
# "<base_error_message>: invalid name" when a non-whitespace character is
# found where the name should start, or
# "<base_error_message>: name is missing" otherwise.
def parse_name(base_error_message)
  md = @source.match(Private::NAME_PATTERN, true)
  return md[0] if md

  # The pattern did not match: probe (without the +true+ flag) for any
  # non-whitespace character to decide which of the two diagnostics fits.
  detail = @source.match(/\S/um) ? "invalid name" : "name is missing"
  raise REXML::ParseException.new("#{base_error_message}: #{detail}", @source)
end
600
599
601
600
def parse_id ( base_error_message ,
@@ -664,18 +663,24 @@ def parse_id_invalid_details(accept_external_id:,
664
663
end
665
664
end
666
665
667
- def process_instruction ( start_position )
668
- match_data = @source . match ( Private ::INSTRUCTION_END , true )
669
- unless match_data
670
- message = "Invalid processing instruction node"
671
- @source . position = start_position
672
- raise REXML ::ParseException . new ( message , @source )
666
+ def process_instruction
667
+ name = parse_name ( "Malformed XML: Invalid processing instruction node" )
668
+ if @source . match ( /\s +/um , true )
669
+ match_data = @source . match ( /(.*?)\? >/um , true )
670
+ unless match_data
671
+ raise ParseException . new ( "Malformed XML: Unclosed processing instruction" , @source )
672
+ end
673
+ content = match_data [ 1 ]
674
+ else
675
+ content = nil
676
+ unless @source . match ( "?>" , true )
677
+ raise ParseException . new ( "Malformed XML: Unclosed processing instruction" , @source )
678
+ end
673
679
end
674
- if match_data [ 1 ] == "xml"
680
+ if name == "xml"
675
681
if @document_status
676
682
raise ParseException . new ( "Malformed XML: XML declaration is not at the start" , @source )
677
683
end
678
- content = match_data [ 2 ]
679
684
version = VERSION . match ( content )
680
685
version = version [ 1 ] unless version . nil?
681
686
encoding = ENCODING . match ( content )
@@ -690,7 +695,7 @@ def process_instruction(start_position)
690
695
standalone = standalone [ 1 ] unless standalone . nil?
691
696
return [ :xmldecl , version , encoding , standalone ]
692
697
end
693
- [ :processing_instruction , match_data [ 1 ] , match_data [ 2 ] ]
698
+ [ :processing_instruction , name , content ]
694
699
end
695
700
696
701
def parse_attributes ( prefixes , curr_ns )
0 commit comments