@@ -536,8 +536,8 @@ defmodule Poison.Parser do
536
536
# http://www.ietf.org/rfc/rfc2781.txt
537
537
# http://perldoc.perl.org/Encode/Unicode.html#Surrogate-Pairs
538
538
# http://mathiasbynens.be/notes/javascript-encoding#surrogate-pairs
539
- defguardp is_surrogate ( cp ) when cp in 0xD800 .. 0xDFFF
540
- defguardp is_surrogate_pair ( hi , lo ) when hi in 0xD800 .. 0xDBFF and lo in 0xDC00 .. 0xDFFF
539
+ defguardp is_hi_surrogate ( cp ) when cp in 0xD800 .. 0xDBFF
540
+ defguardp is_lo_surrogate ( cp ) when cp in 0xDC00 .. 0xDFFF
541
541
542
542
defmacrop get_codepoint ( seq , skip ) do
543
543
quote bind_quoted: [ seq: seq , skip: skip ] do
@@ -552,28 +552,36 @@ defmodule Poison.Parser do
552
552
553
553
@ compile { :inline , string_escape_unicode: 5 }
554
554
555
- defp string_escape_unicode ( << "\\ u" , seq2 :: binary - size ( 4 ) , rest :: bits >> , data , skip , acc , seq1 ) do
556
- hi = get_codepoint ( seq1 , skip )
557
- lo = get_codepoint ( seq2 , skip + 6 )
555
+ defp string_escape_unicode ( rest , data , skip , acc , seq1 ) do
556
+ cp1 = get_codepoint ( seq1 , skip )
558
557
559
558
cond do
560
- is_surrogate_pair ( hi , lo ) ->
561
- codepoint = 0x10000 + ( ( hi &&& 0x03FF ) <<< 10 ) + ( lo &&& 0x03FF )
562
- string_continue ( rest , data , skip + 11 , true , 0 , [ acc , codepoint ] )
563
-
564
- is_surrogate ( hi ) ->
565
- raise ParseError , skip: skip , value: "\\ u#{ seq1 } \\ u#{ seq2 } "
559
+ is_hi_surrogate ( cp1 ) -> string_escape_surrogate_pair ( rest , data , skip , acc , seq1 , cp1 )
560
+ is_lo_surrogate ( cp1 ) -> raise ParseError , skip: skip , value: "\\ u#{ seq1 } "
561
+ true -> string_continue ( rest , data , skip + 5 , true , 0 , [ acc , cp1 ] )
562
+ end
563
+ end
566
564
567
- is_surrogate ( lo ) ->
568
- raise ParseError , skip: skip + 6 , value: "\\ u#{ seq2 } "
565
+ @ compile { :inline , string_escape_surrogate_pair: 6 }
569
566
570
- true ->
571
- string_continue ( rest , data , skip + 11 , true , 0 , [ acc , hi , lo ] )
567
+ defp string_escape_surrogate_pair (
568
+ << "\\ u" , seq2 :: binary - size ( 4 ) , rest :: bits >> ,
569
+ data ,
570
+ skip ,
571
+ acc ,
572
+ seq1 ,
573
+ hi
574
+ ) do
575
+ with lo when is_lo_surrogate ( lo ) <- get_codepoint ( seq2 , skip + 6 ) do
576
+ codepoint = 0x10000 + ( ( hi &&& 0x03FF ) <<< 10 ) + ( lo &&& 0x03FF )
577
+ string_continue ( rest , data , skip + 11 , true , 0 , [ acc , codepoint ] )
578
+ else
579
+ _ -> raise ParseError , skip: skip , value: "\\ u#{ seq1 } \\ u#{ seq2 } "
572
580
end
573
581
end
574
582
575
- defp string_escape_unicode ( rest , data , skip , acc , seq1 ) do
576
- string_continue ( rest , data , skip + 5 , true , 0 , [ acc , get_codepoint ( seq1 , skip ) ] )
583
+ defp string_escape_surrogate_pair ( _rest , _data , skip , _acc , seq1 , _hi ) do
584
+ raise ParseError , skip: skip , value: " \\ u #{ seq1 } "
577
585
end
578
586
579
587
## Whitespace
0 commit comments