33
33
34
34
import org .jcodings .Encoding ;
35
35
import org .jcodings .specific .ASCIIEncoding ;
36
+ import org .jcodings .specific .UTF16BEEncoding ;
37
+ import org .jcodings .specific .UTF16LEEncoding ;
38
+ import org .jcodings .specific .UTF32BEEncoding ;
39
+ import org .jcodings .specific .UTF32LEEncoding ;
40
+ import org .jcodings .specific .UTF8Encoding ;
36
41
import org .jruby .*;
37
42
import org .jruby .anno .FrameField ;
38
43
import org .jruby .anno .JRubyClass ;
51
56
import org .jruby .util .ByteList ;
52
57
import org .jruby .util .StringSupport ;
53
58
import org .jruby .util .TypeConverter ;
59
+ import org .jruby .util .func .ObjectObjectIntFunction ;
54
60
import org .jruby .util .io .EncodingUtils ;
55
61
import org .jruby .util .io .Getline ;
62
+ import org .jruby .util .io .IOEncodable ;
56
63
import org .jruby .util .io .ModeFlags ;
57
64
import org .jruby .util .io .OpenFile ;
58
65
62
69
import java .util .Arrays ;
63
70
import java .util .concurrent .atomic .AtomicReferenceFieldUpdater ;
64
71
72
+ import static java .lang .Byte .toUnsignedInt ;
65
73
import static org .jruby .RubyEnumerator .enumeratorize ;
66
74
import static org .jruby .runtime .Visibility .PRIVATE ;
67
75
import static org .jruby .util .RubyStringBuilder .str ;
@@ -93,6 +101,10 @@ static class StringIOData {
93
101
94
102
private static final AtomicReferenceFieldUpdater <StringIOData , Object > LOCKED_UPDATER = AtomicReferenceFieldUpdater .newUpdater (StringIOData .class , Object .class , "owner" );
95
103
104
+ private static final ThreadLocal <Object > VMODE_VPERM_TL = ThreadLocal .withInitial (() -> EncodingUtils .vmodeVperm (null , null ));
105
+ private static final ThreadLocal <int []> FMODE_TL = ThreadLocal .withInitial (() -> new int []{0 });
106
+ private static final int [] OFLAGS_UNUSED = new int []{0 };
107
+
96
108
public static RubyClass createStringIOClass (final Ruby runtime ) {
97
109
RubyClass stringIOClass = runtime .defineClass (
98
110
"StringIO" , runtime .getObject (), StringIO ::new );
@@ -298,12 +310,22 @@ private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyO
298
310
Encoding encoding = null ;
299
311
300
312
IRubyObject options = ArgsUtil .getOptionsArg (runtime , maybeOptions );
313
+ IOEncodable .ConvConfig ioEncodable = new IOEncodable .ConvConfig ();
301
314
if (!options .isNil ()) {
302
315
argc --;
303
- IRubyObject encodingOpt = ArgsUtil .extractKeywordArg (context , "encoding" , (RubyHash ) options );
304
- if (!encodingOpt .isNil ()) {
305
- encoding = EncodingUtils .toEncoding (context , encodingOpt );
306
- }
316
+
317
+ int [] fmode = {0 };
318
+ Object vmodeAndVpermP = VMODE_VPERM_TL .get ();
319
+
320
+ // switch to per-use oflags if it is ever used in the future
321
+ EncodingUtils .extractModeEncoding (context , ioEncodable , vmodeAndVpermP , options , OFLAGS_UNUSED , FMODE_TL .get ());
322
+
323
+ // clear shared vmodeVperm
324
+ EncodingUtils .vmode (vmodeAndVpermP , null );
325
+ EncodingUtils .vperm (vmodeAndVpermP , null );
326
+
327
+ ptr .flags = fmode [0 ];
328
+ encoding = ioEncodable .enc ;
307
329
}
308
330
309
331
switch (argc ) {
@@ -312,11 +334,11 @@ private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyO
312
334
final boolean trunc ;
313
335
if (mode instanceof RubyFixnum ) {
314
336
int flags = RubyFixnum .fix2int (mode );
315
- ptr .flags = ModeFlags .getOpenFileFlagsFor (flags );
337
+ ptr .flags | = ModeFlags .getOpenFileFlagsFor (flags );
316
338
trunc = (flags & ModeFlags .TRUNC ) != 0 ;
317
339
} else {
318
340
String m = arg1 .convertToString ().toString ();
319
- ptr .flags = OpenFile .ioModestrFmode (runtime , m );
341
+ ptr .flags | = OpenFile .ioModestrFmode (runtime , m );
320
342
trunc = m .length () > 0 && m .charAt (0 ) == 'w' ;
321
343
}
322
344
string = arg0 .convertToString ();
@@ -329,11 +351,11 @@ private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyO
329
351
break ;
330
352
case 1 :
331
353
string = arg0 .convertToString ();
332
- ptr .flags = string .isFrozen () ? OpenFile .READABLE : OpenFile .READWRITE ;
354
+ ptr .flags | = string .isFrozen () ? OpenFile .READABLE : OpenFile .READWRITE ;
333
355
break ;
334
356
case 0 :
335
357
string = RubyString .newEmptyString (runtime , runtime .getDefaultExternalEncoding ());
336
- ptr .flags = OpenFile .READWRITE ;
358
+ ptr .flags | = OpenFile .READWRITE ;
337
359
break ;
338
360
default :
339
361
// should not be possible
@@ -344,6 +366,7 @@ private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyO
344
366
ptr .enc = encoding ;
345
367
ptr .pos = 0 ;
346
368
ptr .lineno = 0 ;
369
+ if ((ptr .flags & OpenFile .SETENC_BY_BOM ) != 0 ) setEncodingByBOM (context );
347
370
// funky way of shifting readwrite flags into object flags
348
371
flags |= (ptr .flags & OpenFile .READWRITE ) * (STRIO_READABLE / OpenFile .READABLE );
349
372
} finally {
@@ -1636,6 +1659,71 @@ public IRubyObject set_encoding(ThreadContext context, IRubyObject enc, IRubyObj
1636
1659
return set_encoding (context , enc );
1637
1660
}
1638
1661
1662
+ @ JRubyMethod
1663
+ public IRubyObject set_encoding_by_bom (ThreadContext context ) {
1664
+ if (setEncodingByBOM (context ) == null ) return context .nil ;
1665
+
1666
+ return context .runtime .getEncodingService ().convertEncodingToRubyEncoding (ptr .enc );
1667
+ }
1668
+
1669
+ private Encoding setEncodingByBOM (ThreadContext context ) {
1670
+ Encoding enc = detectBOM (context , ptr .string , (ctx , enc2 , bomlen ) -> {
1671
+ ptr .pos = bomlen ;
1672
+ if (writable ()) {
1673
+ ptr .string .setEncoding (enc2 );
1674
+ }
1675
+ return enc2 ;
1676
+ });
1677
+ ptr .enc = enc ;
1678
+ return enc ;
1679
+ }
1680
+
1681
+ private static Encoding detectBOM (ThreadContext context , RubyString str , ObjectObjectIntFunction <ThreadContext , Encoding , Encoding > callback ) {
1682
+ int p ;
1683
+ int len ;
1684
+
1685
+ ByteList byteList = str .getByteList ();
1686
+ byte [] bytes = byteList .unsafeBytes ();
1687
+ p = byteList .begin ();
1688
+ len = byteList .realSize ();
1689
+
1690
+ if (len < 1 ) return null ;
1691
+ switch (toUnsignedInt (bytes [p ])) {
1692
+ case 0xEF :
1693
+ if (len < 3 ) break ;
1694
+ if (toUnsignedInt (bytes [p + 1 ]) == 0xBB && toUnsignedInt (bytes [p + 2 ]) == 0xBF ) {
1695
+ return callback .apply (context , UTF8Encoding .INSTANCE , 3 );
1696
+ }
1697
+ break ;
1698
+
1699
+ case 0xFE :
1700
+ if (len < 2 ) break ;
1701
+ if (toUnsignedInt (bytes [p + 1 ]) == 0xFF ) {
1702
+ return callback .apply (context , UTF16BEEncoding .INSTANCE , 2 );
1703
+ }
1704
+ break ;
1705
+
1706
+ case 0xFF :
1707
+ if (len < 2 ) break ;
1708
+ if (toUnsignedInt (bytes [p + 1 ]) == 0xFE ) {
1709
+ if (len >= 4 && toUnsignedInt (bytes [p + 2 ]) == 0 && toUnsignedInt (bytes [p + 3 ]) == 0 ) {
1710
+ return callback .apply (context , UTF32LEEncoding .INSTANCE , 4 );
1711
+ }
1712
+ return callback .apply (context , UTF16LEEncoding .INSTANCE , 2 );
1713
+ }
1714
+ break ;
1715
+
1716
+ case 0 :
1717
+ if (len < 4 ) break ;
1718
+ if (toUnsignedInt (bytes [p + 1 ]) == 0 && toUnsignedInt (bytes [p + 2 ]) == 0xFE && toUnsignedInt (bytes [p + 3 ]) == 0xFF ) {
1719
+ return callback .apply (context , UTF32BEEncoding .INSTANCE , 4 );
1720
+ }
1721
+ break ;
1722
+ }
1723
+ return callback .apply (context , null , 0 );
1724
+ }
1725
+
1726
+
1639
1727
@ JRubyMethod
1640
1728
public IRubyObject external_encoding (ThreadContext context ) {
1641
1729
return context .runtime .getEncodingService ().convertEncodingToRubyEncoding (getEncoding ());
0 commit comments