Skip to content

Commit cb46167

Browse files
headius and kou authored
jruby: Implement set_encoding_by_bom (#101)
Fix GH-100 --------- Co-authored-by: Sutou Kouhei <[email protected]>
1 parent 225db02 commit cb46167

File tree

1 file changed

+96
-8
lines changed

1 file changed

+96
-8
lines changed

ext/java/org/jruby/ext/stringio/StringIO.java

+96-8
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@
3333

3434
import org.jcodings.Encoding;
3535
import org.jcodings.specific.ASCIIEncoding;
36+
import org.jcodings.specific.UTF16BEEncoding;
37+
import org.jcodings.specific.UTF16LEEncoding;
38+
import org.jcodings.specific.UTF32BEEncoding;
39+
import org.jcodings.specific.UTF32LEEncoding;
40+
import org.jcodings.specific.UTF8Encoding;
3641
import org.jruby.*;
3742
import org.jruby.anno.FrameField;
3843
import org.jruby.anno.JRubyClass;
@@ -51,8 +56,10 @@
5156
import org.jruby.util.ByteList;
5257
import org.jruby.util.StringSupport;
5358
import org.jruby.util.TypeConverter;
59+
import org.jruby.util.func.ObjectObjectIntFunction;
5460
import org.jruby.util.io.EncodingUtils;
5561
import org.jruby.util.io.Getline;
62+
import org.jruby.util.io.IOEncodable;
5663
import org.jruby.util.io.ModeFlags;
5764
import org.jruby.util.io.OpenFile;
5865

@@ -62,6 +69,7 @@
6269
import java.util.Arrays;
6370
import java.util.concurrent.atomic.AtomicReferenceFieldUpdater;
6471

72+
import static java.lang.Byte.toUnsignedInt;
6573
import static org.jruby.RubyEnumerator.enumeratorize;
6674
import static org.jruby.runtime.Visibility.PRIVATE;
6775
import static org.jruby.util.RubyStringBuilder.str;
@@ -93,6 +101,10 @@ static class StringIOData {
93101

94102
private static final AtomicReferenceFieldUpdater<StringIOData, Object> LOCKED_UPDATER = AtomicReferenceFieldUpdater.newUpdater(StringIOData.class, Object.class, "owner");
95103

104+
// Per-thread holder created by EncodingUtils.vmodeVperm(null, null). strioInit passes it to
// EncodingUtils.extractModeEncoding and then resets both of its slots to null via
// EncodingUtils.vmode/vperm so the shared instance can be reused.
private static final ThreadLocal<Object> VMODE_VPERM_TL = ThreadLocal.withInitial(() -> EncodingUtils.vmodeVperm(null, null));
105+
// Per-thread one-element int array, initialised to {0}, handed to extractModeEncoding as its
// last (fmode) argument.
// NOTE(review): strioInit passes this array to extractModeEncoding but afterwards reads a
// separate local `fmode` array into ptr.flags, and nothing visible resets this element between
// uses - confirm which array is meant to carry the extracted flags.
private static final ThreadLocal<int[]> FMODE_TL = ThreadLocal.withInitial(() -> new int[]{0});
106+
// Shared placeholder passed as the oflags argument of extractModeEncoding; the call site notes
// it should become per-use if oflags is ever actually consumed.
private static final int[] OFLAGS_UNUSED = new int[]{0};
107+
96108
public static RubyClass createStringIOClass(final Ruby runtime) {
97109
RubyClass stringIOClass = runtime.defineClass(
98110
"StringIO", runtime.getObject(), StringIO::new);
@@ -298,12 +310,22 @@ private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyO
298310
Encoding encoding = null;
299311

300312
IRubyObject options = ArgsUtil.getOptionsArg(runtime, maybeOptions);
313+
IOEncodable.ConvConfig ioEncodable = new IOEncodable.ConvConfig();
301314
if (!options.isNil()) {
302315
argc--;
303-
IRubyObject encodingOpt = ArgsUtil.extractKeywordArg(context, "encoding", (RubyHash) options);
304-
if (!encodingOpt.isNil()) {
305-
encoding = EncodingUtils.toEncoding(context, encodingOpt);
306-
}
316+
317+
int[] fmode = {0};
318+
Object vmodeAndVpermP = VMODE_VPERM_TL.get();
319+
320+
// switch to per-use oflags if it is ever used in the future
321+
EncodingUtils.extractModeEncoding(context, ioEncodable, vmodeAndVpermP, options, OFLAGS_UNUSED, FMODE_TL.get());
322+
323+
// clear shared vmodeVperm
324+
EncodingUtils.vmode(vmodeAndVpermP, null);
325+
EncodingUtils.vperm(vmodeAndVpermP, null);
326+
327+
ptr.flags = fmode[0];
328+
encoding = ioEncodable.enc;
307329
}
308330

309331
switch (argc) {
@@ -312,11 +334,11 @@ private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyO
312334
final boolean trunc;
313335
if (mode instanceof RubyFixnum) {
314336
int flags = RubyFixnum.fix2int(mode);
315-
ptr.flags = ModeFlags.getOpenFileFlagsFor(flags);
337+
ptr.flags |= ModeFlags.getOpenFileFlagsFor(flags);
316338
trunc = (flags & ModeFlags.TRUNC) != 0;
317339
} else {
318340
String m = arg1.convertToString().toString();
319-
ptr.flags = OpenFile.ioModestrFmode(runtime, m);
341+
ptr.flags |= OpenFile.ioModestrFmode(runtime, m);
320342
trunc = m.length() > 0 && m.charAt(0) == 'w';
321343
}
322344
string = arg0.convertToString();
@@ -329,11 +351,11 @@ private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyO
329351
break;
330352
case 1:
331353
string = arg0.convertToString();
332-
ptr.flags = string.isFrozen() ? OpenFile.READABLE : OpenFile.READWRITE;
354+
ptr.flags |= string.isFrozen() ? OpenFile.READABLE : OpenFile.READWRITE;
333355
break;
334356
case 0:
335357
string = RubyString.newEmptyString(runtime, runtime.getDefaultExternalEncoding());
336-
ptr.flags = OpenFile.READWRITE;
358+
ptr.flags |= OpenFile.READWRITE;
337359
break;
338360
default:
339361
// should not be possible
@@ -344,6 +366,7 @@ private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyO
344366
ptr.enc = encoding;
345367
ptr.pos = 0;
346368
ptr.lineno = 0;
369+
if ((ptr.flags & OpenFile.SETENC_BY_BOM) != 0) setEncodingByBOM(context);
347370
// funky way of shifting readwrite flags into object flags
348371
flags |= (ptr.flags & OpenFile.READWRITE) * (STRIO_READABLE / OpenFile.READABLE);
349372
} finally {
@@ -1636,6 +1659,71 @@ public IRubyObject set_encoding(ThreadContext context, IRubyObject enc, IRubyObj
16361659
return set_encoding(context, enc);
16371660
}
16381661

1662+
@JRubyMethod
1663+
public IRubyObject set_encoding_by_bom(ThreadContext context) {
1664+
if (setEncodingByBOM(context) == null) return context.nil;
1665+
1666+
return context.runtime.getEncodingService().convertEncodingToRubyEncoding(ptr.enc);
1667+
}
1668+
1669+
private Encoding setEncodingByBOM(ThreadContext context) {
1670+
Encoding enc = detectBOM(context, ptr.string, (ctx, enc2, bomlen) -> {
1671+
ptr.pos = bomlen;
1672+
if (writable()) {
1673+
ptr.string.setEncoding(enc2);
1674+
}
1675+
return enc2;
1676+
});
1677+
ptr.enc = enc;
1678+
return enc;
1679+
}
1680+
1681+
private static Encoding detectBOM(ThreadContext context, RubyString str, ObjectObjectIntFunction<ThreadContext, Encoding, Encoding> callback) {
1682+
int p;
1683+
int len;
1684+
1685+
ByteList byteList = str.getByteList();
1686+
byte[] bytes = byteList.unsafeBytes();
1687+
p = byteList.begin();
1688+
len = byteList.realSize();
1689+
1690+
if (len < 1) return null;
1691+
switch (toUnsignedInt(bytes[p])) {
1692+
case 0xEF:
1693+
if (len < 3) break;
1694+
if (toUnsignedInt(bytes[p + 1]) == 0xBB && toUnsignedInt(bytes[p + 2]) == 0xBF) {
1695+
return callback.apply(context, UTF8Encoding.INSTANCE, 3);
1696+
}
1697+
break;
1698+
1699+
case 0xFE:
1700+
if (len < 2) break;
1701+
if (toUnsignedInt(bytes[p + 1]) == 0xFF) {
1702+
return callback.apply(context, UTF16BEEncoding.INSTANCE, 2);
1703+
}
1704+
break;
1705+
1706+
case 0xFF:
1707+
if (len < 2) break;
1708+
if (toUnsignedInt(bytes[p + 1]) == 0xFE) {
1709+
if (len >= 4 && toUnsignedInt(bytes[p + 2]) == 0 && toUnsignedInt(bytes[p + 3]) == 0) {
1710+
return callback.apply(context, UTF32LEEncoding.INSTANCE, 4);
1711+
}
1712+
return callback.apply(context, UTF16LEEncoding.INSTANCE, 2);
1713+
}
1714+
break;
1715+
1716+
case 0:
1717+
if (len < 4) break;
1718+
if (toUnsignedInt(bytes[p + 1]) == 0 && toUnsignedInt(bytes[p + 2]) == 0xFE && toUnsignedInt(bytes[p + 3]) == 0xFF) {
1719+
return callback.apply(context, UTF32BEEncoding.INSTANCE, 4);
1720+
}
1721+
break;
1722+
}
1723+
return callback.apply(context, null, 0);
1724+
}
1725+
1726+
16391727
@JRubyMethod
16401728
public IRubyObject external_encoding(ThreadContext context) {
16411729
return context.runtime.getEncodingService().convertEncodingToRubyEncoding(getEncoding());

0 commit comments

Comments
 (0)