Skip to content

Commit 4b170c1

Browse files
authored
Add missing row separator encoding conversion (#69)
The conversion logic is borrowed from ruby/ruby's io.c:
https://github.com/ruby/ruby/blob/40391faeab608665da87a05c686c074f91a5a206/io.c#L4059-L4079

Fix #68

Reported by IWAMOTO Kouichi. Thanks!!!
1 parent d4f7db2 commit 4b170c1

File tree

2 files changed: 49 additions (+49) and 22 deletions (-22).

ext/stringio/stringio.c

+41-22
Original file line number | Diff line number | Diff line change
@@ -1143,38 +1143,57 @@ struct getline_arg {
11431143
};
11441144

11451145
static struct getline_arg *
1146-
prepare_getline_args(struct getline_arg *arg, int argc, VALUE *argv)
1146+
prepare_getline_args(struct StringIO *ptr, struct getline_arg *arg, int argc, VALUE *argv)
11471147
{
1148-
VALUE str, lim, opts;
1148+
VALUE rs, lim, opts;
11491149
long limit = -1;
11501150
int respect_chomp;
11511151

1152-
argc = rb_scan_args(argc, argv, "02:", &str, &lim, &opts);
1153-
respect_chomp = argc == 0 || !NIL_P(str);
1152+
argc = rb_scan_args(argc, argv, "02:", &rs, &lim, &opts);
1153+
respect_chomp = argc == 0 || !NIL_P(rs);
11541154
switch (argc) {
11551155
case 0:
1156-
str = rb_rs;
1156+
rs = rb_rs;
11571157
break;
11581158

11591159
case 1:
1160-
if (!NIL_P(str) && !RB_TYPE_P(str, T_STRING)) {
1161-
VALUE tmp = rb_check_string_type(str);
1160+
if (!NIL_P(rs) && !RB_TYPE_P(rs, T_STRING)) {
1161+
VALUE tmp = rb_check_string_type(rs);
11621162
if (NIL_P(tmp)) {
1163-
limit = NUM2LONG(str);
1164-
str = rb_rs;
1163+
limit = NUM2LONG(rs);
1164+
rs = rb_rs;
11651165
}
11661166
else {
1167-
str = tmp;
1167+
rs = tmp;
11681168
}
11691169
}
11701170
break;
11711171

11721172
case 2:
1173-
if (!NIL_P(str)) StringValue(str);
1173+
if (!NIL_P(rs)) StringValue(rs);
11741174
if (!NIL_P(lim)) limit = NUM2LONG(lim);
11751175
break;
11761176
}
1177-
arg->rs = str;
1177+
if (!NIL_P(rs)) {
1178+
rb_encoding *enc_rs, *enc_io;
1179+
enc_rs = rb_enc_get(rs);
1180+
enc_io = get_enc(ptr);
1181+
if (enc_rs != enc_io &&
1182+
(rb_enc_str_coderange(rs) != ENC_CODERANGE_7BIT ||
1183+
(RSTRING_LEN(rs) > 0 && !rb_enc_asciicompat(enc_io)))) {
1184+
if (rs == rb_rs) {
1185+
rs = rb_enc_str_new(0, 0, enc_io);
1186+
rb_str_buf_cat_ascii(rs, "\n");
1187+
rs = rs;
1188+
}
1189+
else {
1190+
rb_raise(rb_eArgError, "encoding mismatch: %s IO with %s RS",
1191+
rb_enc_name(enc_io),
1192+
rb_enc_name(enc_rs));
1193+
}
1194+
}
1195+
}
1196+
arg->rs = rs;
11781197
arg->limit = limit;
11791198
arg->chomp = 0;
11801199
if (!NIL_P(opts)) {
@@ -1302,15 +1321,15 @@ strio_getline(struct getline_arg *arg, struct StringIO *ptr)
13021321
static VALUE
13031322
strio_gets(int argc, VALUE *argv, VALUE self)
13041323
{
1324+
struct StringIO *ptr = readable(self);
13051325
struct getline_arg arg;
13061326
VALUE str;
13071327

1308-
if (prepare_getline_args(&arg, argc, argv)->limit == 0) {
1309-
struct StringIO *ptr = readable(self);
1328+
if (prepare_getline_args(ptr, &arg, argc, argv)->limit == 0) {
13101329
return rb_enc_str_new(0, 0, get_enc(ptr));
13111330
}
13121331

1313-
str = strio_getline(&arg, readable(self));
1332+
str = strio_getline(&arg, ptr);
13141333
rb_lastline_set(str);
13151334
return str;
13161335
}
@@ -1347,16 +1366,16 @@ static VALUE
13471366
strio_each(int argc, VALUE *argv, VALUE self)
13481367
{
13491368
VALUE line;
1369+
struct StringIO *ptr = readable(self);
13501370
struct getline_arg arg;
13511371

1352-
StringIO(self);
13531372
RETURN_ENUMERATOR(self, argc, argv);
13541373

1355-
if (prepare_getline_args(&arg, argc, argv)->limit == 0) {
1374+
if (prepare_getline_args(ptr, &arg, argc, argv)->limit == 0) {
13561375
rb_raise(rb_eArgError, "invalid limit: 0 for each_line");
13571376
}
13581377

1359-
while (!NIL_P(line = strio_getline(&arg, readable(self)))) {
1378+
while (!NIL_P(line = strio_getline(&arg, ptr))) {
13601379
rb_yield(line);
13611380
}
13621381
return self;
@@ -1374,15 +1393,15 @@ static VALUE
13741393
strio_readlines(int argc, VALUE *argv, VALUE self)
13751394
{
13761395
VALUE ary, line;
1396+
struct StringIO *ptr = readable(self);
13771397
struct getline_arg arg;
13781398

1379-
StringIO(self);
1380-
ary = rb_ary_new();
1381-
if (prepare_getline_args(&arg, argc, argv)->limit == 0) {
1399+
if (prepare_getline_args(ptr, &arg, argc, argv)->limit == 0) {
13821400
rb_raise(rb_eArgError, "invalid limit: 0 for readlines");
13831401
}
13841402

1385-
while (!NIL_P(line = strio_getline(&arg, readable(self)))) {
1403+
ary = rb_ary_new();
1404+
while (!NIL_P(line = strio_getline(&arg, ptr))) {
13861405
rb_ary_push(ary, line);
13871406
}
13881407
return ary;

test/stringio/test_stringio.rb

+8
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,14 @@ def test_gets
8888
assert_string("", Encoding::UTF_8, StringIO.new("foo").gets(0))
8989
end
9090

91+
def test_gets_utf_16
92+
stringio = StringIO.new("line1\nline2\nline3\n".encode("utf-16le"))
93+
assert_equal("line1\n".encode("utf-16le"), stringio.gets)
94+
assert_equal("line2\n".encode("utf-16le"), stringio.gets)
95+
assert_equal("line3\n".encode("utf-16le"), stringio.gets)
96+
assert_nil(stringio.gets)
97+
end
98+
9199
def test_gets_chomp
92100
assert_equal(nil, StringIO.new("").gets(chomp: true))
93101
assert_equal("", StringIO.new("\n").gets(chomp: true))

0 commit comments

Comments
 (0)