Skip to content

Commit 70263e8

Browse files
anonrig and lemire committed
Update util.cc
Co-authored-by: Daniel Lemire <[email protected]>
1 parent 34b530b commit 70263e8

File tree

2 files changed

+37
-28
lines changed

2 files changed

+37
-28
lines changed

src/string_bytes.cc

Lines changed: 22 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -419,47 +419,47 @@ Maybe<size_t> StringBytes::StorageSize(Isolate* isolate,
419419
Local<Value> val,
420420
enum encoding encoding) {
421421
HandleScope scope(isolate);
422-
size_t data_size = 0;
423-
bool is_buffer = Buffer::HasInstance(val);
424422

425-
if (is_buffer && (encoding == BUFFER || encoding == LATIN1)) {
423+
if (Buffer::HasInstance(val) && (encoding == BUFFER || encoding == LATIN1)) {
426424
return Just(Buffer::Length(val));
427425
}
428426

429427
Local<String> str;
430428
if (!val->ToString(isolate->GetCurrentContext()).ToLocal(&str))
431429
return Nothing<size_t>();
430+
String::ValueView view(isolate, str);
431+
size_t data_size = 0;
432432

433433
switch (encoding) {
434434
case ASCII:
435435
case LATIN1:
436-
data_size = str->Length();
436+
data_size = view.length();
437437
break;
438438

439439
case BUFFER:
440440
case UTF8:
441441
// A single UCS2 codepoint never takes up more than 3 utf8 bytes.
442442
// It is an exercise for the caller to decide when a string is
443443
// long enough to justify calling Size() instead of StorageSize()
444-
data_size = 3 * str->Length();
444+
data_size = 3 * view.length();
445445
break;
446446

447447
case UCS2:
448-
data_size = str->Length() * sizeof(uint16_t);
448+
data_size = view.length() * sizeof(uint16_t);
449449
break;
450450

451451
case BASE64URL:
452-
data_size = simdutf::base64_length_from_binary(str->Length(),
452+
data_size = simdutf::base64_length_from_binary(view.length(),
453453
simdutf::base64_url);
454454
break;
455455

456456
case BASE64:
457-
data_size = simdutf::base64_length_from_binary(str->Length());
457+
data_size = simdutf::base64_length_from_binary(view.length());
458458
break;
459459

460460
case HEX:
461-
CHECK(str->Length() % 2 == 0 && "invalid hex string length");
462-
data_size = str->Length() / 2;
461+
CHECK(view.length() % 2 == 0 && "invalid hex string length");
462+
data_size = view.length() / 2;
463463
break;
464464

465465
default:
@@ -480,32 +480,36 @@ Maybe<size_t> StringBytes::Size(Isolate* isolate,
480480
Local<String> str;
481481
if (!val->ToString(isolate->GetCurrentContext()).ToLocal(&str))
482482
return Nothing<size_t>();
483+
String::ValueView view(isolate, str);
483484

484485
switch (encoding) {
485486
case ASCII:
486487
case LATIN1:
487-
return Just<size_t>(str->Length());
488+
return Just<size_t>(view.length());
488489

489490
case BUFFER:
490491
case UTF8:
491-
return Just<size_t>(str->Utf8Length(isolate));
492+
if (view.is_one_byte()) {
493+
return Just<size_t>(simdutf::utf8_length_from_latin1(
494+
reinterpret_cast<const char*>(view.data8()), view.length()));
495+
}
496+
return Just<size_t>(simdutf::utf8_length_from_utf16(
497+
reinterpret_cast<const char16_t*>(view.data16()), view.length()));
492498

493499
case UCS2:
494-
return Just(str->Length() * sizeof(uint16_t));
500+
return Just(view.length() * sizeof(uint16_t));
495501

496502
case BASE64URL: {
497-
String::Value value(isolate, str);
498-
return Just(simdutf::base64_length_from_binary(value.length(),
503+
return Just(simdutf::base64_length_from_binary(view.length(),
499504
simdutf::base64_url));
500505
}
501506

502507
case BASE64: {
503-
String::Value value(isolate, str);
504-
return Just(simdutf::base64_length_from_binary(value.length()));
508+
return Just(simdutf::base64_length_from_binary(view.length()));
505509
}
506510

507511
case HEX:
508-
return Just<size_t>(str->Length() / 2);
512+
return Just<size_t>(view.length() / 2);
509513
}
510514

511515
UNREACHABLE();

src/util.cc

Lines changed: 15 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -104,20 +104,25 @@ static void MakeUtf8String(Isolate* isolate,
104104
if (!value->ToString(isolate->GetCurrentContext()).ToLocal(&string)) return;
105105
String::ValueView value_view(isolate, string);
106106

107+
auto value_length = value_view.length();
108+
107109
if (value_view.is_one_byte()) {
108-
target->AllocateSufficientStorage(value_view.length() + 1);
109-
target->SetLengthAndZeroTerminate(value_view.length());
110-
memcpy(target->out(),
111-
reinterpret_cast<const char*>(value_view.data8()),
112-
value_view.length());
110+
auto const_char = reinterpret_cast<const char*>(value_view.data8());
111+
auto expected_length =
112+
simdutf::utf8_length_from_latin1(const_char, value_length);
113+
114+
// Add +1 for null termination.
115+
target->AllocateSufficientStorage(expected_length + 1);
116+
target->SetLengthAndZeroTerminate(expected_length);
117+
auto actual_length = simdutf::convert_latin1_to_utf8(
118+
const_char, value_length, target->out());
119+
target->SetLength(actual_length);
113120
return;
114121
}
115122

116-
// Add +1 for null termination.
117-
auto storage = simdutf::utf8_length_from_utf16(
118-
reinterpret_cast<const char16_t*>(value_view.data16()),
119-
value_view.length()) +
120-
1;
123+
size_t storage;
124+
if (!StringBytes::StorageSize(isolate, string, UTF8).To(&storage)) return;
125+
storage += 1;
121126
target->AllocateSufficientStorage(storage);
122127

123128
// TODO(@anonrig): Use simdutf to speed up non-one-byte strings once it's

0 commit comments

Comments
 (0)