Skip to content

Commit 92c2e0e

Browse files
committed
More improvements to the passStringToWasm function
1 parent 5581cdf commit 92c2e0e

File tree

1 file changed

+81
-101
lines changed
  • crates/cli-support/src/js

1 file changed

+81
-101
lines changed

crates/cli-support/src/js/mod.rs

+81-101
Original file line numberDiff line numberDiff line change
@@ -805,36 +805,14 @@ impl<'a> Context<'a> {
805805
self.global("let WASM_VECTOR_LEN = 0;");
806806
}
807807

808-
fn expose_encode_as_ascii(&mut self) {
809-
if !self.should_write_global("encode_as_ascii") {
810-
return;
811-
}
812-
813-
self.expose_uint8_memory();
814-
815-
self.global("
816-
function encodeAsAscii(arg, ptr, len) {
817-
let offset = 0;
818-
819-
const mem = getUint8Memory();
820-
821-
for (; offset < len; offset++) {
822-
const code = arg.charCodeAt(offset);
823-
if (code > 0x7F) break;
824-
mem[ptr + offset] = code;
825-
}
826-
827-
return offset;
828-
}
829-
");
830-
}
831-
832808
fn expose_pass_string_to_wasm(&mut self) -> Result<(), Error> {
833809
if !self.should_write_global("pass_string_to_wasm") {
834810
return Ok(());
835811
}
812+
836813
self.require_internal_export("__wbindgen_malloc")?;
837814
self.expose_wasm_vector_len();
815+
838816
let debug = if self.config.debug {
839817
"
840818
if (typeof(arg) !== 'string') throw new Error('expected a string argument');
@@ -854,10 +832,10 @@ impl<'a> Context<'a> {
854832
"
855833
function passStringToWasm(arg) {{
856834
{}
857-
const size = Buffer.byteLength(arg);
858-
const ptr = wasm.__wbindgen_malloc(size);
859-
getNodeBufferMemory().write(arg, ptr, size);
860-
WASM_VECTOR_LEN = size;
835+
const len = Buffer.byteLength(arg);
836+
const ptr = wasm.__wbindgen_malloc(len);
837+
getNodeBufferMemory().write(arg, ptr, len);
838+
WASM_VECTOR_LEN = len;
861839
return ptr;
862840
}}
863841
",
@@ -868,9 +846,52 @@ impl<'a> Context<'a> {
868846
}
869847

870848
self.expose_text_encoder()?;
871-
self.expose_uint8_memory();
872849

873-
self.expose_encode_as_ascii();
850+
// The first implementation we have for this is to use
851+
// `TextEncoder#encode` which has been around for quite some time.
852+
let encode = "function (arg, view) {
853+
const buf = cachedTextEncoder.encode(arg);
854+
view.set(buf);
855+
return {
856+
read: arg.length,
857+
written: buf.length
858+
};
859+
}";
860+
861+
// Another possibility is to use `TextEncoder#encodeInto` which is much
862+
// newer and isn't implemented everywhere yet. It's more efficient,
863+
// however, because it allows us to elide an intermediate allocation.
864+
let encode_into = "function (arg, view) {
865+
return cachedTextEncoder.encodeInto(arg, view);
866+
}";
867+
868+
// Looks like `encodeInto` doesn't currently work when the memory passed
869+
// in is backed by a `SharedArrayBuffer`, so force usage of `encode` if
870+
// a `SharedArrayBuffer` is in use.
871+
let shared = self.module.memories.get(self.memory).shared;
872+
873+
match self.config.encode_into {
874+
EncodeInto::Always if !shared => {
875+
self.global(&format!("
876+
const encodeString = {};
877+
", encode_into));
878+
}
879+
EncodeInto::Test if !shared => {
880+
self.global(&format!("
881+
const encodeString = (typeof cachedTextEncoder.encodeInto === 'function'
882+
? {}
883+
: {});
884+
", encode_into, encode));
885+
}
886+
_ => {
887+
self.global(&format!("
888+
const encodeString = {};
889+
", encode));
890+
}
891+
}
892+
893+
self.expose_uint8_memory();
894+
self.require_internal_export("__wbindgen_realloc")?;
874895

875896
// A fast path that directly writes char codes into WASM memory as long
876897
// as it finds only ASCII characters.
@@ -881,94 +902,53 @@ impl<'a> Context<'a> {
881902
// This might be not very intuitive, but such calls are usually more
882903
// expensive in mainstream engines than staying in the JS, and
883904
// charCodeAt on ASCII strings is usually optimised to raw bytes.
884-
let start_encoding_as_ascii = format!(
885-
"\
886-
{}
887-
const len = arg.length;
888-
let ptr = wasm.__wbindgen_malloc(len);
889-
const offset = encodeAsAscii(arg, ptr, len);
890-
",
891-
debug
892-
);
905+
let encode_as_ascii = "\
906+
let len = arg.length;
907+
let ptr = wasm.__wbindgen_malloc(len);
893908
894-
// The first implementation we have for this is to use
895-
// `TextEncoder#encode` which has been around for quite some time.
896-
let use_encode = format!(
897-
"\
898-
{}
899-
if (offset !== len) {{
900-
if (offset !== 0) {{
901-
arg = arg.slice(offset);
902-
}}
903-
const buf = cachedTextEncoder.encode(arg);
904-
ptr = wasm.__wbindgen_realloc(ptr, size, size = offset + buf.length);
905-
getUint8Memory().set(buf, ptr + offset);
906-
offset += buf.length;
907-
}}
908-
WASM_VECTOR_LEN = offset;
909-
return ptr;
910-
",
911-
start_encoding_as_ascii
912-
);
909+
const mem = getUint8Memory();
913910
914-
// Another possibility is to use `TextEncoder#encodeInto` which is much
915-
// newer and isn't implemented everywhere yet. It's more efficient,
916-
// however, because it allows us to elide an intermediate allocation.
917-
let use_encode_into = format!(
918-
"\
911+
let offset = 0;
912+
913+
for (; offset < len; offset++) {
914+
const code = arg.charCodeAt(offset);
915+
if (code > 0x7F) break;
916+
mem[ptr + offset] = code;
917+
}
918+
";
919+
920+
// TODO:
921+
// When converting a JS string to UTF-8, the maximum size is `arg.length * 3`,
922+
// so we just allocate that. This wastes memory, so we should investigate
923+
// looping over the string to calculate the precise size, or perhaps using
924+
// `shrink_to_fit` on the Rust side.
925+
self.global(&format!(
926+
"function passStringToWasm(arg) {{
927+
{}
919928
{}
920929
if (offset !== len) {{
921930
if (offset !== 0) {{
922931
arg = arg.slice(offset);
923932
}}
924-
ptr = wasm.__wbindgen_realloc(ptr, size, size = offset + len * 3);
925-
const view = getUint8Memory().subarray(ptr + offset, ptr + size);
926-
const ret = cachedTextEncoder.encodeInto(arg, view);
933+
ptr = wasm.__wbindgen_realloc(ptr, len, len = offset + arg.length * 3);
934+
const view = getUint8Memory().subarray(ptr + offset, ptr + len);
935+
const ret = encodeString(arg, view);
927936
{}
928937
offset += ret.written;
929938
}}
939+
930940
WASM_VECTOR_LEN = offset;
931941
return ptr;
932-
",
933-
start_encoding_as_ascii,
942+
}}",
943+
debug,
944+
encode_as_ascii,
934945
if self.config.debug {
935-
"if (ret.read != len) throw new Error('failed to pass whole string');"
946+
"if (ret.read != arg.length) throw new Error('failed to pass whole string');"
936947
} else {
937948
""
938949
},
939-
);
940-
941-
// Looks like `encodeInto` doesn't currently work when the memory passed
942-
// in is backed by a `SharedArrayBuffer`, so force usage of `encode` if
943-
// a `SharedArrayBuffer` is in use.
944-
let shared = self.module.memories.get(self.memory).shared;
950+
));
945951

946-
match self.config.encode_into {
947-
EncodeInto::Always if !shared => {
948-
self.require_internal_export("__wbindgen_realloc")?;
949-
self.global(&format!(
950-
"function passStringToWasm(arg) {{ {} }}",
951-
use_encode_into,
952-
));
953-
}
954-
EncodeInto::Test if !shared => {
955-
self.require_internal_export("__wbindgen_realloc")?;
956-
self.global(&format!(
957-
"
958-
const passStringToWasm = (typeof cachedTextEncoder.encodeInto === 'function'
959-
? function (arg) {{ {} }}
960-
: function (arg) {{ {} }});
961-
",
962-
use_encode_into, use_encode,
963-
));
964-
}
965-
_ => {
966-
self.global(&format!(
967-
"function passStringToWasm(arg) {{ {} }}",
968-
use_encode,
969-
));
970-
}
971-
}
972952
Ok(())
973953
}
974954

0 commit comments

Comments
 (0)