Skip to content

Commit a162531

Browse files
authored
Merge pull request #1736 from Pauan/improving-string-passing
Making passStringToWasm smaller
2 parents fb0bbc0 + d9ae387 commit a162531

File tree

1 file changed

+84
-84
lines changed
  • crates/cli-support/src/js

1 file changed

+84
-84
lines changed

crates/cli-support/src/js/mod.rs

+84-84
Original file line numberDiff line numberDiff line change
@@ -809,8 +809,10 @@ impl<'a> Context<'a> {
809809
if !self.should_write_global("pass_string_to_wasm") {
810810
return Ok(());
811811
}
812+
812813
self.require_internal_export("__wbindgen_malloc")?;
813814
self.expose_wasm_vector_len();
815+
814816
let debug = if self.config.debug {
815817
"
816818
if (typeof(arg) !== 'string') throw new Error('expected a string argument');
@@ -830,10 +832,10 @@ impl<'a> Context<'a> {
830832
"
831833
function passStringToWasm(arg) {{
832834
{}
833-
const size = Buffer.byteLength(arg);
834-
const ptr = wasm.__wbindgen_malloc(size);
835-
getNodeBufferMemory().write(arg, ptr, size);
836-
WASM_VECTOR_LEN = size;
835+
const len = Buffer.byteLength(arg);
836+
const ptr = wasm.__wbindgen_malloc(len);
837+
getNodeBufferMemory().write(arg, ptr, len);
838+
WASM_VECTOR_LEN = len;
837839
return ptr;
838840
}}
839841
",
@@ -844,7 +846,52 @@ impl<'a> Context<'a> {
844846
}
845847

846848
self.expose_text_encoder()?;
849+
850+
// The first implementation we have for this is to use
851+
// `TextEncoder#encode` which has been around for quite some time.
852+
let encode = "function (arg, view) {
853+
const buf = cachedTextEncoder.encode(arg);
854+
view.set(buf);
855+
return {
856+
read: arg.length,
857+
written: buf.length
858+
};
859+
}";
860+
861+
// Another possibility is to use `TextEncoder#encodeInto` which is much
862+
// newer and isn't implemented everywhere yet. It's more efficient,
863+
// however, because it allows us to elide an intermediate allocation.
864+
let encode_into = "function (arg, view) {
865+
return cachedTextEncoder.encodeInto(arg, view);
866+
}";
867+
868+
// Looks like `encodeInto` doesn't currently work when the memory passed
869+
// in is backed by a `SharedArrayBuffer`, so force usage of `encode` if
870+
// a `SharedArrayBuffer` is in use.
871+
let shared = self.module.memories.get(self.memory).shared;
872+
873+
match self.config.encode_into {
874+
EncodeInto::Always if !shared => {
875+
self.global(&format!("
876+
const encodeString = {};
877+
", encode_into));
878+
}
879+
EncodeInto::Test if !shared => {
880+
self.global(&format!("
881+
const encodeString = (typeof cachedTextEncoder.encodeInto === 'function'
882+
? {}
883+
: {});
884+
", encode_into, encode));
885+
}
886+
_ => {
887+
self.global(&format!("
888+
const encodeString = {};
889+
", encode));
890+
}
891+
}
892+
847893
self.expose_uint8_memory();
894+
self.require_internal_export("__wbindgen_realloc")?;
848895

849896
// A fast path that directly writes char codes into WASM memory as long
850897
// as it finds only ASCII characters.
@@ -855,100 +902,53 @@ impl<'a> Context<'a> {
855902
// This might be not very intuitive, but such calls are usually more
856903
// expensive in mainstream engines than staying in the JS, and
857904
// charCodeAt on ASCII strings is usually optimised to raw bytes.
858-
let start_encoding_as_ascii = format!(
859-
"
860-
{}
861-
let size = arg.length;
862-
let ptr = wasm.__wbindgen_malloc(size);
863-
let offset = 0;
864-
{{
865-
const mem = getUint8Memory();
866-
for (; offset < arg.length; offset++) {{
867-
const code = arg.charCodeAt(offset);
868-
if (code > 0x7F) break;
869-
mem[ptr + offset] = code;
870-
}}
871-
}}
872-
",
873-
debug
874-
);
905+
let encode_as_ascii = "\
906+
let len = arg.length;
907+
let ptr = wasm.__wbindgen_malloc(len);
875908
876-
// The first implementation we have for this is to use
877-
// `TextEncoder#encode` which has been around for quite some time.
878-
let use_encode = format!(
879-
"
880-
{}
881-
if (offset !== arg.length) {{
882-
const buf = cachedTextEncoder.encode(arg.slice(offset));
883-
ptr = wasm.__wbindgen_realloc(ptr, size, size = offset + buf.length);
884-
getUint8Memory().set(buf, ptr + offset);
885-
offset += buf.length;
886-
}}
887-
WASM_VECTOR_LEN = offset;
888-
return ptr;
889-
",
890-
start_encoding_as_ascii
891-
);
909+
const mem = getUint8Memory();
892910
893-
// Another possibility is to use `TextEncoder#encodeInto` which is much
894-
// newer and isn't implemented everywhere yet. It's more efficient,
895-
// however, becaues it allows us to elide an intermediate allocation.
896-
let use_encode_into = format!(
897-
"
911+
let offset = 0;
912+
913+
for (; offset < len; offset++) {
914+
const code = arg.charCodeAt(offset);
915+
if (code > 0x7F) break;
916+
mem[ptr + offset] = code;
917+
}
918+
";
919+
920+
// TODO:
921+
// When converting a JS string to UTF-8, the maximum size is `arg.length * 3`,
922+
// so we just allocate that. This wastes memory, so we should investigate
923+
// looping over the string to calculate the precise size, or perhaps using
924+
// `shrink_to_fit` on the Rust side.
925+
self.global(&format!(
926+
"function passStringToWasm(arg) {{
927+
{}
898928
{}
899-
if (offset !== arg.length) {{
900-
arg = arg.slice(offset);
901-
ptr = wasm.__wbindgen_realloc(ptr, size, size = offset + arg.length * 3);
902-
const view = getUint8Memory().subarray(ptr + offset, ptr + size);
903-
const ret = cachedTextEncoder.encodeInto(arg, view);
929+
if (offset !== len) {{
930+
if (offset !== 0) {{
931+
arg = arg.slice(offset);
932+
}}
933+
ptr = wasm.__wbindgen_realloc(ptr, len, len = offset + arg.length * 3);
934+
const view = getUint8Memory().subarray(ptr + offset, ptr + len);
935+
const ret = encodeString(arg, view);
904936
{}
905937
offset += ret.written;
906938
}}
939+
907940
WASM_VECTOR_LEN = offset;
908941
return ptr;
909-
",
910-
start_encoding_as_ascii,
942+
}}",
943+
debug,
944+
encode_as_ascii,
911945
if self.config.debug {
912946
"if (ret.read != arg.length) throw new Error('failed to pass whole string');"
913947
} else {
914948
""
915949
},
916-
);
917-
918-
// Looks like `encodeInto` doesn't currently work when the memory passed
919-
// in is backed by a `SharedArrayBuffer`, so force usage of `encode` if
920-
// a `SharedArrayBuffer` is in use.
921-
let shared = self.module.memories.get(self.memory).shared;
950+
));
922951

923-
match self.config.encode_into {
924-
EncodeInto::Always if !shared => {
925-
self.require_internal_export("__wbindgen_realloc")?;
926-
self.global(&format!(
927-
"function passStringToWasm(arg) {{ {} }}",
928-
use_encode_into,
929-
));
930-
}
931-
EncodeInto::Test if !shared => {
932-
self.require_internal_export("__wbindgen_realloc")?;
933-
self.global(&format!(
934-
"
935-
let passStringToWasm;
936-
if (typeof cachedTextEncoder.encodeInto === 'function') {{
937-
passStringToWasm = function(arg) {{ {} }};
938-
}} else {{
939-
passStringToWasm = function(arg) {{ {} }};
940-
}}
941-
",
942-
use_encode_into, use_encode,
943-
));
944-
}
945-
_ => {
946-
self.global(&format!(
947-
"function passStringToWasm(arg) {{ {} }}",
948-
use_encode,
949-
));
950-
}
951-
}
952952
Ok(())
953953
}
954954

0 commit comments

Comments
 (0)