Skip to content

Commit 68a1519

Browse files
authored
Merge pull request rustwasm#1612 from Pauan/cache
Initial interning implementation
2 parents 029b8ff + 59af318 commit 68a1519

File tree

12 files changed

+248
-12
lines changed

12 files changed

+248
-12
lines changed

Cargo.toml

+2
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ spans = ["wasm-bindgen-macro/spans"]
2525
std = []
2626
serde-serialize = ["serde", "serde_json", "std"]
2727
nightly = []
28+
enable-interning = ["std"]
2829

2930
# Whether or not the `#[wasm_bindgen]` macro is strict and generates an error on
3031
# all unused attributes
@@ -38,6 +39,7 @@ xxx_debug_only_print_generated_code = ["wasm-bindgen-macro/xxx_debug_only_print_
3839
wasm-bindgen-macro = { path = "crates/macro", version = "=0.2.48" }
3940
serde = { version = "1.0", optional = true }
4041
serde_json = { version = "1.0", optional = true }
42+
cfg-if = "0.1.9"
4143

4244
[target.'cfg(target_arch = "wasm32")'.dev-dependencies]
4345
js-sys = { path = 'crates/js-sys', version = '0.3.25' }

azure-pipelines.yml

+2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ jobs:
2222
displayName: "Anyref test suite builds"
2323
- script: cargo test --target wasm32-unknown-unknown --features serde-serialize
2424
displayName: "Crate test suite (with serde)"
25+
- script: cargo test --target wasm32-unknown-unknown --features enable-interning
26+
displayName: "Crate test suite (with enable-interning)"
2527
- script: cargo test --target wasm32-unknown-unknown -p no-std
2628
displayName: "Crate test suite (no_std)"
2729
- script: cargo test -p wasm-bindgen-futures

crates/cli-support/src/descriptor.rs

+5-2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ tys! {
2424
BOOLEAN
2525
FUNCTION
2626
CLOSURE
27+
CACHED_STRING
2728
STRING
2829
REF
2930
REFMUT
@@ -58,6 +59,7 @@ pub enum Descriptor {
5859
RefMut(Box<Descriptor>),
5960
Slice(Box<Descriptor>),
6061
Vector(Box<Descriptor>),
62+
CachedString,
6163
String,
6264
Anyref,
6365
Enum { hole: u32 },
@@ -127,6 +129,7 @@ impl Descriptor {
127129
SLICE => Descriptor::Slice(Box::new(Descriptor::_decode(data, clamped))),
128130
VECTOR => Descriptor::Vector(Box::new(Descriptor::_decode(data, clamped))),
129131
OPTIONAL => Descriptor::Option(Box::new(Descriptor::_decode(data, clamped))),
132+
CACHED_STRING => Descriptor::CachedString,
130133
STRING => Descriptor::String,
131134
ANYREF => Descriptor::Anyref,
132135
ENUM => Descriptor::Enum { hole: get(data) },
@@ -159,12 +162,12 @@ impl Descriptor {
159162

160163
pub fn vector_kind(&self) -> Option<VectorKind> {
161164
let inner = match *self {
162-
Descriptor::String => return Some(VectorKind::String),
165+
Descriptor::String | Descriptor::CachedString => return Some(VectorKind::String),
163166
Descriptor::Vector(ref d) => &**d,
164167
Descriptor::Slice(ref d) => &**d,
165168
Descriptor::Ref(ref d) => match **d {
166169
Descriptor::Slice(ref d) => &**d,
167-
Descriptor::String => return Some(VectorKind::String),
170+
Descriptor::String | Descriptor::CachedString => return Some(VectorKind::String),
168171
_ => return None,
169172
},
170173
Descriptor::RefMut(ref d) => match **d {

crates/cli-support/src/js/mod.rs

+28
Original file line numberDiff line numberDiff line change
@@ -1088,6 +1088,34 @@ impl<'a> Context<'a> {
10881088
Ok(())
10891089
}
10901090

1091+
/// Writes the `getCachedStringFromWasm(ptr, len)` JS helper as a global.
///
/// Does nothing (returns `Ok(())`) when `should_write_global` returns false
/// for `"get_cached_string_from_wasm"` -- presumably because the helper was
/// already written; confirm against `should_write_global`.
///
/// The helper calls `getObject` and `getStringFromWasm`, so both are exposed
/// first; an error from `expose_get_string_from_wasm` is propagated.
fn expose_get_cached_string_from_wasm(&mut self) -> Result<(), Error> {
1092+
if !self.should_write_global("get_cached_string_from_wasm") {
1093+
return Ok(());
1094+
}
1095+
1096+
self.expose_get_object();
1097+
self.expose_get_string_from_wasm()?;
1098+
1099+
// This has support for both `&str` and `Option<&str>`.
1100+
//
1101+
// If `ptr` is not `0` then we know that it's a `&str` or `Some(&str)`, so we just decode it.
1102+
//
1103+
// If `ptr` is `0` then `len` is not a length: it is the heap index of the cached
// `JsValue` (it is handed to `getObject`), so we return that object.
1104+
//
1105+
// If `ptr` and `len` are both `0` then that means it's `None`, in that case we rely upon
1106+
// the fact that `getObject(0)` is guaranteed to be `undefined`.
1107+
self.global("
1108+
function getCachedStringFromWasm(ptr, len) {
1109+
if (ptr === 0) {
1110+
return getObject(len);
1111+
} else {
1112+
return getStringFromWasm(ptr, len);
1113+
}
1114+
}
1115+
");
1116+
Ok(())
1117+
}
1118+
10911119
fn expose_get_array_js_value_from_wasm(&mut self) -> Result<(), Error> {
10921120
if !self.should_write_global("get_array_js_value_from_wasm") {
10931121
return Ok(());

crates/cli-support/src/js/outgoing.rs

+37
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,33 @@ impl<'a, 'b> Outgoing<'a, 'b> {
130130
Ok(format!("v{}", i))
131131
}
132132

133+
NonstandardOutgoing::CachedString {
134+
offset,
135+
length,
136+
owned,
137+
optional,
138+
} => {
139+
let ptr = self.arg(*offset);
140+
let len = self.arg(*length);
141+
let tmp = self.js.tmp();
142+
143+
if *optional {
144+
self.js.typescript_optional("string");
145+
} else {
146+
self.js.typescript_required("string");
147+
}
148+
149+
self.cx.expose_get_cached_string_from_wasm()?;
150+
151+
self.js.prelude(&format!("const v{} = getCachedStringFromWasm({}, {});", tmp, ptr, len));
152+
153+
if *owned {
154+
self.prelude_free_cached_string(&ptr, &len)?;
155+
}
156+
157+
Ok(format!("v{}", tmp))
158+
}
159+
133160
NonstandardOutgoing::StackClosure {
134161
a,
135162
b,
@@ -408,4 +435,14 @@ impl<'a, 'b> Outgoing<'a, 'b> {
408435
));
409436
self.cx.require_internal_export("__wbindgen_free")
410437
}
438+
439+
/// Adds a JS prelude statement that frees a possibly-cached string.
///
/// `ptr` and `len` are JS expressions (as text) that are interpolated into the
/// statement. The generated code calls `wasm.__wbindgen_free(ptr, len)` only
/// when `ptr !== 0`: in the `CachedString` convention a zero offset marks a
/// cached value (or `None`), so there is no wasm buffer to release.
///
/// Returns the result of requiring the `__wbindgen_free` internal export.
fn prelude_free_cached_string(&mut self, ptr: &str, len: &str) -> Result<(), Error> {
440+
self.js.prelude(&format!(
441+
// `{{` / `}}` are escaped braces: they become literal `{` / `}` in the JS.
"if ({ptr} !== 0) {{ wasm.__wbindgen_free({ptr}, {len}); }}",
442+
ptr = ptr,
443+
len = len,
444+
));
445+
446+
self.cx.require_internal_export("__wbindgen_free")
447+
}
411448
}

crates/cli-support/src/webidl/incoming.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ impl IncomingBuilder {
211211
Descriptor::RefMut(d) => self.process_ref(true, d)?,
212212
Descriptor::Option(d) => self.process_option(d)?,
213213

214-
Descriptor::String | Descriptor::Vector(_) => {
214+
Descriptor::String | Descriptor::CachedString | Descriptor::Vector(_) => {
215215
let kind = arg.vector_kind().ok_or_else(|| {
216216
format_err!("unsupported argument type for calling Rust function from JS {:?}", arg)
217217
})? ;
@@ -256,7 +256,7 @@ impl IncomingBuilder {
256256
self.bindings
257257
.push(NonstandardIncoming::BorrowedAnyref { val: expr });
258258
}
259-
Descriptor::String | Descriptor::Slice(_) => {
259+
Descriptor::String | Descriptor::CachedString | Descriptor::Slice(_) => {
260260
let kind = arg.vector_kind().ok_or_else(|| {
261261
format_err!(
262262
"unsupported slice type for calling Rust function from JS {:?}",
@@ -363,7 +363,7 @@ impl IncomingBuilder {
363363
self.webidl.push(ast::WebidlScalarType::Any);
364364
}
365365

366-
Descriptor::String | Descriptor::Vector(_) => {
366+
Descriptor::String | Descriptor::CachedString | Descriptor::Vector(_) => {
367367
let kind = arg.vector_kind().ok_or_else(|| {
368368
format_err!(
369369
"unsupported optional slice type for calling Rust function from JS {:?}",

crates/cli-support/src/webidl/outgoing.rs

+31
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,18 @@ pub enum NonstandardOutgoing {
6060
kind: VectorKind,
6161
},
6262

63+
/// A Rust String (or &str) which might be cached, or might be `None`.
64+
///
65+
/// If `offset` is 0 then it is cached, and the cached JsValue's index is in `length`.
66+
///
67+
/// If `offset` and `length` are both 0, then it is `None`.
68+
CachedString {
69+
offset: u32,
70+
length: u32,
71+
owned: bool,
72+
optional: bool,
73+
},
74+
6375
/// A `&[u64]` or `&[i64]` is being passed to JS, and the 64-bit sizes here
6476
/// aren't supported by WebIDL bindings yet.
6577
View64 {
@@ -240,6 +252,8 @@ impl OutgoingBuilder<'_> {
240252
Descriptor::Ref(d) => self.process_ref(false, d)?,
241253
Descriptor::RefMut(d) => self.process_ref(true, d)?,
242254

255+
Descriptor::CachedString => self.cached_string(false, true),
256+
243257
Descriptor::Vector(_) | Descriptor::String => {
244258
let kind = arg.vector_kind().ok_or_else(|| {
245259
format_err!(
@@ -281,6 +295,7 @@ impl OutgoingBuilder<'_> {
281295
self.bindings
282296
.push(NonstandardOutgoing::BorrowedAnyref { idx });
283297
}
298+
Descriptor::CachedString => self.cached_string(false, false),
284299
Descriptor::Slice(_) | Descriptor::String => {
285300
use wasm_webidl_bindings::ast::WebidlScalarType::*;
286301

@@ -422,6 +437,9 @@ impl OutgoingBuilder<'_> {
422437
}
423438
Descriptor::Ref(d) => self.process_option_ref(false, d)?,
424439
Descriptor::RefMut(d) => self.process_option_ref(true, d)?,
440+
441+
Descriptor::CachedString => self.cached_string(true, true),
442+
425443
Descriptor::String | Descriptor::Vector(_) => {
426444
let kind = arg.vector_kind().ok_or_else(|| {
427445
format_err!(
@@ -455,6 +473,7 @@ impl OutgoingBuilder<'_> {
455473
self.bindings
456474
.push(NonstandardOutgoing::BorrowedAnyref { idx });
457475
}
476+
Descriptor::CachedString => self.cached_string(true, false),
458477
Descriptor::String | Descriptor::Slice(_) => {
459478
let kind = arg.vector_kind().ok_or_else(|| {
460479
format_err!(
@@ -505,6 +524,18 @@ impl OutgoingBuilder<'_> {
505524
.push(NonstandardOutgoing::Standard(binding.into()));
506525
}
507526

527+
/// Records an outgoing string argument that may be served from the intern cache.
///
/// Pushes two `i32` wasm values (the offset first, then the length), pushes a
/// `DomString` WebIDL type, and appends a `NonstandardOutgoing::CachedString`
/// binding that carries both wasm value indices together with the flags.
///
/// * `optional` - forwarded unchanged into the binding's `optional` field.
/// * `owned` - forwarded unchanged into the binding's `owned` field.
fn cached_string(&mut self, optional: bool, owned: bool) {
528+
let offset = self.push_wasm(ValType::I32);
529+
let length = self.push_wasm(ValType::I32);
530+
self.webidl.push(ast::WebidlScalarType::DomString);
531+
self.bindings.push(NonstandardOutgoing::CachedString {
532+
offset,
533+
length,
534+
owned,
535+
optional,
536+
})
537+
}
538+
508539
fn option_native(&mut self, signed: bool, ty: ValType) {
509540
let present = self.push_wasm(ValType::I32);
510541
let val = self.push_wasm(ty);

src/cache/intern.rs

+82
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
use cfg_if::cfg_if;
2+
3+
4+
cfg_if! {
5+
if #[cfg(feature = "enable-interning")] {
6+
use std::thread_local;
7+
use std::string::String;
8+
use std::borrow::ToOwned;
9+
use std::cell::RefCell;
10+
use std::collections::HashMap;
11+
use crate::JsValue;
12+
13+
/// Storage for interned strings.
struct Cache {
14+
// Maps each interned string to the `JsValue` that was created from it.
// Wrapped in a `RefCell` so it can be mutated through the shared reference
// that `thread_local!`'s `with` hands out.
entries: RefCell<HashMap<String, JsValue>>,
15+
}
16+
17+
thread_local! {
18+
// The per-thread intern cache; it starts out empty.
static CACHE: Cache = Cache {
19+
entries: RefCell::new(HashMap::new()),
20+
};
21+
}
22+
23+
/// Looks `s` up in the thread-local intern cache.
///
/// Returns `Some(idx)` holding the raw `idx` of the cached `JsValue` when `s`
/// has been interned, and `None` otherwise.
///
/// Only the raw index is returned, so you must take care
24+
/// not to use it after the cached `JsValue` is freed.
25+
pub(crate) fn unsafe_get_str(s: &str) -> Option<u32> {
26+
CACHE.with(|cache| {
27+
let cache = cache.entries.borrow();
28+
29+
cache.get(s).map(|x| x.idx)
30+
})
31+
}
32+
33+
/// Adds `key` to the thread-local intern cache if it is not already present.
///
/// A new entry stores an owned copy of `key` together with `JsValue::from(key)`.
/// An existing entry is left untouched.
fn intern_str(key: &str) {
34+
CACHE.with(|cache| {
35+
let mut cache = cache.entries.borrow_mut();
36+
37+
// Can't use `entry` because `entry` requires an owned `String` key, while
// `contains_key` can be queried with the borrowed `&str` directly.
38+
if !cache.contains_key(key) {
39+
cache.insert(key.to_owned(), JsValue::from(key));
40+
}
41+
})
42+
}
43+
}
44+
}
45+
46+
47+
/// Interns Rust strings so that it's much faster to send them to JS.
48+
///
49+
/// Sending strings from Rust to JS is slow, because it has to do a full `O(n)`
50+
/// copy and *also* encode from UTF-8 to UTF-16. This must be done every single
51+
/// time a string is sent to JS.
52+
///
53+
/// If you are sending the same string multiple times, you can call this `intern`
54+
/// function, which simply returns its argument unchanged:
55+
///
56+
/// ```rust
57+
/// # use wasm_bindgen::intern;
58+
/// intern("foo") // returns "foo"
59+
/// # ;
60+
/// ```
61+
///
62+
/// However, if you enable the `"enable-interning"` feature for wasm-bindgen,
63+
/// then it will add the string into an internal cache.
64+
///
65+
/// When you send that cached string to JS, it will look it up in the cache,
66+
/// which completely avoids the `O(n)` copy and encoding. This has a significant
67+
/// speed boost (as high as 783%)!
68+
///
69+
/// However, there is a small cost to this caching, so you shouldn't cache every
70+
/// string. Only cache strings which have a high likelihood of being sent
71+
/// to JS multiple times.
72+
///
73+
/// Also, keep in mind that this function is a *performance hint*: it's not
74+
/// *guaranteed* that the string will be cached, and the caching strategy
75+
/// might change at any time, so don't rely upon it.
76+
#[inline]
77+
pub fn intern(s: &str) -> &str {
78+
// The call below only exists when the `enable-interning` feature is on;
// without the feature this function just hands `s` back.
#[cfg(feature = "enable-interning")]
79+
intern_str(s);
80+
81+
// The argument is always returned unchanged.
s
82+
}

src/cache/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
pub mod intern;

src/convert/slices.rs

+27-4
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use std::prelude::v1::*;
44
use core::slice;
55
use core::str;
66

7+
use cfg_if::cfg_if;
78
use crate::convert::OptionIntoWasmAbi;
89
use crate::convert::{FromWasmAbi, IntoWasmAbi, RefFromWasmAbi, RefMutFromWasmAbi, WasmAbi};
910

@@ -123,6 +124,24 @@ vectors! {
123124
u8 i8 u16 i16 u32 i32 u64 i64 usize isize f32 f64
124125
}
125126

127+
128+
// Two builds of `unsafe_get_cached_str`, selected by the `enable-interning`
// feature, so callers can use it unconditionally.
cfg_if! {
129+
if #[cfg(feature = "enable-interning")] {
130+
/// Returns the ABI representation of `x` if it is in the intern cache.
///
/// The result has `ptr` set to `0` and `len` set to the value returned
/// by `unsafe_get_str`; `None` is returned when `x` is not cached.
#[inline]
131+
fn unsafe_get_cached_str(x: &str) -> Option<WasmSlice> {
132+
// This uses 0 for the ptr as an indication that it is a JsValue and not a str.
133+
crate::cache::intern::unsafe_get_str(x).map(|x| WasmSlice { ptr: 0, len: x })
134+
}
135+
136+
} else {
137+
/// Interning is disabled in this configuration, so nothing is ever cached.
#[inline]
138+
fn unsafe_get_cached_str(_x: &str) -> Option<WasmSlice> {
139+
None
140+
}
141+
}
142+
}
143+
144+
126145
if_std! {
127146
impl<T> IntoWasmAbi for Vec<T> where Box<[T]>: IntoWasmAbi<Abi = WasmSlice> {
128147
type Abi = <Box<[T]> as IntoWasmAbi>::Abi;
@@ -153,12 +172,14 @@ if_std! {
153172

154173
#[inline]
155174
fn into_abi(self) -> Self::Abi {
156-
self.into_bytes().into_abi()
175+
// This is safe because the JsValue is immediately looked up in the heap and
176+
// then returned, so use-after-free cannot occur.
177+
unsafe_get_cached_str(&self).unwrap_or_else(|| self.into_bytes().into_abi())
157178
}
158179
}
159180

160181
impl OptionIntoWasmAbi for String {
161-
fn none() -> WasmSlice { null_slice() }
182+
fn none() -> Self::Abi { null_slice() }
162183
}
163184

164185
impl FromWasmAbi for String {
@@ -180,12 +201,14 @@ impl<'a> IntoWasmAbi for &'a str {
180201

181202
#[inline]
182203
fn into_abi(self) -> Self::Abi {
183-
self.as_bytes().into_abi()
204+
// This is safe because the JsValue is immediately looked up in the heap and
205+
// then returned, so use-after-free cannot occur.
206+
unsafe_get_cached_str(self).unwrap_or_else(|| self.as_bytes().into_abi())
184207
}
185208
}
186209

187210
impl<'a> OptionIntoWasmAbi for &'a str {
188-
fn none() -> WasmSlice {
211+
fn none() -> Self::Abi {
189212
null_slice()
190213
}
191214
}

0 commit comments

Comments
 (0)