Skip to content

Commit 38b8232

Browse files
authored
Merge pull request #1688 from alexcrichton/llvm-9-threads
Fully update threading support for LLVM 9
2 parents 849c345 + 8cb7924 commit 38b8232

File tree

5 files changed

+136
-66
lines changed

5 files changed

+136
-66
lines changed

azure-pipelines.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ jobs:
166166
steps:
167167
- template: ci/azure-install-rust.yml
168168
parameters:
169-
toolchain: nightly-2019-06-13
169+
toolchain: nightly-2019-07-30
170170
- template: ci/azure-install-sccache.yml
171171
- script: rustup component add rust-src
172172
displayName: "install rust-src"

crates/cli-support/src/lib.rs

+7-12
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,9 @@ pub struct Bindgen {
3232
// Experimental support for weakrefs, an upcoming ECMAScript feature.
3333
// Currently only enable-able through an env var.
3434
weak_refs: bool,
35-
// Experimental support for the wasm threads proposal, transforms the wasm
36-
// module to be "ready to be instantiated on any thread"
37-
threads: Option<wasm_bindgen_threads_xform::Config>,
35+
// Support for the wasm threads proposal, transforms the wasm module to be
36+
// "ready to be instantiated on any thread"
37+
threads: wasm_bindgen_threads_xform::Config,
3838
anyref: bool,
3939
encode_into: EncodeInto,
4040
}
@@ -286,10 +286,8 @@ impl Bindgen {
286286
);
287287
}
288288

289-
if let Some(cfg) = &self.threads {
290-
cfg.run(&mut module)
291-
.with_context(|_| "failed to prepare module for threading")?;
292-
}
289+
self.threads.run(&mut module)
290+
.with_context(|_| "failed to prepare module for threading")?;
293291

294292
// If requested, turn all mangled symbols into prettier unmangled
295293
// symbols with the help of `rustc-demangle`.
@@ -395,18 +393,15 @@ fn reset_indentation(s: &str) -> String {
395393
// Eventually these will all be CLI options, but while they're unstable features
396394
// they're left as environment variables. We don't guarantee anything about
397395
// backwards-compatibility with these options.
398-
fn threads_config() -> Option<wasm_bindgen_threads_xform::Config> {
399-
if env::var("WASM_BINDGEN_THREADS").is_err() {
400-
return None;
401-
}
396+
fn threads_config() -> wasm_bindgen_threads_xform::Config {
402397
let mut cfg = wasm_bindgen_threads_xform::Config::new();
403398
if let Ok(s) = env::var("WASM_BINDGEN_THREADS_MAX_MEMORY") {
404399
cfg.maximum_memory(s.parse().unwrap());
405400
}
406401
if let Ok(s) = env::var("WASM_BINDGEN_THREADS_STACK_SIZE") {
407402
cfg.thread_stack_size(s.parse().unwrap());
408403
}
409-
Some(cfg)
404+
cfg
410405
}
411406

412407
fn demangle(module: &mut Module) {

crates/threads-xform/src/lib.rs

+120-43
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use std::cmp;
22
use std::collections::HashMap;
3+
use std::env;
34
use std::mem;
45

56
use failure::{bail, format_err, Error};
@@ -78,9 +79,19 @@ impl Config {
7879
///
7980
/// More and/or less may happen here over time, stay tuned!
8081
pub fn run(&self, module: &mut Module) -> Result<(), Error> {
81-
let stack_pointer = find_stack_pointer(module)?;
82+
// Compatibility with older LLVM outputs. Newer LLVM outputs, when
83+
// atomics are enabled, emit a shared memory. That's a good indicator
84+
// that we have work to do. If shared memory isn't enabled, though, then
85+
// this isn't an atomic module so there's nothing to do. We still allow,
86+
// though, an environment variable to force us to go down this path to
87+
// remain compatible with older LLVM outputs.
8288
let memory = find_memory(module)?;
83-
let addr = inject_thread_id_counter(module, memory)?;
89+
if !module.memories.get(memory).shared && env::var("WASM_BINDGEN_THREADS").is_err() {
90+
return Ok(());
91+
}
92+
93+
let stack_pointer = find_stack_pointer(module)?;
94+
let addr = allocate_static_data(module, memory, 4, 4)?;
8495
let zero = InitExpr::Value(Value::I32(0));
8596
let globals = Globals {
8697
thread_id: module.globals.add_local(ValType::I32, true, zero),
@@ -103,18 +114,11 @@ impl Config {
103114
mem.maximum = Some(cmp::max(self.maximum_memory / PAGE_SIZE, prev_max));
104115
assert!(mem.data_segments.is_empty());
105116

106-
let init_memory = module
107-
.exports
108-
.iter()
109-
.find(|e| e.name == "__wasm_init_memory")
110-
.ok_or_else(|| format_err!("failed to find `__wasm_init_memory`"))?;
111-
let init_memory_id = match init_memory.item {
112-
walrus::ExportItem::Function(f) => f,
113-
_ => bail!("`__wasm_init_memory` must be a function"),
114-
};
115-
let export_id = init_memory.id();
116-
module.exports.delete(export_id);
117-
InitMemory::Call(init_memory_id)
117+
InitMemory::Call {
118+
wasm_init_memory: delete_synthetic_func(module, "__wasm_init_memory")?,
119+
wasm_init_tls: delete_synthetic_func(module, "__wasm_init_tls")?,
120+
tls_size: delete_synthetic_global(module, "__tls_size")?,
121+
}
118122
} else {
119123
update_memory(module, memory, self.maximum_memory)?;
120124
InitMemory::Segments(switch_data_segments_to_passive(module, memory)?)
@@ -127,13 +131,47 @@ impl Config {
127131
stack_pointer,
128132
self.thread_stack_size,
129133
memory,
130-
);
134+
)?;
131135

132136
implement_thread_intrinsics(module, &globals)?;
133137
Ok(())
134138
}
135139
}
136140

141+
fn delete_synthetic_func(module: &mut Module, name: &str) -> Result<FunctionId, Error> {
142+
match delete_synthetic_export(module, name)? {
143+
walrus::ExportItem::Function(f) => Ok(f),
144+
_ => bail!("`{}` must be a function", name),
145+
}
146+
}
147+
148+
fn delete_synthetic_global(module: &mut Module, name: &str) -> Result<u32, Error> {
149+
let id = match delete_synthetic_export(module, name)? {
150+
walrus::ExportItem::Global(g) => g,
151+
_ => bail!("`{}` must be a global", name),
152+
};
153+
let g = match module.globals.get(id).kind {
154+
walrus::GlobalKind::Local(g) => g,
155+
walrus::GlobalKind::Import(_) => bail!("`{}` must not be an imported global", name),
156+
};
157+
match g {
158+
InitExpr::Value(Value::I32(v)) => Ok(v as u32),
159+
_ => bail!("`{}` was not an `i32` constant", name),
160+
}
161+
}
162+
163+
fn delete_synthetic_export(module: &mut Module, name: &str) -> Result<ExportItem, Error> {
164+
let item = module
165+
.exports
166+
.iter()
167+
.find(|e| e.name == name)
168+
.ok_or_else(|| format_err!("failed to find `{}`", name))?;
169+
let ret = item.item;
170+
let id = item.id();
171+
module.exports.delete(id);
172+
Ok(ret)
173+
}
174+
137175
struct PassiveSegment {
138176
id: DataId,
139177
offset: InitExpr,
@@ -211,7 +249,12 @@ struct Globals {
211249
thread_tcb: GlobalId,
212250
}
213251

214-
fn inject_thread_id_counter(module: &mut Module, memory: MemoryId) -> Result<u32, Error> {
252+
fn allocate_static_data(
253+
module: &mut Module,
254+
memory: MemoryId,
255+
size: u32,
256+
align: u32,
257+
) -> Result<u32, Error> {
215258
// First up, look for a `__heap_base` export which is injected by LLD as
216259
// part of the linking process. Note that `__heap_base` should in theory be
217260
// *after* the stack and data, which means it's at the very end of the
@@ -256,9 +299,9 @@ fn inject_thread_id_counter(module: &mut Module, memory: MemoryId) -> Result<u32
256299
GlobalKind::Local(InitExpr::Value(Value::I32(n))) => n,
257300
_ => bail!("`__heap_base` not a locally defined `i32`"),
258301
};
259-
let address = (*offset as u32 + 3) & !3; // align up
260-
let add_a_page = (address + 4) / PAGE_SIZE != address / PAGE_SIZE;
261-
*offset = (address + 4) as i32;
302+
let address = (*offset as u32 + (align - 1)) & !(align - 1); // align up
303+
let add_a_page = (address + size) / PAGE_SIZE != address / PAGE_SIZE;
304+
*offset = (address + size) as i32;
262305
(address, add_a_page)
263306
};
264307

@@ -282,22 +325,32 @@ fn find_stack_pointer(module: &mut Module) -> Result<Option<GlobalId>, Error> {
282325
})
283326
.collect::<Vec<_>>();
284327

285-
match candidates.len() {
286-
// If there are no mutable i32 globals, assume this module doesn't even
287-
// need a stack pointer!
288-
0 => Ok(None),
289-
290-
// If there's more than one global give up for now. Eventually we can
291-
// probably do better by pattern matching on functions, but this should
292-
// be sufficient for LLVM's output for now.
293-
1 => Ok(Some(candidates[0].id())),
294-
_ => bail!("too many mutable globals to infer the stack pointer"),
328+
if candidates.len() == 0 {
329+
return Ok(None);
330+
}
331+
if candidates.len() > 2 {
332+
bail!("too many mutable globals to infer the stack pointer");
333+
}
334+
if candidates.len() == 1 {
335+
return Ok(Some(candidates[0].id()));
295336
}
337+
338+
// If we've got two mutable globals then we're in a pretty standard
339+
// situation for threaded code where one is the stack pointer and one is the
340+
// TLS base offset. We need to figure out which is which, and we basically
341+
// assume LLVM's current codegen where the first is the stack pointer.
342+
//
343+
// TODO: have an actual check here.
344+
Ok(Some(candidates[0].id()))
296345
}
297346

298347
enum InitMemory {
299348
Segments(Vec<PassiveSegment>),
300-
Call(walrus::FunctionId),
349+
Call {
350+
wasm_init_memory: walrus::FunctionId,
351+
wasm_init_tls: walrus::FunctionId,
352+
tls_size: u32,
353+
},
301354
}
302355

303356
fn inject_start(
@@ -308,7 +361,7 @@ fn inject_start(
308361
stack_pointer: Option<GlobalId>,
309362
stack_size: u32,
310363
memory: MemoryId,
311-
) {
364+
) -> Result<(), Error> {
312365
use walrus::ir::*;
313366

314367
assert!(stack_size % PAGE_SIZE == 0);
@@ -376,15 +429,6 @@ fn inject_start(
376429
let sp = block.binop(BinaryOp::I32Add, sp_base, stack_size);
377430
let set_stack_pointer = block.global_set(stack_pointer, sp);
378431
block.expr(set_stack_pointer);
379-
380-
// FIXME(WebAssembly/tool-conventions#117) we probably don't want to
381-
// duplicate drop with `if_zero_block` or otherwise just infer to drop
382-
// all these data segments, this seems like something to synthesize in
383-
// the linker...
384-
for segment in module.data.iter() {
385-
let drop = block.data_drop(segment.id());
386-
block.expr(drop);
387-
}
388432
}
389433
let if_nonzero_block = block.id();
390434
drop(block);
@@ -394,7 +438,7 @@ fn inject_start(
394438
// memory, however, so do that here.
395439
let if_zero_block = {
396440
let mut block = builder.if_else_block(Box::new([]), Box::new([]));
397-
match memory_init {
441+
match &memory_init {
398442
InitMemory::Segments(segments) => {
399443
for segment in segments {
400444
let zero = block.i32_const(0);
@@ -409,8 +453,10 @@ fn inject_start(
409453
block.expr(drop);
410454
}
411455
}
412-
InitMemory::Call(wasm_init_memory) => {
413-
let call = block.call(wasm_init_memory, Box::new([]));
456+
InitMemory::Call {
457+
wasm_init_memory, ..
458+
} => {
459+
let call = block.call(*wasm_init_memory, Box::new([]));
414460
block.expr(call);
415461
}
416462
}
@@ -420,6 +466,23 @@ fn inject_start(
420466
let block = builder.if_else(thread_id_is_nonzero, if_nonzero_block, if_zero_block);
421467
exprs.push(block);
422468

469+
// If we have these globals then we're using the new thread local system
470+
// implemented in LLVM, which means that `__wasm_init_tls` needs to be
471+
// called with a chunk of memory `tls_size` bytes big to set as the thread's
472+
// thread-local data block.
473+
if let InitMemory::Call {
474+
wasm_init_tls,
475+
tls_size,
476+
..
477+
} = memory_init
478+
{
479+
let malloc = find_wbindgen_malloc(module)?;
480+
let size = builder.i32_const(tls_size as i32);
481+
let ptr = builder.call(malloc, Box::new([size]));
482+
let block = builder.call(wasm_init_tls, Box::new([ptr]));
483+
exprs.push(block);
484+
}
485+
423486
// If a start function previously existed we're done with our own
424487
// initialization so delegate to them now.
425488
if let Some(id) = module.start.take() {
@@ -432,6 +495,20 @@ fn inject_start(
432495

433496
// ... and finally flag it as the new start function
434497
module.start = Some(id);
498+
499+
Ok(())
500+
}
501+
502+
fn find_wbindgen_malloc(module: &Module) -> Result<FunctionId, Error> {
503+
let e = module
504+
.exports
505+
.iter()
506+
.find(|e| e.name == "__wbindgen_malloc")
507+
.ok_or_else(|| format_err!("failed to find `__wbindgen_malloc`"))?;
508+
match e.item {
509+
walrus::ExportItem::Function(f) => Ok(f),
510+
_ => bail!("`__wbindgen_malloc` wasn't a funtion"),
511+
}
435512
}
436513

437514
fn implement_thread_intrinsics(module: &mut Module, globals: &Globals) -> Result<(), Error> {

examples/raytrace-parallel/Xargo.toml

-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,2 @@
11
[dependencies.std]
22
stage = 0
3-
features = ['wasm-bindgen-threads']

examples/raytrace-parallel/build.sh

+8-9
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,15 @@ set -ex
1010
#
1111
# * Next we need to compile everything with the `atomics` feature enabled,
1212
# ensuring that LLVM will generate atomic instructions and such.
13-
RUSTFLAGS='-C target-feature=+atomics' \
13+
RUSTFLAGS='-C target-feature=+atomics,+bulk-memory' \
1414
xargo build --target wasm32-unknown-unknown --release
1515

16-
# Threading support is disabled by default in wasm-bindgen, so use an env var
17-
# here to turn it on for our bindings generation. Also note that webpack isn't
18-
# currently compatible with atomics, so we go with the --no-modules output.
19-
WASM_BINDGEN_THREADS=1 \
20-
cargo run --manifest-path ../../crates/cli/Cargo.toml \
21-
--bin wasm-bindgen -- \
22-
../../target/wasm32-unknown-unknown/release/raytrace_parallel.wasm --out-dir . \
23-
--no-modules
16+
# Note the usage of `--no-modules` here which is used to create an output which
17+
# is usable from Web Workers. We notably can't use `--target bundler` since
18+
# Webpack doesn't have support for atomics yet.
19+
cargo run --manifest-path ../../crates/cli/Cargo.toml \
20+
--bin wasm-bindgen -- \
21+
../../target/wasm32-unknown-unknown/release/raytrace_parallel.wasm --out-dir . \
22+
--no-modules
2423

2524
python3 -m http.server

0 commit comments

Comments
 (0)