Skip to content

Commit fc2fd95

Browse files
fix: handle Queue::submit non-fatally (#6318)
* Change the signature of `wgpu_core::Global::queue_submit` to return a `(SubmissionIndex, …)` in addition to its current error type. * Change the control flow of errors in `Queue::submit` to break to the end of a block. This is similar to what we already do in many APIs in `wgpu_core`. * Hoist the scope of the local `submit_index` binding so it can be used at the point where we need to convert current error paths to also return the submission index. Later, we will likely want to avoid actually retrieving a new submission index so we can minimize the critical section of code. We'll need to figure out a strategy for returning a valid (but not necessarily unique) index in the case of failures that prevent successful submission.
1 parent 859dd88 commit fc2fd95

File tree

5 files changed

+115
-24
lines changed

5 files changed

+115
-24
lines changed

deno_webgpu/queue.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ pub fn op_webgpu_queue_submit(
4444
})
4545
.collect::<Result<Vec<_>, AnyError>>()?;
4646

47-
let maybe_err = instance.queue_submit(queue, &ids).err();
47+
let maybe_err = instance.queue_submit(queue, &ids).err().map(|(_idx, e)| e);
4848

4949
for rid in command_buffers {
5050
let resource = state.resource_table.take::<WebGpuCommandBuffer>(rid)?;

tests/tests/regression/issue_6317.rs

+58
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
use wgpu::{DownlevelFlags, Limits};
2+
use wgpu_macros::gpu_test;
3+
use wgpu_test::{fail, GpuTestConfiguration, TestParameters};
4+
5+
#[gpu_test]
6+
static NON_FATAL_ERRORS_IN_QUEUE_SUBMIT: GpuTestConfiguration = GpuTestConfiguration::new()
7+
.parameters(
8+
TestParameters::default()
9+
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
10+
.limits(Limits::downlevel_defaults()),
11+
)
12+
.run_sync(|ctx| {
13+
let shader_with_trivial_bind_group = concat!(
14+
"@group(0) @binding(0) var<storage, read_write> stuff: u32;\n",
15+
"\n",
16+
"@compute @workgroup_size(1) fn main() { stuff = 2u; }\n"
17+
);
18+
19+
let module = ctx
20+
.device
21+
.create_shader_module(wgpu::ShaderModuleDescriptor {
22+
label: None,
23+
source: wgpu::ShaderSource::Wgsl(shader_with_trivial_bind_group.into()),
24+
});
25+
26+
let compute_pipeline =
27+
ctx.device
28+
.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
29+
label: None,
30+
layout: None,
31+
module: &module,
32+
entry_point: None,
33+
compilation_options: Default::default(),
34+
cache: Default::default(),
35+
});
36+
37+
fail(
38+
&ctx.device,
39+
|| {
40+
let mut command_encoder = ctx.device.create_command_encoder(&Default::default());
41+
{
42+
let mut render_pass = command_encoder.begin_compute_pass(&Default::default());
43+
render_pass.set_pipeline(&compute_pipeline);
44+
45+
// NOTE: We deliberately don't set a bind group here, to provoke a validation
46+
// error.
47+
48+
render_pass.dispatch_workgroups(1, 1, 1);
49+
}
50+
51+
let _idx = ctx.queue.submit([command_encoder.finish()]);
52+
},
53+
Some(concat!(
54+
"The current set ComputePipeline with '' label ",
55+
"expects a BindGroup to be set at index 0"
56+
)),
57+
)
58+
});

tests/tests/root.rs

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ mod regression {
66
mod issue_4485;
77
mod issue_4514;
88
mod issue_5553;
9+
mod issue_6317;
910
}
1011

1112
mod bgra8unorm_storage;

wgpu-core/src/device/queue.rs

+51-22
Original file line numberDiff line numberDiff line change
@@ -1027,11 +1027,13 @@ impl Global {
10271027
&self,
10281028
queue_id: QueueId,
10291029
command_buffer_ids: &[id::CommandBufferId],
1030-
) -> Result<SubmissionIndex, QueueSubmitError> {
1030+
) -> Result<SubmissionIndex, (SubmissionIndex, QueueSubmitError)> {
10311031
profiling::scope!("Queue::submit");
10321032
api_log!("Queue::submit {queue_id:?}");
10331033

1034-
let (submit_index, callbacks) = {
1034+
let submit_index;
1035+
1036+
let res = 'error: {
10351037
let hub = &self.hub;
10361038

10371039
let queue = hub.queues.get(queue_id);
@@ -1042,7 +1044,7 @@ impl Global {
10421044

10431045
// Fence lock must be acquired after the snatch lock everywhere to avoid deadlocks.
10441046
let mut fence = device.fence.write();
1045-
let submit_index = device
1047+
submit_index = device
10461048
.active_submission_index
10471049
.fetch_add(1, Ordering::SeqCst)
10481050
+ 1;
@@ -1119,18 +1121,29 @@ impl Global {
11191121
}
11201122

11211123
// execute resource transitions
1122-
unsafe {
1124+
if let Err(e) = unsafe {
11231125
baked.encoder.begin_encoding(hal_label(
11241126
Some("(wgpu internal) Transit"),
11251127
device.instance_flags,
11261128
))
11271129
}
1128-
.map_err(|e| device.handle_hal_error(e))?;
1130+
.map_err(|e| device.handle_hal_error(e))
1131+
{
1132+
break 'error Err(e.into());
1133+
}
11291134

11301135
//Note: locking the trackers has to be done after the storages
11311136
let mut trackers = device.trackers.lock();
1132-
baked.initialize_buffer_memory(&mut trackers, &snatch_guard)?;
1133-
baked.initialize_texture_memory(&mut trackers, device, &snatch_guard)?;
1137+
if let Err(e) = baked.initialize_buffer_memory(&mut trackers, &snatch_guard)
1138+
{
1139+
break 'error Err(e.into());
1140+
}
1141+
if let Err(e) =
1142+
baked.initialize_texture_memory(&mut trackers, device, &snatch_guard)
1143+
{
1144+
break 'error Err(e.into());
1145+
}
1146+
11341147
//Note: stateless trackers are not merged:
11351148
// device already knows these resources exist.
11361149
CommandBuffer::insert_barriers_from_device_tracker(
@@ -1147,13 +1160,16 @@ impl Global {
11471160
// Note: we could technically do it after all of the command buffers,
11481161
// but here we have a command encoder by hand, so it's easier to use it.
11491162
if !used_surface_textures.is_empty() {
1150-
unsafe {
1163+
if let Err(e) = unsafe {
11511164
baked.encoder.begin_encoding(hal_label(
11521165
Some("(wgpu internal) Present"),
11531166
device.instance_flags,
11541167
))
11551168
}
1156-
.map_err(|e| device.handle_hal_error(e))?;
1169+
.map_err(|e| device.handle_hal_error(e))
1170+
{
1171+
break 'error Err(e.into());
1172+
}
11571173
let texture_barriers = trackers
11581174
.textures
11591175
.set_from_usage_scope_and_drain_transitions(
@@ -1180,7 +1196,7 @@ impl Global {
11801196
}
11811197

11821198
if let Some(first_error) = first_error {
1183-
return Err(first_error);
1199+
break 'error Err(first_error);
11841200
}
11851201
}
11861202
}
@@ -1190,9 +1206,9 @@ impl Global {
11901206
{
11911207
used_surface_textures.set_size(hub.textures.read().len());
11921208
for texture in pending_writes.dst_textures.values() {
1193-
match texture.try_inner(&snatch_guard)? {
1194-
TextureInner::Native { .. } => {}
1195-
TextureInner::Surface { .. } => {
1209+
match texture.try_inner(&snatch_guard) {
1210+
Ok(TextureInner::Native { .. }) => {}
1211+
Ok(TextureInner::Surface { .. }) => {
11961212
// Compare the Arcs by pointer as Textures don't implement Eq
11971213
submit_surface_textures_owned
11981214
.insert(Arc::as_ptr(texture), texture.clone());
@@ -1203,6 +1219,7 @@ impl Global {
12031219
.unwrap()
12041220
};
12051221
}
1222+
Err(e) => break 'error Err(e.into()),
12061223
}
12071224
}
12081225

@@ -1224,10 +1241,12 @@ impl Global {
12241241
}
12251242
}
12261243

1227-
if let Some(pending_execution) =
1228-
pending_writes.pre_submit(&device.command_allocator, device, &queue)?
1229-
{
1230-
active_executions.insert(0, pending_execution);
1244+
match pending_writes.pre_submit(&device.command_allocator, device, &queue) {
1245+
Ok(Some(pending_execution)) => {
1246+
active_executions.insert(0, pending_execution);
1247+
}
1248+
Ok(None) => {}
1249+
Err(e) => break 'error Err(e.into()),
12311250
}
12321251

12331252
let hal_command_buffers = active_executions
@@ -1249,14 +1268,17 @@ impl Global {
12491268
submit_surface_textures.push(raw);
12501269
}
12511270

1252-
unsafe {
1271+
if let Err(e) = unsafe {
12531272
queue.raw().submit(
12541273
&hal_command_buffers,
12551274
&submit_surface_textures,
12561275
(fence.as_mut(), submit_index),
12571276
)
12581277
}
1259-
.map_err(|e| device.handle_hal_error(e))?;
1278+
.map_err(|e| device.handle_hal_error(e))
1279+
{
1280+
break 'error Err(e.into());
1281+
}
12601282

12611283
// Advance the successful submission index.
12621284
device
@@ -1280,12 +1302,19 @@ impl Global {
12801302
let (closures, _) =
12811303
match device.maintain(fence_guard, wgt::Maintain::Poll, snatch_guard) {
12821304
Ok(closures) => closures,
1283-
Err(WaitIdleError::Device(err)) => return Err(QueueSubmitError::Queue(err)),
1284-
Err(WaitIdleError::StuckGpu) => return Err(QueueSubmitError::StuckGpu),
1305+
Err(WaitIdleError::Device(err)) => {
1306+
break 'error Err(QueueSubmitError::Queue(err))
1307+
}
1308+
Err(WaitIdleError::StuckGpu) => break 'error Err(QueueSubmitError::StuckGpu),
12851309
Err(WaitIdleError::WrongSubmissionIndex(..)) => unreachable!(),
12861310
};
12871311

1288-
(submit_index, closures)
1312+
Ok(closures)
1313+
};
1314+
1315+
let callbacks = match res {
1316+
Ok(ok) => ok,
1317+
Err(e) => return Err((submit_index, e)),
12891318
};
12901319

12911320
// the closures should execute with nothing locked!

wgpu/src/backend/wgpu_core.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -2074,7 +2074,10 @@ impl crate::Context for ContextWgpuCore {
20742074

20752075
let index = match self.0.queue_submit(queue_data.id, &temp_command_buffers) {
20762076
Ok(index) => index,
2077-
Err(err) => self.handle_error_fatal(err, "Queue::submit"),
2077+
Err((index, err)) => {
2078+
self.handle_error_nolabel(&queue_data.error_sink, err, "Queue::submit");
2079+
index
2080+
}
20782081
};
20792082

20802083
for cmdbuf in &temp_command_buffers {

0 commit comments

Comments (0)