Skip to content

Commit 2003dd9

Browse files
Wumpfbradwerth
authored andcommitted
Metal encoder & pass timestamp support (gfx-rs#4008)
Implements timer queries via write_timestamp on Metal for encoders (whenever timer queries are available) and passes (for Intel/AMD GPUs, where we should advertise TIMESTAMP_QUERY_INSIDE_PASSES now). Due to some bugs in Metal this was a lot harder than expected. I believe the solution is close to optimal with the current restrictions in place. For details see code comments.
1 parent 2f5dc46 commit 2003dd9

File tree

9 files changed

+260
-61
lines changed

9 files changed

+260
-61
lines changed

.deny.toml

+1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ allow = [
2727
[sources]
2828
allow-git = [
2929
"https://github.com/grovesNL/glow",
30+
"https://github.com/gfx-rs/metal-rs",
3031
]
3132
unknown-registry = "deny"
3233
unknown-git = "deny"

CHANGELOG.md

+4
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,10 @@ By @Valaphee in [#3402](https://github.com/gfx-rs/wgpu/pull/3402)
8888
### Documentation
8989
- Use WGSL for VertexFormat example types. By @ScanMountGoat in [#4305](https://github.com/gfx-rs/wgpu/pull/4035)
9090

91+
#### Metal
92+
93+
- Support for timestamp queries on encoders and passes. By @wumpf in [#4008](https://github.com/gfx-rs/wgpu/pull/4008)
94+
9195
### Bug Fixes
9296

9397
#### General

Cargo.lock

+1-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+2
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,8 @@ termcolor = "1.2.0"
158158
#glow = { path = "../glow" }
159159
#d3d12 = { path = "../d3d12-rs" }
160160
#metal = { path = "../metal-rs" }
161+
#metal = { path = "../metal-rs" }
162+
metal = { git = "https://github.com/gfx-rs/metal-rs/", rev = "d24f1a4" } # More timer support via https://github.com/gfx-rs/metal-rs/pull/280
161163
#web-sys = { path = "../wasm-bindgen/crates/web-sys" }
162164
#js-sys = { path = "../wasm-bindgen/crates/js-sys" }
163165
#wasm-bindgen = { path = "../wasm-bindgen" }

examples/timestamp-queries/src/main.rs

+8-10
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ impl QueryResults {
4747
// * compute end
4848
const NUM_QUERIES: u64 = 8;
4949

50+
#[allow(clippy::redundant_closure)] // False positive
5051
fn from_raw_results(timestamps: Vec<u64>, timestamps_inside_passes: bool) -> Self {
5152
assert_eq!(timestamps.len(), Self::NUM_QUERIES as usize);
5253

@@ -60,9 +61,9 @@ impl QueryResults {
6061
let mut encoder_timestamps = [0, 0];
6162
encoder_timestamps[0] = get_next_slot();
6263
let render_start_end_timestamps = [get_next_slot(), get_next_slot()];
63-
let render_inside_timestamp = timestamps_inside_passes.then_some(get_next_slot());
64+
let render_inside_timestamp = timestamps_inside_passes.then(|| get_next_slot());
6465
let compute_start_end_timestamps = [get_next_slot(), get_next_slot()];
65-
let compute_inside_timestamp = timestamps_inside_passes.then_some(get_next_slot());
66+
let compute_inside_timestamp = timestamps_inside_passes.then(|| get_next_slot());
6667
encoder_timestamps[1] = get_next_slot();
6768

6869
QueryResults {
@@ -79,8 +80,8 @@ impl QueryResults {
7980
let elapsed_us = |start, end: u64| end.wrapping_sub(start) as f64 * period as f64 / 1000.0;
8081

8182
println!(
82-
"Elapsed time render + compute: {:.2} μs",
83-
elapsed_us(self.encoder_timestamps[0], self.encoder_timestamps[1])
83+
"Elapsed time before render until after compute: {:.2} μs",
84+
elapsed_us(self.encoder_timestamps[0], self.encoder_timestamps[1]),
8485
);
8586
println!(
8687
"Elapsed time render pass: {:.2} μs",
@@ -464,13 +465,10 @@ mod tests {
464465
render_start_end_timestamps[1].wrapping_sub(render_start_end_timestamps[0]);
465466
let compute_delta =
466467
compute_start_end_timestamps[1].wrapping_sub(compute_start_end_timestamps[0]);
468+
let encoder_delta = encoder_timestamps[1].wrapping_sub(encoder_timestamps[0]);
467469

468-
// TODO: Metal encoder timestamps aren't implemented yet.
469-
if ctx.adapter.get_info().backend != wgpu::Backend::Metal {
470-
let encoder_delta = encoder_timestamps[1].wrapping_sub(encoder_timestamps[0]);
471-
assert!(encoder_delta > 0);
472-
assert!(encoder_delta >= render_delta + compute_delta);
473-
}
470+
assert!(encoder_delta > 0);
471+
assert!(encoder_delta >= render_delta + compute_delta);
474472

475473
if let Some(render_inside_timestamp) = render_inside_timestamp {
476474
assert!(render_inside_timestamp >= render_start_end_timestamps[0]);

wgpu-hal/src/metal/adapter.rs

+33-13
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ use wgt::{AstcBlock, AstcChannel};
55

66
use std::{sync::Arc, thread};
77

8+
use super::TimestampQuerySupport;
9+
810
const MAX_COMMAND_BUFFERS: u64 = 2048;
911

1012
unsafe impl Send for super::Adapter {}
@@ -536,6 +538,26 @@ impl super::PrivateCapabilities {
536538
MTLReadWriteTextureTier::TierNone
537539
};
538540

541+
let mut timestamp_query_support = TimestampQuerySupport::empty();
542+
if version.at_least((11, 0), (14, 0), os_is_mac)
543+
&& device.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtStageBoundary)
544+
{
545+
// If we don't support at stage boundary, don't support anything else.
546+
timestamp_query_support.insert(TimestampQuerySupport::STAGE_BOUNDARIES);
547+
548+
if device.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtDrawBoundary) {
549+
timestamp_query_support.insert(TimestampQuerySupport::ON_RENDER_ENCODER);
550+
}
551+
if device.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtDispatchBoundary)
552+
{
553+
timestamp_query_support.insert(TimestampQuerySupport::ON_COMPUTE_ENCODER);
554+
}
555+
if device.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtBlitBoundary) {
556+
timestamp_query_support.insert(TimestampQuerySupport::ON_BLIT_ENCODER);
557+
}
558+
// `TimestampQuerySupport::INSIDE_WGPU_PASSES` emerges from the other flags.
559+
}
560+
539561
Self {
540562
family_check,
541563
msl_version: if os_is_xr || version.at_least((12, 0), (15, 0), os_is_mac) {
@@ -773,13 +795,7 @@ impl super::PrivateCapabilities {
773795
} else {
774796
None
775797
},
776-
support_timestamp_query: version.at_least((11, 0), (14, 0), os_is_mac)
777-
&& device
778-
.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtStageBoundary),
779-
support_timestamp_query_in_passes: version.at_least((11, 0), (14, 0), os_is_mac)
780-
&& device.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtDrawBoundary)
781-
&& device
782-
.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtDispatchBoundary),
798+
timestamp_query_support,
783799
}
784800
}
785801

@@ -807,12 +823,16 @@ impl super::PrivateCapabilities {
807823
| F::DEPTH32FLOAT_STENCIL8
808824
| F::MULTI_DRAW_INDIRECT;
809825

810-
features.set(F::TIMESTAMP_QUERY, self.support_timestamp_query);
811-
// TODO: Not yet implemented.
812-
// features.set(
813-
// F::TIMESTAMP_QUERY_INSIDE_PASSES,
814-
// self.support_timestamp_query_in_passes,
815-
// );
826+
features.set(
827+
F::TIMESTAMP_QUERY,
828+
self.timestamp_query_support
829+
.contains(TimestampQuerySupport::STAGE_BOUNDARIES),
830+
);
831+
features.set(
832+
F::TIMESTAMP_QUERY_INSIDE_PASSES,
833+
self.timestamp_query_support
834+
.contains(TimestampQuerySupport::INSIDE_WGPU_PASSES),
835+
);
816836
features.set(F::TEXTURE_COMPRESSION_ASTC, self.format_astc);
817837
features.set(F::TEXTURE_COMPRESSION_ASTC_HDR, self.format_astc_hdr);
818838
features.set(F::TEXTURE_COMPRESSION_BC, self.format_bc);

0 commit comments

Comments
 (0)