Skip to content

Commit 9a89c62

Browse files
committed
[Benchmarks] Add MemcpyExecute scenario with high ops count to measure API overhead
1 parent 5b57041 commit 9a89c62

File tree

1 file changed

+14
-11
lines changed

1 file changed

+14
-11
lines changed

scripts/benchmarks/benches/compute.py

Lines changed: 14 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -59,14 +59,16 @@ def benchmarks(self) -> list[Benchmark]:
5959
ExecImmediateCopyQueue(self, 0, 1, 'Device', 'Device', 1024),
6060
ExecImmediateCopyQueue(self, 1, 1, 'Device', 'Host', 1024),
6161
VectorSum(self),
62-
MemcpyExecute(self, 400, 1, 102400, 10, 1, 1),
63-
MemcpyExecute(self, 100, 8, 102400, 10, 1, 1),
64-
MemcpyExecute(self, 400, 8, 1024, 1000, 1, 1),
65-
MemcpyExecute(self, 10, 16, 1024, 10000, 1, 1),
66-
MemcpyExecute(self, 400, 1, 102400, 10, 0, 1),
67-
MemcpyExecute(self, 100, 8, 102400, 10, 0, 1),
68-
MemcpyExecute(self, 400, 8, 1024, 1000, 0, 1),
69-
MemcpyExecute(self, 10, 16, 1024, 10000, 0, 1),
62+
MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1),
63+
MemcpyExecute(self, 100, 8, 102400, 10, 1, 1, 1),
64+
MemcpyExecute(self, 400, 8, 1024, 1000, 1, 1, 1),
65+
MemcpyExecute(self, 10, 16, 1024, 10000, 1, 1, 1),
66+
MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1),
67+
MemcpyExecute(self, 100, 8, 102400, 10, 0, 1, 1),
68+
MemcpyExecute(self, 400, 8, 1024, 1000, 0, 1, 1),
69+
MemcpyExecute(self, 10, 16, 1024, 10000, 0, 1, 1),
70+
MemcpyExecute(self, 1, 4096, 1024, 10, 0, 1, 0),
71+
MemcpyExecute(self, 4, 4096, 1024, 10, 0, 1, 0),
7072
]
7173

7274
if options.ur is not None:
@@ -267,22 +269,23 @@ def bin_args(self) -> list[str]:
267269
]
268270

269271
class MemcpyExecute(ComputeBenchmark):
270-
def __init__(self, bench, numOpsPerThread, numThreads, allocSize, iterations, srcUSM, dstUSM):
272+
def __init__(self, bench, numOpsPerThread, numThreads, allocSize, iterations, srcUSM, dstUSM, useEvent):
271273
self.numOpsPerThread = numOpsPerThread
272274
self.numThreads = numThreads
273275
self.allocSize = allocSize
274276
self.iterations = iterations
275277
self.srcUSM = srcUSM
276278
self.dstUSM = dstUSM
279+
self.useEvents = useEvent
277280
super().__init__(bench, "multithread_benchmark_ur", "MemcpyExecute")
278281

279282
def name(self):
280-
return f"multithread_benchmark_ur MemcpyExecute opsPerThread:{self.numOpsPerThread}, numThreads:{self.numThreads}, allocSize:{self.allocSize} srcUSM:{self.srcUSM} dstUSM:{self.dstUSM}"
283+
return f"multithread_benchmark_ur MemcpyExecute opsPerThread:{self.numOpsPerThread}, numThreads:{self.numThreads}, allocSize:{self.allocSize} srcUSM:{self.srcUSM} dstUSM:{self.dstUSM}" + (" without events" if not self.useEvents else "")
281284

282285
def bin_args(self) -> list[str]:
283286
return [
284287
"--Ioq=1",
285-
"--UseEvents=1",
288+
f"--UseEvents={self.useEvents}",
286289
"--MeasureCompletion=1",
287290
"--UseQueuePerThread=1",
288291
f"--AllocSize={self.allocSize}",

0 commit comments

Comments
 (0)