@@ -59,14 +59,16 @@ def benchmarks(self) -> list[Benchmark]:
59
59
ExecImmediateCopyQueue (self , 0 , 1 , 'Device' , 'Device' , 1024 ),
60
60
ExecImmediateCopyQueue (self , 1 , 1 , 'Device' , 'Host' , 1024 ),
61
61
VectorSum (self ),
62
- MemcpyExecute (self , 400 , 1 , 102400 , 10 , 1 , 1 ),
63
- MemcpyExecute (self , 100 , 8 , 102400 , 10 , 1 , 1 ),
64
- MemcpyExecute (self , 400 , 8 , 1024 , 1000 , 1 , 1 ),
65
- MemcpyExecute (self , 10 , 16 , 1024 , 10000 , 1 , 1 ),
66
- MemcpyExecute (self , 400 , 1 , 102400 , 10 , 0 , 1 ),
67
- MemcpyExecute (self , 100 , 8 , 102400 , 10 , 0 , 1 ),
68
- MemcpyExecute (self , 400 , 8 , 1024 , 1000 , 0 , 1 ),
69
- MemcpyExecute (self , 10 , 16 , 1024 , 10000 , 0 , 1 ),
62
+ MemcpyExecute (self , 400 , 1 , 102400 , 10 , 1 , 1 , 1 ),
63
+ MemcpyExecute (self , 100 , 8 , 102400 , 10 , 1 , 1 , 1 ),
64
+ MemcpyExecute (self , 400 , 8 , 1024 , 1000 , 1 , 1 , 1 ),
65
+ MemcpyExecute (self , 10 , 16 , 1024 , 10000 , 1 , 1 , 1 ),
66
+ MemcpyExecute (self , 400 , 1 , 102400 , 10 , 0 , 1 , 1 ),
67
+ MemcpyExecute (self , 100 , 8 , 102400 , 10 , 0 , 1 , 1 ),
68
+ MemcpyExecute (self , 400 , 8 , 1024 , 1000 , 0 , 1 , 1 ),
69
+ MemcpyExecute (self , 10 , 16 , 1024 , 10000 , 0 , 1 , 1 ),
70
+ MemcpyExecute (self , 1 , 4096 , 1024 , 10 , 0 , 1 , 0 ),
71
+ MemcpyExecute (self , 4 , 4096 , 1024 , 10 , 0 , 1 , 0 ),
70
72
]
71
73
72
74
if options .ur is not None :
@@ -267,22 +269,23 @@ def bin_args(self) -> list[str]:
267
269
]
268
270
269
271
class MemcpyExecute (ComputeBenchmark ):
270
# MemcpyExecute constructor (shown as a diff: "-" is the old signature, "+" the new one).
# Stores each benchmark parameter on the instance, then hands off to the parent
# initializer with the fixed strings "multithread_benchmark_ur" and "MemcpyExecute".
# The new signature appends a required `useEvent` parameter with no default, so
# every call site has to pass it; the MemcpyExecute(...) entries in the first hunk
# of this diff are updated to do so (1 for the existing entries, 0 for the two new ones).
- def __init__ (self , bench , numOpsPerThread , numThreads , allocSize , iterations , srcUSM , dstUSM ):
272
+ def __init__ (self , bench , numOpsPerThread , numThreads , allocSize , iterations , srcUSM , dstUSM , useEvent ):
271
273
self .numOpsPerThread = numOpsPerThread
272
274
self .numThreads = numThreads
273
275
self .allocSize = allocSize
274
276
self .iterations = iterations
275
277
self .srcUSM = srcUSM
276
278
self .dstUSM = dstUSM
# NOTE: the attribute is spelled `useEvents` (plural) while the parameter is
# `useEvent`; name() and bin_args() both read `self.useEvents`.
279
+ self .useEvents = useEvent
277
280
super ().__init__ (bench , "multithread_benchmark_ur" , "MemcpyExecute" )
278
281
279
282
# Returns the display name for this benchmark configuration: a fixed
# "multithread_benchmark_ur MemcpyExecute" prefix followed by the opsPerThread,
# numThreads, allocSize, srcUSM and dstUSM values (iterations is not included).
# The "+" version appends " without events" only when self.useEvents is falsy,
# so configurations that keep events enabled retain the same name as before.
# NOTE(review): the spaces inside/after the {...} fields look like an artifact of
# how this diff was rendered - confirm the exact string against the real file.
def name (self ):
280
- return f"multithread_benchmark_ur MemcpyExecute opsPerThread:{ self .numOpsPerThread } , numThreads:{ self .numThreads } , allocSize:{ self .allocSize } srcUSM:{ self .srcUSM } dstUSM:{ self .dstUSM } "
283
+ return f"multithread_benchmark_ur MemcpyExecute opsPerThread:{ self .numOpsPerThread } , numThreads:{ self .numThreads } , allocSize:{ self .allocSize } srcUSM:{ self .srcUSM } dstUSM:{ self .dstUSM } " + ( " without events" if not self . useEvents else "" )
281
284
282
285
def bin_args (self ) -> list [str ]:
283
286
return [
284
287
"--Ioq=1" ,
285
- "--UseEvents=1 " ,
288
+ f "--UseEvents={ self . useEvents } " ,
286
289
"--MeasureCompletion=1" ,
287
290
"--UseQueuePerThread=1" ,
288
291
f"--AllocSize={ self .allocSize } " ,
0 commit comments