@@ -42,9 +42,11 @@ static inline zdnn_status call_zdnn_matmul_op(const zdnn_ztensor *inputA,
42
42
inputA , inputB , inputC , (zdnn_matmul_ops )opType , output );
43
43
}
44
44
45
// Return the wall-clock time elapsed between start_t and end_t, in
// milliseconds.
//
// start_t: timestamp taken before the measured region.
// end_t:   timestamp taken after the measured region.
//
// The result is negative when end_t precedes start_t.
static float get_elapse(
    const struct timeval start_t, const struct timeval end_t) {
  // Convert each field to double before doing any arithmetic: multiplying
  // tv_sec by 1000000 in integer arithmetic overflows when time_t is 32 bits.
  // Subtracting the seconds first also keeps the intermediate values small.
  const double sec_diff = (double)end_t.tv_sec - (double)start_t.tv_sec;
  const double usec_diff = (double)end_t.tv_usec - (double)start_t.tv_usec;
  // Total difference in microseconds, then scaled down to milliseconds.
  return (float)((sec_diff * 1000000.0 + usec_diff) / 1000.0);
}
49
51
50
52
// sched.h is expected to declare sched_getcpu.
@@ -89,13 +91,14 @@ static zdnn_status zdnn_matmul_op_common(const zdnn_ztensor *inputA,
89
91
printf ("[MatMul] Not split zTensor ...\n" );
90
92
if (OMZTensorSplitDebug )
91
93
gettimeofday (& start_t , NULL );
92
- zdnn_status status = call_zdnn_matmul_op (inputA , inputB , inputC , opType , output , isBcast );
94
+ zdnn_status status =
95
+ call_zdnn_matmul_op (inputA , inputB , inputC , opType , output , isBcast );
93
96
assert (status == ZDNN_OK && ("call_zdnn_matmul_op failed" ));
94
- if (OMZTensorSplitDebug ) {
95
- gettimeofday (& end_t , NULL );
96
- elapse = get_elapse (start_t , end_t );
97
- printf ("[MatMul] mm, %f, (milliseconds)\n" , elapse );
98
- }
97
+ if (OMZTensorSplitDebug ) {
98
+ gettimeofday (& end_t , NULL );
99
+ elapse = get_elapse (start_t , end_t );
100
+ printf ("[MatMul] mm, %f, (milliseconds)\n" , elapse );
101
+ }
99
102
return status ;
100
103
}
101
104
@@ -119,8 +122,8 @@ static zdnn_status zdnn_matmul_op_common(const zdnn_ztensor *inputA,
119
122
if (OMZTensorSplitDebug )
120
123
gettimeofday (& start_t , NULL );
121
124
122
- // Parallelize the mm part over each chunk
123
- // Thread binding is done at runtime with OMP_PLACES and OMP_PROC_BIND
125
+ // Parallelize the mm part over each chunk
126
+ // Thread binding is done at runtime with OMP_PLACES and OMP_PROC_BIND
124
127
#pragma omp parallel for proc_bind(spread)
125
128
for (uint32_t i = 0 ; i < splitInfoA .numOfChunks ; ++ i ) {
126
129
zdnn_ztensor * zaTensor = (splitInfoA .chunks + i )-> ztensor ;
0 commit comments