17
17
#define _OPEN_THREADS
18
18
#endif
19
19
#include <pthread.h>
20
- #include <sched.h>
21
20
22
21
#include <assert.h>
23
22
#include <math.h>
23
+ #include <sched.h>
24
24
#include <stdio.h>
25
25
#include <stdlib.h>
26
26
#include <sys/time.h>
@@ -42,6 +42,17 @@ static inline zdnn_status call_zdnn_matmul_op(const zdnn_ztensor *inputA,
42
42
inputA , inputB , inputC , (zdnn_matmul_ops )opType , output );
43
43
}
44
44
45
/*
 * Return the wall-clock time elapsed between two gettimeofday() samples,
 * in milliseconds.
 *
 * start_t: sample taken at the beginning of the measured interval.
 * end_t:   sample taken at the end of the measured interval.
 *
 * The seconds and microseconds fields are subtracted first and only the
 * (small) differences are scaled. This avoids multiplying an absolute epoch
 * value by 1000000 in integer arithmetic, which overflows when time_t is a
 * 32-bit type.
 */
static float get_elapse(const struct timeval start_t, const struct timeval end_t) {
  const double sec_diff = (double)(end_t.tv_sec - start_t.tv_sec);
  /* May be negative when the microsecond field wrapped; sec_diff compensates. */
  const double usec_diff = (double)(end_t.tv_usec - start_t.tv_usec);
  const double elapsed_usec = sec_diff * 1000000.0 + usec_diff;
  return (float)(elapsed_usec / 1000.0);
}
49
+
50
// sched_getcpu() is a GNU extension: <sched.h> only declares it when
// _GNU_SOURCE is defined before the first system header is included.
// NOTE(review): that is presumably why the prototype is visible in a
// standalone test case but not in the onnx-mlir build -- confirm.
// Declare it explicitly here, with a full prototype, so the call below is
// never an implicit declaration.
extern int sched_getcpu(void);
55
+
45
56
static zdnn_status zdnn_matmul_op_common (const zdnn_ztensor * inputA ,
46
57
const zdnn_ztensor * inputB , const zdnn_ztensor * inputC , int opType ,
47
58
zdnn_ztensor * output , bool isBcast ) {
@@ -68,7 +79,6 @@ static zdnn_status zdnn_matmul_op_common(const zdnn_ztensor *inputA,
68
79
double splitTime = 0. ;
69
80
double mmTime = 0. ;
70
81
double mergeTime = 0. ;
71
- clock_t start_time = 0 , end_time = 0 ;
72
82
struct timeval start_t , end_t ;
73
83
float elapse ;
74
84
@@ -78,36 +88,17 @@ static zdnn_status zdnn_matmul_op_common(const zdnn_ztensor *inputA,
78
88
if (OMZTensorSplitDebug )
79
89
printf ("[MatMul] Not split zTensor ...\n" );
80
90
if (OMZTensorSplitDebug )
81
- start_time = clock ();
82
- gettimeofday (& start_t , NULL );
91
+ gettimeofday (& start_t , NULL );
83
92
zdnn_status status = call_zdnn_matmul_op (inputA , inputB , inputC , opType , output , isBcast );
84
93
assert (status == ZDNN_OK && ("call_zdnn_matmul_op failed" ));
85
94
if (OMZTensorSplitDebug ) {
86
- end_time = clock ();
87
- mmTime = ((float )(end_time - start_time ) / (float )CLOCKS_PER_SEC ) * 1000 ;
88
95
gettimeofday (& end_t , NULL );
89
- elapse = ((( end_t . tv_sec * 1000000. ) + end_t . tv_usec ) - (( start_t . tv_sec * 1000000 ) + start_t . tv_usec ))/ 1000 ;
90
- printf ("[MatMul] mm, %f, %f, (milliseconds)\n" , mmTime , elapse );
96
+ elapse = get_elapse ( start_t , end_t ) ;
97
+ printf ("[MatMul] mm, %f, (milliseconds)\n" , elapse );
91
98
}
92
99
return status ;
93
100
}
94
101
95
- // Create a parallel loop to test the clock() and gettimeofday()
96
- // Tested with OMP_NUM_THREADS = 1 or 2, or unset
97
- start_time = clock ();
98
- gettimeofday (& start_t , NULL );
99
- #pragma omp parallel for
100
- for (uint32_t i = 0 ; i < 2 ; i ++ ) {
101
- system ("sleep 5" );
102
- printf ("====omp thread %u) is on cpu %d=======\n" , i , sched_getcpu ());
103
- }
104
- end_time = clock ();
105
- gettimeofday (& end_t , NULL );
106
- splitTime = ((float )(end_time - start_time ) / (float )CLOCKS_PER_SEC ) * 1000 ;
107
- printf ("sleep loop measured with clock() %f (milliseconds)\n" , splitTime );
108
- splitTime = (((end_t .tv_sec * 1000000. ) + end_t .tv_usec ) - ((start_t .tv_sec * 1000000 ) + start_t .tv_usec ))/1000 ;
109
- printf ("sleep loop measured with gettimeofday(): %f (milliseconds)\n" , splitTime );
110
-
111
102
// Split input A.
112
103
if (OMZTensorSplitDebug )
113
104
printf ("[MatMul] Split the 1st ztensor along e2 into %d chunks of %d "
@@ -116,52 +107,51 @@ static zdnn_status zdnn_matmul_op_common(const zdnn_ztensor *inputA,
116
107
117
108
// Split input A into chunks.
118
109
if (OMZTensorSplitDebug )
119
- start_time = clock ( );
110
+ gettimeofday ( & start_t , NULL );
120
111
splitZTensor (& splitInfoA , /*copyData=*/ true);
121
112
splitZTensor (& splitInfoY , /*copyData=*/ false);
122
113
if (OMZTensorSplitDebug ) {
123
- end_time = clock ( );
124
- splitTime = (( float )( end_time - start_time ) / ( float ) CLOCKS_PER_SEC ) * 1000 ;
114
+ gettimeofday ( & end_t , NULL );
115
+ splitTime = get_elapse ( start_t , end_t ) ;
125
116
}
126
117
127
118
// Call zdnn_matmul_op on each chunk.
128
119
if (OMZTensorSplitDebug )
129
- start_time = clock ();
130
- gettimeofday (& start_t , NULL );
120
+ gettimeofday (& start_t , NULL );
131
121
132
122
// Parallelize the mm part over each chunk
133
123
// Thread placement: OMP_PLACES is read at runtime; a proc_bind clause on the pragma below, when present, takes precedence over OMP_PROC_BIND.
134
- #pragma omp parallel for
124
+ #pragma omp parallel for proc_bind(spread)
135
125
for (uint32_t i = 0 ; i < splitInfoA .numOfChunks ; ++ i ) {
136
126
zdnn_ztensor * zaTensor = (splitInfoA .chunks + i )-> ztensor ;
137
127
zdnn_ztensor * zyTensor = (splitInfoY .chunks + i )-> ztensor ;
138
128
zdnn_status status = call_zdnn_matmul_op (
139
129
zaTensor , inputB , inputC , opType , zyTensor , isBcast );
140
130
assert (status == ZDNN_OK );
141
- printf ("====omp thread %u) is on cpu %d=======\n" , i , sched_getcpu ());
131
+ if (OMZTensorSplitDebug ) {
132
+ printf ("====omp thread %u) is on cpu %d=======\n" , i , sched_getcpu ());
133
+ }
142
134
}
143
135
if (OMZTensorSplitDebug ) {
144
- end_time = clock ( );
145
- mmTime = (( float )( end_time - start_time ) / ( float ) CLOCKS_PER_SEC ) * 1000 ;
136
+ gettimeofday ( & end_t , NULL );
137
+ mmTime = get_elapse ( start_t , end_t ) ;
146
138
}
147
- gettimeofday (& end_t , NULL );
148
- elapse = (((end_t .tv_sec * 1000000. ) + end_t .tv_usec ) - ((start_t .tv_sec * 1000000 ) + start_t .tv_usec ))/1000 ;
149
139
150
140
// Merging the chunks into the output.
151
141
if (OMZTensorSplitDebug )
152
- start_time = clock ( );
142
+ gettimeofday ( & start_t , NULL );
153
143
mergeZTensors (& splitInfoY );
154
144
if (OMZTensorSplitDebug ) {
155
- end_time = clock ( );
156
- mergeTime = (( float )( end_time - start_time ) / ( float ) CLOCKS_PER_SEC ) * 1000 ;
145
+ gettimeofday ( & end_t , NULL );
146
+ mergeTime = get_elapse ( start_t , end_t ) ;
157
147
}
158
148
159
149
freeSplitInfoBuffer (& splitInfoA );
160
150
freeSplitInfoBuffer (& splitInfoY );
161
151
162
152
if (OMZTensorSplitDebug )
163
- printf ("[MatMul] split, %f, mm, %f, %f, merge, %f (milliseconds)\n" , splitTime ,
164
- mmTime , elapse , mergeTime );
153
+ printf ("[MatMul] split, %f, mm, %f, merge, %f (milliseconds)\n" , splitTime ,
154
+ mmTime , mergeTime );
165
155
166
156
return ZDNN_OK ;
167
157
}
0 commit comments