Skip to content

Commit 432e4be

Browse files
committed
flag
Signed-off-by: Tong Chen <[email protected]>
1 parent 554edb3 commit 432e4be

File tree

4 files changed

+35
-44
lines changed

4 files changed

+35
-44
lines changed

src/Accelerators/NNPA/NNPAAccelerator.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ NNPAAccelerator::NNPAAccelerator() : Accelerator(Accelerator::Kind::NNPA) {
5858

5959
acceleratorTargets.push_back(this);
6060
// Order is important! libRuntimeNNPA depends on libzdnn
61-
addCompilerConfig(CCM_SHARED_LIB_DEPS, {"RuntimeNNPA", "zdnn"});
61+
addCompilerConfig(CCM_SHARED_LIB_DEPS, {"RuntimeNNPA", "zdnn"}, true);
6262
};
6363

6464
NNPAAccelerator::~NNPAAccelerator() { delete instance; }

src/Accelerators/NNPA/Runtime/zDNNExtension/MatMul.c

+30-40
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@
1717
#define _OPEN_THREADS
1818
#endif
1919
#include <pthread.h>
20-
#include <sched.h>
2120

2221
#include <assert.h>
2322
#include <math.h>
23+
#include <sched.h>
2424
#include <stdio.h>
2525
#include <stdlib.h>
2626
#include <sys/time.h>
@@ -42,6 +42,17 @@ static inline zdnn_status call_zdnn_matmul_op(const zdnn_ztensor *inputA,
4242
inputA, inputB, inputC, (zdnn_matmul_ops)opType, output);
4343
}
4444

45+
static float get_elapse(const struct timeval start_t, const struct timeval end_t) {
46+
return
47+
(((end_t.tv_sec * 1000000.) + end_t.tv_usec) - ((start_t.tv_sec * 1000000) + start_t.tv_usec))/1000;
48+
}
49+
50+
// sched.h is supposed to provide the declaration of sched_getcpu.
51+
// No problem when I compiled a standalone test case.
52+
// But in the onnx-mlir build, this function is not declared.
53+
// Explicitly declare it here.
54+
extern int sched_getcpu();
55+
4556
static zdnn_status zdnn_matmul_op_common(const zdnn_ztensor *inputA,
4657
const zdnn_ztensor *inputB, const zdnn_ztensor *inputC, int opType,
4758
zdnn_ztensor *output, bool isBcast) {
@@ -68,7 +79,6 @@ static zdnn_status zdnn_matmul_op_common(const zdnn_ztensor *inputA,
6879
double splitTime = 0.;
6980
double mmTime = 0.;
7081
double mergeTime = 0.;
71-
clock_t start_time = 0, end_time = 0;
7282
struct timeval start_t, end_t;
7383
float elapse;
7484

@@ -78,36 +88,17 @@ static zdnn_status zdnn_matmul_op_common(const zdnn_ztensor *inputA,
7888
if (OMZTensorSplitDebug)
7989
printf("[MatMul] Not split zTensor ...\n");
8090
if (OMZTensorSplitDebug)
81-
start_time = clock();
82-
gettimeofday(&start_t, NULL);
91+
gettimeofday(&start_t, NULL);
8392
zdnn_status status = call_zdnn_matmul_op(inputA, inputB, inputC, opType, output, isBcast);
8493
assert(status == ZDNN_OK && ("call_zdnn_matmul_op failed"));
8594
if (OMZTensorSplitDebug) {
86-
end_time = clock();
87-
mmTime = ((float)(end_time - start_time) / (float)CLOCKS_PER_SEC) * 1000;
8895
gettimeofday(&end_t, NULL);
89-
elapse = (((end_t.tv_sec * 1000000.) + end_t.tv_usec) - ((start_t.tv_sec * 1000000) + start_t.tv_usec))/1000;
90-
printf("[MatMul] mm, %f, %f, (milliseconds)\n", mmTime, elapse);
96+
elapse = get_elapse(start_t, end_t);
97+
printf("[MatMul] mm, %f, (milliseconds)\n", elapse);
9198
}
9299
return status;
93100
}
94101

95-
// Create a parallel loop to test the clock() and gettimeofday()
96-
// Tested with OMP_NUM_THREADS = 1 or 2, or unset
97-
start_time = clock();
98-
gettimeofday(&start_t, NULL);
99-
#pragma omp parallel for
100-
for(uint32_t i = 0; i < 2; i++) {
101-
system("sleep 5");
102-
printf("====omp thread %u) is on cpu %d=======\n", i, sched_getcpu());
103-
}
104-
end_time = clock();
105-
gettimeofday(&end_t, NULL);
106-
splitTime = ((float)(end_time - start_time) / (float)CLOCKS_PER_SEC) * 1000;
107-
printf("sleep loop measured with clock() %f (milliseconds)\n", splitTime);
108-
splitTime = (((end_t.tv_sec * 1000000.) + end_t.tv_usec) - ((start_t.tv_sec * 1000000) + start_t.tv_usec))/1000;
109-
printf("sleep loop measured with gettimeofday(): %f (milliseconds)\n", splitTime);
110-
111102
// Split input A.
112103
if (OMZTensorSplitDebug)
113104
printf("[MatMul] Split the 1st ztensor along e2 into %d chunks of %d "
@@ -116,52 +107,51 @@ static zdnn_status zdnn_matmul_op_common(const zdnn_ztensor *inputA,
116107

117108
// Split input A into chunks.
118109
if (OMZTensorSplitDebug)
119-
start_time = clock();
110+
gettimeofday(&start_t, NULL);
120111
splitZTensor(&splitInfoA, /*copyData=*/true);
121112
splitZTensor(&splitInfoY, /*copyData=*/false);
122113
if (OMZTensorSplitDebug) {
123-
end_time = clock();
124-
splitTime = ((float)(end_time - start_time) / (float)CLOCKS_PER_SEC) * 1000;
114+
gettimeofday(&end_t, NULL);
115+
splitTime = get_elapse(start_t, end_t);
125116
}
126117

127118
// Call zdnn_matmul_op on each chunk.
128119
if (OMZTensorSplitDebug)
129-
start_time = clock();
130-
gettimeofday(&start_t, NULL);
120+
gettimeofday(&start_t, NULL);
131121

132122
// Parallelize the mm part over each chunk
133123
// Thread binding is done at runtime with OMP_PLACES and OMP_PROC_BIND
134-
#pragma omp parallel for
124+
#pragma omp parallel for proc_bind(spread)
135125
for (uint32_t i = 0; i < splitInfoA.numOfChunks; ++i) {
136126
zdnn_ztensor *zaTensor = (splitInfoA.chunks + i)->ztensor;
137127
zdnn_ztensor *zyTensor = (splitInfoY.chunks + i)->ztensor;
138128
zdnn_status status = call_zdnn_matmul_op(
139129
zaTensor, inputB, inputC, opType, zyTensor, isBcast);
140130
assert(status == ZDNN_OK);
141-
printf("====omp thread %u) is on cpu %d=======\n", i, sched_getcpu());
131+
if (OMZTensorSplitDebug) {
132+
printf("====omp thread %u) is on cpu %d=======\n", i, sched_getcpu());
133+
}
142134
}
143135
if (OMZTensorSplitDebug) {
144-
end_time = clock();
145-
mmTime = ((float)(end_time - start_time) / (float)CLOCKS_PER_SEC) * 1000;
136+
gettimeofday(&end_t, NULL);
137+
mmTime = get_elapse(start_t, end_t);
146138
}
147-
gettimeofday(&end_t, NULL);
148-
elapse = (((end_t.tv_sec * 1000000.) + end_t.tv_usec) - ((start_t.tv_sec * 1000000) + start_t.tv_usec))/1000;
149139

150140
// Merging the chunks into the output.
151141
if (OMZTensorSplitDebug)
152-
start_time = clock();
142+
gettimeofday(&start_t, NULL);
153143
mergeZTensors(&splitInfoY);
154144
if (OMZTensorSplitDebug) {
155-
end_time = clock();
156-
mergeTime = ((float)(end_time - start_time) / (float)CLOCKS_PER_SEC) * 1000;
145+
gettimeofday(&end_t, NULL);
146+
mergeTime = get_elapse(start_t, end_t);
157147
}
158148

159149
freeSplitInfoBuffer(&splitInfoA);
160150
freeSplitInfoBuffer(&splitInfoY);
161151

162152
if (OMZTensorSplitDebug)
163-
printf("[MatMul] split, %f, mm, %f, %f, merge, %f (milliseconds)\n", splitTime,
164-
mmTime, elapse, mergeTime);
153+
printf("[MatMul] split, %f, mm, %f, merge, %f (milliseconds)\n", splitTime,
154+
mmTime, mergeTime);
165155

166156
return ZDNN_OK;
167157
}

src/Compiler/CompilerOptions.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -957,10 +957,10 @@ std::vector<std::string> getCompilerConfig(std::string k) {
957957

958958
// Add strings in a vector to the string vector associated
959959
// with the specified key
960-
void addCompilerConfig(std::string k, std::vector<std::string> v) {
960+
void addCompilerConfig(std::string k, std::vector<std::string> v, bool head) {
961961
std::vector<std::string> u = CompilerConfigMap[k];
962962

963-
u.insert(u.end(), v.begin(), v.end());
963+
u.insert(head ? u.begin() : u.end(), v.begin(), v.end());
964964
CompilerConfigMap[k] = u;
965965
}
966966

src/Compiler/CompilerOptions.hpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,8 @@ std::string getCompilerOption(const onnx_mlir::OptionKind kind);
191191
// The add and del functions are not thread-safe and should only be
192192
// called from one thread.
193193
std::vector<std::string> getCompilerConfig(std::string k);
194-
void addCompilerConfig(std::string k, std::vector<std::string> v);
194+
void addCompilerConfig(
195+
std::string k, std::vector<std::string> v, bool head = false);
195196
void delCompilerConfig(std::string k, std::vector<std::string> v);
196197

197198
// Functions related to initializing compiler configuration states based on

0 commit comments

Comments
 (0)