16
16
// /////////////////////////////////////////////////////////////////////
17
17
18
18
#include " simddetect.h"
19
+ #include " dotproduct.h"
20
+ #include " dotproductavx.h"
21
+ #include " dotproductsse.h"
22
+ #include " params.h" // for STRING_VAR
23
+ #include " tprintf.h" // for tprintf
19
24
20
25
#undef X86_BUILD
21
26
#if defined(__x86_64__) || defined(__i386__) || defined(_WIN32)
34
39
35
40
namespace tesseract {
36
41
42
+ // Computes and returns the dot product of the two n-vectors u and v.
43
+ // Note: because the order of addition is different among the different dot
44
+ // product functions, the results can (and do) vary slightly (although they
45
+ // agree to within about 4e-15). This produces different results when running
46
+ // training, despite all random inputs being precisely equal.
47
+ // To get consistent results, use just one of these dot product functions.
48
+ // On a test multi-layer network, serial is 57% slower than SSE, and AVX
49
+ // is about 8% faster than SSE. This suggests that the time is memory
50
+ // bandwidth constrained and could benefit from holding the reused vector
51
+ // in AVX registers.
52
+ DotProductFunction DotProduct;
53
+
54
+ static STRING_VAR (dotproduct, " auto" ,
55
+ " Function used for calculation of dot product" );
56
+
37
57
SIMDDetect SIMDDetect::detector;
38
58
39
59
// If true, then AVX has been detected.
@@ -44,12 +64,26 @@ bool SIMDDetect::avx512BW_available_;
44
64
// If true, then SSE4.1 has been detected.
45
65
bool SIMDDetect::sse_available_;
46
66
67
// Portable fallback: computes and returns the dot product of the two
// n-vectors u and v, accumulating the products in increasing index order.
// Returns 0.0 when n <= 0 (the arrays are not accessed in that case).
static double DotProductGeneric(const double* u, const double* v, int n) {
  double total = 0.0;
  for (int k = 0; k < n; ++k) {
    total += u[k] * v[k];
  }
  return total;
}
73
+
74
+ static void SetDotProduct (DotProductFunction function) {
75
+ DotProduct = function;
76
+ }
77
+
47
78
// Constructor.
48
79
// Tests the architecture in a system-dependent way to detect AVX, SSE and
49
80
// any other available SIMD equipment.
50
81
// __GNUC__ is also defined by compilers that include GNU extensions such as
51
82
// clang.
52
83
SIMDDetect::SIMDDetect () {
84
+ // The fallback is a generic dot product calculation.
85
+ SetDotProduct (DotProductGeneric);
86
+
53
87
#if defined(X86_BUILD)
54
88
# if defined(__GNUC__)
55
89
unsigned int eax, ebx, ecx, edx;
@@ -80,6 +114,57 @@ SIMDDetect::SIMDDetect() {
80
114
# error "I don't know how to test for SIMD with this compiler"
81
115
# endif
82
116
#endif // X86_BUILD
117
+
118
+ #if defined(X86_BUILD)
119
+ // Select code for calculation of dot product based on autodetection.
120
+ if (avx_available_) {
121
+ // AVX detected.
122
+ SetDotProduct (DotProductAVX);
123
+ } else if (sse_available_) {
124
+ // SSE detected.
125
+ SetDotProduct (DotProductSSE);
126
+ }
127
+ #endif // X86_BUILD
128
+ }
129
+
130
+ void SIMDDetect::Update () {
131
+ // Select code for calculation of dot product based on the
132
+ // value of the config variable if that value is not empty.
133
+ const char * dotproduct_method = " generic" ;
134
+ if (!strcmp (dotproduct.string (), " auto" )) {
135
+ // Automatic detection. Nothing to be done.
136
+ } else if (!strcmp (dotproduct.string (), " generic" )) {
137
+ // Generic code selected by config variable.
138
+ SetDotProduct (DotProductGeneric);
139
+ dotproduct_method = " generic" ;
140
+ } else if (!strcmp (dotproduct.string (), " native" )) {
141
+ // Native optimized code selected by config variable.
142
+ SetDotProduct (DotProductNative);
143
+ dotproduct_method = " native" ;
144
+ }
145
+ #if defined(X86_BUILD)
146
+ else if (!strcmp (dotproduct.string (), " avx" )) {
147
+ // AVX selected by config variable.
148
+ SetDotProduct (DotProductAVX);
149
+ dotproduct_method = " avx" ;
150
+ } else if (!strcmp (dotproduct.string (), " sse" )) {
151
+ // SSE selected by config variable.
152
+ SetDotProduct (DotProductSSE);
153
+ dotproduct_method = " sse" ;
154
+ }
155
+ #endif // X86_BUILD
156
+ else {
157
+ // Unsupported value of config variable.
158
+ tprintf (" Warning, ignoring unsupported config variable value: dotproduct=%s\n " ,
159
+ dotproduct.string ());
160
+ tprintf (" Support values for dotproduct: auto generic native"
161
+ #if defined(X86_BUILD)
162
+ " avx sse"
163
+ #endif // X86_BUILD
164
+ " .\n " );
165
+ }
166
+
167
+ dotproduct.set_value (dotproduct_method);
83
168
}
84
169
85
170
} // namespace tesseract
0 commit comments