Add config variable for selection of dot product function

stweil · stweil · commit f0a4d04187da · 2018-12-01T00:19:28.000+01:00
Also add a C++ implementation with more aggressive compiler options
which is optimized for the CPU where the software was built.

It is now possible to select the function used for the dot product
with -c dotproduct=FUNCTION where FUNCTION can be one of these values:

* auto      selection based on detected hardware (default)
* generic   C++ code with default compiler options
* native    C++ code optimized for build host
* avx       optimized code for AVX
* sse       optimized code for SSE

Signed-off-by: Stefan Weil <sw@weilnetz.de>
diff --git a/src/api/Makefile.am b/src/api/Makefile.am
@@ -48,6 +48,7 @@ libtesseract_la_LIBADD = \
     ../classify/libtesseract_classify.la \
     ../dict/libtesseract_dict.la \
     ../arch/libtesseract_arch.la \
+    ../arch/libtesseract_native.la \
     ../arch/libtesseract_avx.la \
     ../arch/libtesseract_avx2.la \
     ../arch/libtesseract_sse.la \
diff --git a/src/api/tesseractmain.cpp b/src/api/tesseractmain.cpp
@@ -1,8 +1,7 @@
 /**********************************************************************
- * File:        tesseractmain.cpp  (Formerly tessedit.c)
+ * File:        tesseractmain.cpp
  * Description: Main program for merge of tess and editor.
  * Author:      Ray Smith
- * Created:     Tue Jan 07 15:21:46 GMT 1992
  *
  * (C) Copyright 1992, Hewlett-Packard Ltd.
  ** Licensed under the Apache License, Version 2.0 (the "License");
@@ -585,6 +584,9 @@ int main(int argc, char** argv) {
 
   SetVariablesFromCLArgs(&api, argc, argv);
 
+  // SIMD settings might be overridden by config variable.
+  tesseract::SIMDDetect::Update();
+
   if (list_langs) {
     PrintLangsList(&api);
     return EXIT_SUCCESS;
diff --git a/src/arch/Makefile.am b/src/arch/Makefile.am
@@ -10,11 +10,13 @@ endif
 
 pkginclude_HEADERS =
 
-noinst_HEADERS = dotproductavx.h dotproductsse.h
+noinst_HEADERS = dotproduct.h dotproductavx.h dotproductsse.h
 noinst_HEADERS += intsimdmatrix.h intsimdmatrixavx2.h intsimdmatrixsse.h
 noinst_HEADERS += simddetect.h
 
-noinst_LTLIBRARIES = libtesseract_avx.la libtesseract_avx2.la libtesseract_sse.la
+noinst_LTLIBRARIES = libtesseract_native.la
+noinst_LTLIBRARIES += libtesseract_avx.la libtesseract_avx2.la
+noinst_LTLIBRARIES += libtesseract_sse.la
 noinst_LTLIBRARIES += libtesseract_arch.la
 
 if AVX_OPT
@@ -27,6 +29,9 @@ if SSE41_OPT
 libtesseract_sse_la_CXXFLAGS = -ffast-math -msse4.1
 endif
 
+libtesseract_native_la_CXXFLAGS = -O3 -ffast-math -march=native -mtune=native
+libtesseract_native_la_SOURCES = dotproduct.cpp
+
 libtesseract_arch_la_SOURCES = intsimdmatrix.cpp simddetect.cpp
 
 libtesseract_avx_la_SOURCES = dotproductavx.cpp
diff --git a/src/arch/dotproduct.cpp b/src/arch/dotproduct.cpp
@@ -0,0 +1,28 @@
+///////////////////////////////////////////////////////////////////////
+// File:        dotproduct.cpp
+// Description: Native dot product function.
+//
+// (C) Copyright 2018, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+///////////////////////////////////////////////////////////////////////
+
+#include "dotproduct.h"
+
+namespace tesseract {
+
+// Returns the sum of u[k] * v[k] for k in [0, n); u and v must each hold at least n doubles (0.0 if n <= 0).
+double DotProductNative(const double* u, const double* v, int n) {
+  double total = 0.0;  // Running sum of the element-wise products.
+  for (int k = 0; k < n; ++k) total += u[k] * v[k];  // Plain serial loop; any vectorization is left to the compiler.
+  return total;
+}
+
+}  // namespace tesseract
diff --git a/src/arch/dotproduct.h b/src/arch/dotproduct.h
@@ -0,0 +1,27 @@
+///////////////////////////////////////////////////////////////////////
+// File:        dotproduct.h
+// Description: Native dot product function.
+//
+// (C) Copyright 2018, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_ARCH_DOTPRODUCT_H_
+#define TESSERACT_ARCH_DOTPRODUCT_H_
+
+namespace tesseract {
+
+// Computes and returns the dot product of the n-vectors u and v.
+double DotProductNative(const double* u, const double* v, int n);
+
+}  // namespace tesseract.
+
+#endif  // TESSERACT_ARCH_DOTPRODUCT_H_
diff --git a/src/arch/simddetect.cpp b/src/arch/simddetect.cpp
@@ -16,6 +16,11 @@
 ///////////////////////////////////////////////////////////////////////
 
 #include "simddetect.h"
+#include "dotproduct.h"
+#include "dotproductavx.h"
+#include "dotproductsse.h"
+#include "params.h"   // for STRING_VAR
+#include "tprintf.h"  // for tprintf
 
 #undef X86_BUILD
 #if defined(__x86_64__) || defined(__i386__) || defined(_WIN32)
@@ -34,6 +39,21 @@
 
 namespace tesseract {
 
+// Computes and returns the dot product of the two n-vectors u and v.
+// Note: because the order of addition is different among the different dot
+// product functions, the results can (and do) vary slightly (although they
+// agree to within about 4e-15). This produces different results when running
+// training, despite all random inputs being precisely equal.
+// To get consistent results, use just one of these dot product functions.
+// On a test multi-layer network, serial is 57% slower than SSE, and AVX
+// is about 8% faster than SSE. This suggests that the time is memory
+// bandwidth constrained and could benefit from holding the reused vector
+// in AVX registers.
+DotProductFunction DotProduct;
+
+static STRING_VAR(dotproduct, "auto",
+                  "Function used for calculation of dot product");
+
 SIMDDetect SIMDDetect::detector;
 
 // If true, then AVX has been detected.
@@ -44,12 +64,26 @@ bool SIMDDetect::avx512BW_available_;
 // If true, then SSe4.1 has been detected.
 bool SIMDDetect::sse_available_;
 
+// Fallback dot product: returns the sum of u[k] * v[k] for k in [0, n), accumulated serially (0.0 if n <= 0).
+static double DotProductGeneric(const double* u, const double* v, int n) {
+  double total = 0.0;  // Running sum of the element-wise products.
+  for (int k = 0; k < n; ++k) total += u[k] * v[k];  // Products are added in index order.
+  return total;
+}
+
+static void SetDotProduct(DotProductFunction function) {
+  DotProduct = function;  // Install `function` in the global DotProduct pointer declared in simddetect.h.
+}
+
 // Constructor.
 // Tests the architecture in a system-dependent way to detect AVX, SSE and
 // any other available SIMD equipment.
 // __GNUC__ is also defined by compilers that include GNU extensions such as
 // clang.
 SIMDDetect::SIMDDetect() {
+  // The fallback is a generic dot product calculation.
+  SetDotProduct(DotProductGeneric);
+
 #if defined(X86_BUILD)
 #  if defined(__GNUC__)
   unsigned int eax, ebx, ecx, edx;
@@ -80,6 +114,57 @@ SIMDDetect::SIMDDetect() {
 #    error "I don't know how to test for SIMD with this compiler"
 #  endif
 #endif  // X86_BUILD
+
+#if defined(X86_BUILD)
+  // Select code for calculation of dot product based on autodetection.
+  if (avx_available_) {
+    // AVX detected.
+    SetDotProduct(DotProductAVX);
+  } else if (sse_available_) {
+    // SSE detected.
+    SetDotProduct(DotProductSSE);
+  }
+#endif  // X86_BUILD
+}
+
+void SIMDDetect::Update() {
+  // Installs the dot product function requested by the config variable
+  // "dotproduct" and stores the name of the function in effect in it.
+  const char* const requested = dotproduct.string();
+  if (!strcmp(requested, "auto")) {
+    // Automatic detection. Keep the function chosen by the constructor.
+  } else if (!strcmp(requested, "generic")) {
+    SetDotProduct(DotProductGeneric);
+  } else if (!strcmp(requested, "native")) {
+    SetDotProduct(DotProductNative);
+  }
+#if defined(X86_BUILD)
+  else if (!strcmp(requested, "avx")) {
+    SetDotProduct(DotProductAVX);
+  } else if (!strcmp(requested, "sse")) {
+    SetDotProduct(DotProductSSE);
+  }
+#endif  // X86_BUILD
+  else {
+    // Unsupported value: warn and keep the current function.
+    tprintf("Warning, ignoring unsupported config variable value: dotproduct=%s\n",
+            requested);
+    tprintf("Supported values for dotproduct: auto generic native"
+#if defined(X86_BUILD)
+            " avx sse"
+#endif  // X86_BUILD
+            ".\n");
+  }
+
+  // Name the function actually in effect. Recording "generic" for "auto" or
+  // an ignored value would make a later Update() drop the SIMD function.
+  const char* dotproduct_method = "generic";
+  if (DotProduct == DotProductNative) dotproduct_method = "native";
+#if defined(X86_BUILD)
+  else if (DotProduct == DotProductAVX) dotproduct_method = "avx";
+  else if (DotProduct == DotProductSSE) dotproduct_method = "sse";
+#endif  // X86_BUILD
+  dotproduct.set_value(dotproduct_method);
 }
 
 }  // namespace tesseract
diff --git a/src/arch/simddetect.h b/src/arch/simddetect.h
@@ -21,6 +21,10 @@
 
 namespace tesseract {
 
+// Function pointer for best calculation of dot product.
+typedef double (*DotProductFunction)(const double* u, const double* v, int n);
+extern DotProductFunction DotProduct;
+
 // Architecture detector. Add code here to detect any other architectures for
 // SIMD-based faster dot product functions. Intended to be a single static
 // object, but it does no real harm to have more than one.
@@ -41,6 +45,9 @@ class SIMDDetect {
   // Returns true if SSE4.1 is available on this system.
   static inline bool IsSSEAvailable() { return detector.sse_available_; }
 
+  // Update settings after config variable was set.
+  static void Update();
+
  private:
   // Constructor, must set all static member variables.
   SIMDDetect();
diff --git a/src/lstm/weightmatrix.cpp b/src/lstm/weightmatrix.cpp
@@ -2,7 +2,6 @@
 // File:        weightmatrix.cpp
 // Description: Hides distinction between float/int implementations.
 // Author:      Ray Smith
-// Created:     Tue Jun 17 11:46:20 PST 2014
 //
 // (C) Copyright 2014, Google Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
@@ -18,10 +17,8 @@
 
 #include "weightmatrix.h"
 
-#include "dotproductavx.h"
-#include "dotproductsse.h"
 #include "intsimdmatrix.h"
-#include "simddetect.h"
+#include "simddetect.h"         // for DotProduct
 #include "statistc.h"
 #include "tprintf.h"
 
@@ -38,29 +35,6 @@ const int kAdamCorrectionIterations = 200000;
 // Epsilon in Adam to prevent division by zero.
 const double kAdamEpsilon = 1e-8;
 
-// Computes and returns the dot product of the two n-vectors u and v.
-static inline double DotProduct(const double* u, const double* v, int n) {
-  // Note: because the order of addition is different among the 3 DotProduct
-  // functions, the results can (and do) vary slightly (although they agree
-  // to within about 4e-15). This produces different results when running
-  // training, despite all random inputs being precisely equal.
-  // To get consistent results, use just one of these DotProduct functions.
-  // On a test multi-layer network, serial is 57% slower than sse, and avx
-  // is about 8% faster than sse. This suggests that the time is memory
-  // bandwidth constrained and could benefit from holding the reused vector
-  // in AVX registers.
-
-  if (SIMDDetect::IsAVXAvailable())
-    return DotProductAVX(u, v, n);
-
-  if (SIMDDetect::IsSSEAvailable())
-    return DotProductSSE(u, v, n);
-
-  double total = 0.0;
-  for (int k = 0; k < n; ++k) total += u[k] * v[k];
-  return total;
-}
-
 // Computes matrix.vector v = Wu.
 // u is of size W.dim2() - add_bias_fwd and the output v is of size
 // W.dim1() - skip_bias_back.
diff --git a/src/lstm/weightmatrix.h b/src/lstm/weightmatrix.h
@@ -2,7 +2,6 @@
 // File:        weightmatrix.h
 // Description: Hides distinction between float/int implementations.
 // Author:      Ray Smith
-// Created:     Tue Jun 17 09:05:39 PST 2014
 //
 // (C) Copyright 2014, Google Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");