
Commit 4e9665d

Added ADAM optimizer, unless git screwed it up, cos there is no diff
1 parent 2633fef commit 4e9665d

21 files changed: +386 -130 lines

arch/Makefile.am (+1 -1)

@@ -1,4 +1,4 @@
-AM_CPPFLAGS += -I$(top_srcdir)/ccutil -I$(top_srcdir)/viewer
+AM_CPPFLAGS += -I$(top_srcdir)/ccutil -I$(top_srcdir)/viewer -DUSE_STD_NAMESPACE
 AUTOMAKE_OPTIONS = subdir-objects
 SUBDIRS =
 AM_CXXFLAGS =

arch/simddetect.cpp (+14)

@@ -37,6 +37,9 @@ SIMDDetect SIMDDetect::detector;
 
 // If true, then AVX has been detected.
 bool SIMDDetect::avx_available_;
+bool SIMDDetect::avx2_available_;
+bool SIMDDetect::avx512F_available_;
+bool SIMDDetect::avx512BW_available_;
 // If true, then SSE4.1 has been detected.
 bool SIMDDetect::sse_available_;
 
@@ -50,8 +53,19 @@ SIMDDetect::SIMDDetect() {
 #if defined(__GNUC__)
   unsigned int eax, ebx, ecx, edx;
   if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0) {
+    // Note that these tests all use hex because the older compilers don't
+    // have the newer flags.
     sse_available_ = (ecx & 0x00080000) != 0;
     avx_available_ = (ecx & 0x10000000) != 0;
+    if (avx_available_) {
+      // There is supposed to be a __get_cpuid_count function, but this is
+      // all there is in my cpuid.h. It is a macro for an asm statement and
+      // cannot be used inside an if.
+      __cpuid_count(7, 0, eax, ebx, ecx, edx);
+      avx2_available_ = (ebx & 0x00000020) != 0;
+      avx512F_available_ = (ebx & 0x00010000) != 0;
+      avx512BW_available_ = (ebx & 0x40000000) != 0;
+    }
   }
 #elif defined(_WIN32)
   int cpuInfo[4];
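
For reference, the hex masks test documented CPUID feature bits: leaf 1 ECX bit 19 (SSE4.1) and bit 28 (AVX); leaf 7 subleaf 0 EBX bit 5 (AVX2), bit 16 (AVX512F), and bit 30 (AVX512BW). A minimal standalone sketch of the same probe, assuming a GCC/Clang <cpuid.h> that provides __get_cpuid and the __cpuid_count macro as in the patch:

#include <cpuid.h>
#include <cstdio>

int main() {
  unsigned int eax, ebx, ecx, edx;
  if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0) return 1;
  bool sse41 = (ecx & 0x00080000) != 0;  // leaf 1, ECX bit 19.
  bool avx = (ecx & 0x10000000) != 0;    // leaf 1, ECX bit 28.
  bool avx2 = false, avx512f = false, avx512bw = false;
  if (avx) {
    __cpuid_count(7, 0, eax, ebx, ecx, edx);  // leaf 7, subleaf 0.
    avx2 = (ebx & 0x00000020) != 0;      // EBX bit 5.
    avx512f = (ebx & 0x00010000) != 0;   // EBX bit 16.
    avx512bw = (ebx & 0x40000000) != 0;  // EBX bit 30.
  }
  printf("SSE4.1=%d AVX=%d AVX2=%d AVX512F=%d AVX512BW=%d\n",
         sse41, avx, avx2, avx512f, avx512bw);
  return 0;
}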

arch/simddetect.h (+13)

@@ -24,6 +24,16 @@ class SIMDDetect {
  public:
   // Returns true if AVX is available on this system.
   static inline bool IsAVXAvailable() { return detector.avx_available_; }
+  // Returns true if AVX2 (integer support) is available on this system.
+  static inline bool IsAVX2Available() { return detector.avx2_available_; }
+  // Returns true if AVX512 Foundation (float) is available on this system.
+  static inline bool IsAVX512FAvailable() {
+    return detector.avx512F_available_;
+  }
+  // Returns true if AVX512 integer is available on this system.
+  static inline bool IsAVX512BWAvailable() {
+    return detector.avx512BW_available_;
+  }
   // Returns true if SSE4.1 is available on this system.
   static inline bool IsSSEAvailable() { return detector.sse_available_; }
 
@@ -36,6 +46,9 @@ class SIMDDetect {
   static SIMDDetect detector;
   // If true, then AVX has been detected.
   static TESS_API bool avx_available_;
+  static TESS_API bool avx2_available_;
+  static TESS_API bool avx512F_available_;
+  static TESS_API bool avx512BW_available_;
   // If true, then SSE4.1 has been detected.
   static TESS_API bool sse_available_;
 };
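
A hypothetical caller-side sketch of how these accessors would gate a SIMD code path (DotProductAVX2, DotProductSSE, and DotProductGeneric are placeholder names, not functions added by this commit):

#include "simddetect.h"

// Placeholder kernels, assumed to be defined elsewhere.
double DotProductAVX2(const double* u, const double* v, int n);
double DotProductSSE(const double* u, const double* v, int n);
double DotProductGeneric(const double* u, const double* v, int n);

// Picks the best available implementation at run time.
double DotProduct(const double* u, const double* v, int n) {
  if (SIMDDetect::IsAVX2Available()) return DotProductAVX2(u, v, n);
  if (SIMDDetect::IsSSEAvailable()) return DotProductSSE(u, v, n);
  return DotProductGeneric(u, v, n);
}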

ccstruct/matrix.h (+9 -6)

@@ -360,19 +360,22 @@ class GENERIC_2D_ARRAY {
   }
 
   // Accumulates the element-wise sums of squares of src into *this.
-  void SumSquares(const GENERIC_2D_ARRAY<T>& src) {
+  void SumSquares(const GENERIC_2D_ARRAY<T>& src, T decay_factor) {
+    T update_factor = 1.0 - decay_factor;
     int size = num_elements();
     for (int i = 0; i < size; ++i) {
-      array_[i] += src.array_[i] * src.array_[i];
+      array_[i] = array_[i] * decay_factor +
+                  update_factor * src.array_[i] * src.array_[i];
     }
   }
 
-  // Scales each element using the ada-grad algorithm, ie array_[i] by
-  // sqrt(num_samples/max(1,sqsum[i])).
-  void AdaGradScaling(const GENERIC_2D_ARRAY<T>& sqsum, int num_samples) {
+  // Applies the adam update to each element, ie
+  // array_[i] += sum[i] / (sqrt(sqsum[i]) + epsilon).
+  void AdamUpdate(const GENERIC_2D_ARRAY<T>& sum,
+                  const GENERIC_2D_ARRAY<T>& sqsum, T epsilon) {
     int size = num_elements();
     for (int i = 0; i < size; ++i) {
-      array_[i] *= sqrt(num_samples / MAX(1.0, sqsum.array_[i]));
+      array_[i] += sum.array_[i] / (sqrt(sqsum.array_[i]) + epsilon);
     }
   }
 
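
Together the two methods form the core of an Adam-style step (Kingma & Ba): SumSquares maintains the decayed second moment v <- decay*v + (1-decay)*g^2, and AdamUpdate applies w <- w + m/(sqrt(v) + epsilon), where m is the accumulated, already-scaled gradient held in `sum`. A scalar sketch under those assumptions, with illustrative names:

#include <cmath>

// One Adam-style step for a single weight. `g` is the raw gradient,
// `grad_sum` the momentum- and learning-rate-scaled gradient accumulator
// (the `sum` argument of AdamUpdate), `sq_sum` the decayed sum of squares.
double AdamStep(double w, double grad_sum, double* sq_sum, double g,
                double decay, double epsilon) {
  // SumSquares: v = decay * v + (1 - decay) * g^2.
  *sq_sum = decay * *sq_sum + (1.0 - decay) * g * g;
  // AdamUpdate: w += m / (sqrt(v) + epsilon).
  return w + grad_sum / (std::sqrt(*sq_sum) + epsilon);
}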

lstm/convolve.cpp (+1 -1)

@@ -112,7 +112,7 @@ bool Convolve::Backward(bool debug, const NetworkIO& fwd_deltas,
       }
     }
   } while (src_index.Increment());
-  back_deltas->CopyWithNormalization(*delta_sum, fwd_deltas);
+  back_deltas->CopyAll(*delta_sum);
   return true;
 }
lstm/fullyconnected.cpp (+18 -7)

@@ -79,11 +79,24 @@ void FullyConnected::SetEnableTraining(TrainingState state) {
 // scale `range` picked according to the random number generator `randomizer`.
 int FullyConnected::InitWeights(float range, TRand* randomizer) {
   Network::SetRandomizer(randomizer);
-  num_weights_ = weights_.InitWeightsFloat(no_, ni_ + 1, TestFlag(NF_ADA_GRAD),
+  num_weights_ = weights_.InitWeightsFloat(no_, ni_ + 1, TestFlag(NF_ADAM),
                                            range, randomizer);
   return num_weights_;
 }
 
+// Changes the number of outputs to the size of the given code_map, copying
+// the old weight matrix entries for each output from code_map[output] where
+// non-negative, and uses the mean (over all outputs) of the existing weights
+// for all outputs with negative code_map entries. Returns the new number of
+// weights. Only operates on Softmax layers with old_no outputs.
+int FullyConnected::RemapOutputs(int old_no, const std::vector<int>& code_map) {
+  if (type_ == NT_SOFTMAX && no_ == old_no) {
+    num_weights_ = weights_.RemapOutputs(code_map);
+    no_ = code_map.size();
+  }
+  return num_weights_;
+}
+
 // Converts a float network to an int network.
 void FullyConnected::ConvertToInt() {
   weights_.ConvertToInt();
@@ -240,7 +253,6 @@ bool FullyConnected::Backward(bool debug, const NetworkIO& fwd_deltas,
   FinishBackward(*errors_t.get());
   if (needs_to_backprop_) {
     back_deltas->ZeroInvalidElements();
-    back_deltas->CopyWithNormalization(*back_deltas, fwd_deltas);
 #if DEBUG_DETAIL > 0
     tprintf("F Backprop:%s\n", name_.string());
     back_deltas->Print(10);
@@ -281,12 +293,11 @@ void FullyConnected::FinishBackward(const TransposedArray& errors_t) {
     weights_.SumOuterTransposed(errors_t, *external_source_, true);
 }
 
-// Updates the weights using the given learning rate and momentum.
-// num_samples is the quotient to be used in the adagrad computation iff
-// use_ada_grad_ is true.
+// Updates the weights using the given learning rate, momentum and adam_beta.
+// num_samples is used in the adam computation iff use_adam_ is true.
 void FullyConnected::Update(float learning_rate, float momentum,
-                            int num_samples) {
-  weights_.Update(learning_rate, momentum, num_samples);
+                            float adam_beta, int num_samples) {
+  weights_.Update(learning_rate, momentum, adam_beta, num_samples);
 }
 
 // Sums the products of weight updates in *this and other, splitting into
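
WeightMatrix::RemapOutputs itself is not shown in this diff; a sketch of the remapping the comment describes, on a plain row-major matrix with illustrative types:

#include <vector>

// Row i of the new matrix copies old row code_map[i] when code_map[i] >= 0,
// otherwise the mean of all old rows.
std::vector<std::vector<double>> RemapRows(
    const std::vector<std::vector<double>>& old_w,
    const std::vector<int>& code_map) {
  int num_in = old_w[0].size();
  std::vector<double> mean(num_in, 0.0);
  for (const auto& row : old_w)
    for (int j = 0; j < num_in; ++j) mean[j] += row[j] / old_w.size();
  std::vector<std::vector<double>> new_w;
  for (int c : code_map) new_w.push_back(c >= 0 ? old_w[c] : mean);
  return new_w;
}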

lstm/fullyconnected.h (+10 -4)

@@ -68,6 +68,12 @@ class FullyConnected : public Network {
   // Sets up the network for training. Initializes weights using weights of
   // scale `range` picked according to the random number generator `randomizer`.
   virtual int InitWeights(float range, TRand* randomizer);
+  // Changes the number of outputs to the size of the given code_map, copying
+  // the old weight matrix entries for each output from code_map[output] where
+  // non-negative, and uses the mean (over all outputs) of the existing weights
+  // for all outputs with negative code_map entries. Returns the new number of
+  // weights. Only operates on Softmax layers with old_no outputs.
+  int RemapOutputs(int old_no, const std::vector<int>& code_map) override;
 
   // Converts a float network to an int network.
   virtual void ConvertToInt();
@@ -101,10 +107,10 @@ class FullyConnected : public Network {
                     TransposedArray* errors_t, double* backprop);
   void FinishBackward(const TransposedArray& errors_t);
 
-  // Updates the weights using the given learning rate and momentum.
-  // num_samples is the quotient to be used in the adagrad computation iff
-  // use_ada_grad_ is true.
-  virtual void Update(float learning_rate, float momentum, int num_samples);
+  // Updates the weights using the given learning rate, momentum and adam_beta.
+  // num_samples is used in the adam computation iff use_adam_ is true.
+  void Update(float learning_rate, float momentum, float adam_beta,
+              int num_samples) override;
   // Sums the products of weight updates in *this and other, splitting into
   // positive (same direction) in *same and negative (different direction) in
   // *changed.

lstm/lstm.cpp (+21 -13)

@@ -132,14 +132,27 @@ int LSTM::InitWeights(float range, TRand* randomizer) {
   for (int w = 0; w < WT_COUNT; ++w) {
     if (w == GFS && !Is2D()) continue;
     num_weights_ += gate_weights_[w].InitWeightsFloat(
-        ns_, na_ + 1, TestFlag(NF_ADA_GRAD), range, randomizer);
+        ns_, na_ + 1, TestFlag(NF_ADAM), range, randomizer);
   }
   if (softmax_ != NULL) {
     num_weights_ += softmax_->InitWeights(range, randomizer);
   }
   return num_weights_;
 }
 
+// Changes the number of outputs to the size of the given code_map, copying
+// the old weight matrix entries for each output from code_map[output] where
+// non-negative, and uses the mean (over all outputs) of the existing weights
+// for all outputs with negative code_map entries. Returns the new number of
+// weights. Only operates on Softmax layers with old_no outputs.
+int LSTM::RemapOutputs(int old_no, const std::vector<int>& code_map) {
+  if (softmax_ != NULL) {
+    num_weights_ -= softmax_->num_weights();
+    num_weights_ += softmax_->RemapOutputs(old_no, code_map);
+  }
+  return num_weights_;
+}
+
 // Converts a float network to an int network.
 void LSTM::ConvertToInt() {
   for (int w = 0; w < WT_COUNT; ++w) {
@@ -618,27 +631,22 @@ bool LSTM::Backward(bool debug, const NetworkIO& fwd_deltas,
   if (softmax_ != NULL) {
     softmax_->FinishBackward(*softmax_errors_t);
   }
-  if (needs_to_backprop_) {
-    // Normalize the inputerr in back_deltas.
-    back_deltas->CopyWithNormalization(*back_deltas, fwd_deltas);
-    return true;
-  }
-  return false;
+  return needs_to_backprop_;
 }
 
-// Updates the weights using the given learning rate and momentum.
-// num_samples is the quotient to be used in the adagrad computation iff
-// use_ada_grad_ is true.
-void LSTM::Update(float learning_rate, float momentum, int num_samples) {
+// Updates the weights using the given learning rate, momentum and adam_beta.
+// num_samples is used in the adam computation iff use_adam_ is true.
+void LSTM::Update(float learning_rate, float momentum, float adam_beta,
+                  int num_samples) {
 #if DEBUG_DETAIL > 3
   PrintW();
 #endif
   for (int w = 0; w < WT_COUNT; ++w) {
     if (w == GFS && !Is2D()) continue;
-    gate_weights_[w].Update(learning_rate, momentum, num_samples);
+    gate_weights_[w].Update(learning_rate, momentum, adam_beta, num_samples);
   }
   if (softmax_ != NULL) {
-    softmax_->Update(learning_rate, momentum, num_samples);
+    softmax_->Update(learning_rate, momentum, adam_beta, num_samples);
   }
 #if DEBUG_DETAIL > 3
   PrintDW();

lstm/lstm.h (+10 -4)

@@ -76,6 +76,12 @@ class LSTM : public Network {
   // Sets up the network for training. Initializes weights using weights of
   // scale `range` picked according to the random number generator `randomizer`.
   virtual int InitWeights(float range, TRand* randomizer);
+  // Changes the number of outputs to the size of the given code_map, copying
+  // the old weight matrix entries for each output from code_map[output] where
+  // non-negative, and uses the mean (over all outputs) of the existing weights
+  // for all outputs with negative code_map entries. Returns the new number of
+  // weights. Only operates on Softmax layers with old_no outputs.
+  int RemapOutputs(int old_no, const std::vector<int>& code_map) override;
 
   // Converts a float network to an int network.
   virtual void ConvertToInt();
@@ -99,10 +105,10 @@ class LSTM : public Network {
   virtual bool Backward(bool debug, const NetworkIO& fwd_deltas,
                         NetworkScratch* scratch,
                         NetworkIO* back_deltas);
-  // Updates the weights using the given learning rate and momentum.
-  // num_samples is the quotient to be used in the adagrad computation iff
-  // use_ada_grad_ is true.
-  virtual void Update(float learning_rate, float momentum, int num_samples);
+  // Updates the weights using the given learning rate, momentum and adam_beta.
+  // num_samples is used in the adam computation iff use_adam_ is true.
+  void Update(float learning_rate, float momentum, float adam_beta,
+              int num_samples) override;
   // Sums the products of weight updates in *this and other, splitting into
   // positive (same direction) in *same and negative (different direction) in
   // *changed.

lstm/lstmrecognizer.cpp (+15 -8)

@@ -55,9 +55,9 @@ LSTMRecognizer::LSTMRecognizer()
       training_iteration_(0),
       sample_iteration_(0),
       null_char_(UNICHAR_BROKEN),
-      weight_range_(0.0f),
       learning_rate_(0.0f),
       momentum_(0.0f),
+      adam_beta_(0.0f),
       dict_(NULL),
       search_(NULL),
       debug_win_(NULL) {}
@@ -94,7 +94,7 @@ bool LSTMRecognizer::Serialize(const TessdataManager* mgr, TFile* fp) const {
   if (fp->FWrite(&sample_iteration_, sizeof(sample_iteration_), 1) != 1)
     return false;
   if (fp->FWrite(&null_char_, sizeof(null_char_), 1) != 1) return false;
-  if (fp->FWrite(&weight_range_, sizeof(weight_range_), 1) != 1) return false;
+  if (fp->FWrite(&adam_beta_, sizeof(adam_beta_), 1) != 1) return false;
   if (fp->FWrite(&learning_rate_, sizeof(learning_rate_), 1) != 1) return false;
   if (fp->FWrite(&momentum_, sizeof(momentum_), 1) != 1) return false;
   if (include_charsets && IsRecoding() && !recoder_.Serialize(fp)) return false;
@@ -120,8 +120,7 @@ bool LSTMRecognizer::DeSerialize(const TessdataManager* mgr, TFile* fp) {
   if (fp->FReadEndian(&sample_iteration_, sizeof(sample_iteration_), 1) != 1)
     return false;
   if (fp->FReadEndian(&null_char_, sizeof(null_char_), 1) != 1) return false;
-  if (fp->FReadEndian(&weight_range_, sizeof(weight_range_), 1) != 1)
-    return false;
+  if (fp->FReadEndian(&adam_beta_, sizeof(adam_beta_), 1) != 1) return false;
   if (fp->FReadEndian(&learning_rate_, sizeof(learning_rate_), 1) != 1)
     return false;
   if (fp->FReadEndian(&momentum_, sizeof(momentum_), 1) != 1) return false;
@@ -207,14 +206,22 @@ void LSTMRecognizer::OutputStats(const NetworkIO& outputs, float* min_output,
   STATS stats(0, kOutputScale + 1);
   for (int t = 0; t < outputs.Width(); ++t) {
     int best_label = outputs.BestLabel(t, NULL);
-    if (best_label != null_char_ || t == 0) {
+    if (best_label != null_char_) {
       float best_output = outputs.f(t)[best_label];
       stats.add(static_cast<int>(kOutputScale * best_output), 1);
     }
   }
-  *min_output = static_cast<float>(stats.min_bucket()) / kOutputScale;
-  *mean_output = stats.mean() / kOutputScale;
-  *sd = stats.sd() / kOutputScale;
+  // If the output is all nulls it could be that the photometric interpretation
+  // is wrong, so make it look bad, so the other way can win, even if not great.
+  if (stats.get_total() == 0) {
+    *min_output = 0.0f;
+    *mean_output = 0.0f;
+    *sd = 1.0f;
+  } else {
+    *min_output = static_cast<float>(stats.min_bucket()) / kOutputScale;
+    *mean_output = stats.mean() / kOutputScale;
+    *sd = stats.sd() / kOutputScale;
+  }
 }
 
 // Recognizes the image_data, returning the labels,
