@@ -38,6 +38,29 @@ const int kAdamCorrectionIterations = 200000;
38
38
// Epsilon in Adam to prevent division by zero.
39
39
const double kAdamEpsilon = 1e-8 ;
40
40
41
+ // Computes and returns the dot product of the two n-vectors u and v.
42
+ static inline double DotProduct (const double * u, const double * v, int n) {
43
+ // Note: because the order of addition is different among the 3 DotProduct
44
+ // functions, the results can (and do) vary slightly (although they agree
45
+ // to within about 4e-15). This produces different results when running
46
+ // training, despite all random inputs being precisely equal.
47
+ // To get consistent results, use just one of these DotProduct functions.
48
+ // On a test multi-layer network, serial is 57% slower than sse, and avx
49
+ // is about 8% faster than sse. This suggests that the time is memory
50
+ // bandwidth constrained and could benefit from holding the reused vector
51
+ // in AVX registers.
52
+
53
+ if (SIMDDetect::IsAVXAvailable ())
54
+ return DotProductAVX (u, v, n);
55
+
56
+ if (SIMDDetect::IsSSEAvailable ())
57
+ return DotProductSSE (u, v, n);
58
+
59
+ double total = 0.0 ;
60
+ for (int k = 0 ; k < n; ++k) total += u[k] * v[k];
61
+ return total;
62
+ }
63
+
41
64
// Computes matrix.vector v = Wu.
42
65
// u is of size W.dim2() - add_bias_fwd and the output v is of size
43
66
// W.dim1() - skip_bias_back.
@@ -54,7 +77,7 @@ static inline void MatrixDotVectorInternal(const GENERIC_2D_ARRAY<double>& w,
54
77
int extent = w.dim2 () - add_bias_fwd;
55
78
for (int i = 0 ; i < num_results; ++i) {
56
79
const double * wi = w[i];
57
- double total = WeightMatrix:: DotProduct (wi, u, extent);
80
+ double total = DotProduct (wi, u, extent);
58
81
if (add_bias_fwd) total += wi[extent]; // The bias value.
59
82
v[i] = total;
60
83
}
@@ -389,25 +412,6 @@ void WeightMatrix::Debug2D(const char* msg) {
389
412
histogram.print ();
390
413
}
391
414
392
- // Computes and returns the dot product of the two n-vectors u and v.
393
- /* static */
394
- double WeightMatrix::DotProduct (const double * u, const double * v, int n) {
395
- // Note: because the order of addition is different among the 3 DotProduct
396
- // functions, the results can (and do) vary slightly (although they agree
397
- // to within about 4e-15). This produces different results when running
398
- // training, despite all random inputs being precisely equal.
399
- // To get consistent results, use just one of these DotProduct functions.
400
- // On a test multi-layer network, serial is 57% slower than sse, and avx
401
- // is about 8% faster than sse. This suggests that the time is memory
402
- // bandwidth constrained and could benefit from holding the reused vector
403
- // in AVX registers.
404
- if (SIMDDetect::IsAVXAvailable ()) return DotProductAVX (u, v, n);
405
- if (SIMDDetect::IsSSEAvailable ()) return DotProductSSE (u, v, n);
406
- double total = 0.0 ;
407
- for (int k = 0 ; k < n; ++k) total += u[k] * v[k];
408
- return total;
409
- }
410
-
411
415
// Utility function converts an array of float to the corresponding array
412
416
// of double.
413
417
/* static */
0 commit comments