19
19
#define min (a , b ) a < b ? a : b
20
20
#define max (a , b ) a > b ? a : b
21
21
22
+
23
+ #define calc_covariance (cov , a , b , aMean , bMean ) cov += (a - aMean) * (b - bMean)
24
+ #define calc_stddev (std , data , dataMean ) std += (data - dataMean) * (data - dataMean)
25
+ #define sum_covariance (cov , sampleSize ) cov / ((float) sampleSize - 1)
26
+ #define sum_stddev (std , sampleSize ) __builtin_sqrt(std / ((float) sampleSize - 1))
27
+
22
28
#ifdef WASM_SIMD
23
29
24
30
#include <wasm_simd128.h>
25
31
26
32
typedef float float4 __attribute__((__vector_size__ (16 )));
27
33
#define float4_size 4
28
34
#define new_float4 wasm_f32x4_splat(0)
35
+ #define float4_to_float (vec ) (wasm_f32x4_extract_lane(vec, 0) + wasm_f32x4_extract_lane(vec, 1) + wasm_f32x4_extract_lane(vec, 2) + wasm_f32x4_extract_lane(vec, 3))
29
36
30
- #define calc_covariance (vec , a , b , aMean , bMean ) vec = \
37
+ #define calc_covariance_float4 (vec , a , b , aMean , bMean ) vec = \
31
38
wasm_f32x4_add(\
32
39
vec, \
33
40
wasm_f32x4_mul( \
@@ -41,7 +48,7 @@ typedef float float4 __attribute__((__vector_size__(16)));
41
48
) \
42
49
) \
43
50
)
44
- #define calc_stddev (vec , data , dataMean ) vec = \
51
+ #define calc_stddev_float4 (vec , data , dataMean ) vec = \
45
52
wasm_f32x4_add( \
46
53
vec, \
47
54
wasm_f32x4_mul( \
@@ -55,78 +62,89 @@ typedef float float4 __attribute__((__vector_size__(16)));
55
62
) \
56
63
) \
57
64
)
58
-
59
- #define vec_to_float (vec ) (wasm_f32x4_extract_lane(vec, 0) + wasm_f32x4_extract_lane(vec, 1) + wasm_f32x4_extract_lane(vec, 2) + wasm_f32x4_extract_lane(vec, 3))
60
-
61
- #define sum_covariance (cov , sampleSize ) vec_to_float(cov) / ((float) sampleSize - 1)
62
- #define sum_stddev (std , sampleSize ) __builtin_sqrt(vec_to_float(std) / ((float) sampleSize - 1))
65
+ #define sum_covariance_float4 (cov , sampleSize ) float4_to_float(cov) / ((float) sampleSize - 1)
66
+ #define sum_stddev_float4 (std , sampleSize ) __builtin_sqrt(float4_to_float(std) / ((float) sampleSize - 1))
63
67
64
68
#else
65
69
66
70
typedef float float4 ;
67
71
#define float4_size 1
68
72
#define new_float4 0
73
+ #define float4_to_float (f ) (float) f
69
74
70
- #define calc_stddev (std , data , dataMean ) std += (data - dataMean) * (data - dataMean)
71
- #define calc_covariance (cov , a , b , aMean , bMean ) cov += (a - aMean) * (b - bMean)
72
-
73
- #define sum_covariance (cov , sampleSize ) cov / ((float) sampleSize - 1)
74
- #define sum_stddev (std , sampleSize ) __builtin_sqrt(std / ((float) sampleSize - 1))
75
+ #define calc_covariance_float4 calc_covariance
76
+ #define calc_stddev_float4 calc_stddev
77
+ #define sum_covariance_float4 sum_covariance
78
+ #define sum_stddev_float4 sum_stddev
75
79
76
80
#endif
77
81
82
+ #define calc_correlation_step_float4 (cov , aStd , bStd , aMean , bMean , a , b ) \
83
+ calc_stddev_float4(aStd, a, aMean); \
84
+ calc_stddev_float4(bStd, b, bMean); \
85
+ calc_covariance_float4(covariance, a, b, aMean, bMean);
86
+
78
87
#define calc_correlation_step (cov , aStd , bStd , aMean , bMean , a , b ) \
79
88
calc_stddev(aStd, a, aMean); \
80
89
calc_stddev(bStd, b, bMean); \
81
90
calc_covariance(covariance, a, b, aMean, bMean);
82
91
83
-
84
92
float calc_correlation (float * a , float * b , float aMean , float bMean , long sampleSize ) {
85
- int loopUnroll = 1 * float4_size ;
93
+ int loopUnroll = 4 * float4_size ;
86
94
float4 covariance = new_float4 ;
87
95
float4 aStd = new_float4 ;
88
96
float4 bStd = new_float4 ;
89
97
98
+ int i ;
90
99
for (
91
- int i = 0 ;
100
+ i = 0 ;
92
101
i < sampleSize - loopUnroll ;
93
102
i += loopUnroll
94
103
) {
95
- calc_correlation_step (covariance , aStd , bStd , aMean , bMean , a [i ], b [i ] );
96
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 1 * float4_size], b[i + 1 * float4_size]);
97
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 2 * float4_size], b[i + 2 * float4_size]);
98
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 3 * float4_size], b[i + 3 * float4_size]);
99
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 4 * float4_size], b[i + 4 * float4_size]);
100
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 5 * float4_size], b[i + 5 * float4_size]);
101
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 6 * float4_size], b[i + 6 * float4_size]);
102
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 7 * float4_size], b[i + 7 * float4_size]);
103
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 8 * float4_size], b[i + 8 * float4_size]);
104
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 9 * float4_size], b[i + 9 * float4_size]);
105
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 10 * float4_size], b[i + 10 * float4_size]);
106
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 11 * float4_size], b[i + 11 * float4_size]);
107
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 12 * float4_size], b[i + 12 * float4_size]);
108
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 13 * float4_size], b[i + 13 * float4_size]);
109
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 14 * float4_size], b[i + 14 * float4_size]);
110
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 15 * float4_size], b[i + 15 * float4_size]);
111
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 16 * float4_size], b[i + 16 * float4_size]);
112
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 17 * float4_size], b[i + 17 * float4_size]);
113
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 18 * float4_size], b[i + 18 * float4_size]);
114
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 19 * float4_size], b[i + 19 * float4_size]);
115
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 20 * float4_size], b[i + 20 * float4_size]);
116
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 21 * float4_size], b[i + 21 * float4_size]);
117
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 22 * float4_size], b[i + 22 * float4_size]);
118
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 23 * float4_size], b[i + 23 * float4_size]);
119
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 24 * float4_size], b[i + 24 * float4_size]);
120
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 25 * float4_size], b[i + 25 * float4_size]);
121
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 26 * float4_size], b[i + 26 * float4_size]);
122
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 27 * float4_size], b[i + 27 * float4_size]);
123
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 28 * float4_size], b[i + 28 * float4_size]);
124
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 29 * float4_size], b[i + 29 * float4_size]);
125
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 30 * float4_size], b[i + 30 * float4_size]);
126
- //calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 31 * float4_size], b[i + 31 * float4_size]);
104
+ calc_correlation_step_float4 (covariance , aStd , bStd , aMean , bMean , a [i ], b [i ] );
105
+ calc_correlation_step_float4 (covariance , aStd , bStd , aMean , bMean , a [i + 1 * float4_size ], b [i + 1 * float4_size ]);
106
+ calc_correlation_step_float4 (covariance , aStd , bStd , aMean , bMean , a [i + 2 * float4_size ], b [i + 2 * float4_size ]);
107
+ calc_correlation_step_float4 (covariance , aStd , bStd , aMean , bMean , a [i + 3 * float4_size ], b [i + 3 * float4_size ]);
108
+ //calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 4 * float4_size], b[i + 4 * float4_size]);
109
+ //calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 5 * float4_size], b[i + 5 * float4_size]);
110
+ //calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 6 * float4_size], b[i + 6 * float4_size]);
111
+ //calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 7 * float4_size], b[i + 7 * float4_size]);
112
+ //calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 8 * float4_size], b[i + 8 * float4_size]);
113
+ //calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 9 * float4_size], b[i + 9 * float4_size]);
114
+ //calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 10 * float4_size], b[i + 10 * float4_size]);
115
+ //calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 11 * float4_size], b[i + 11 * float4_size]);
116
+ //calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 12 * float4_size], b[i + 12 * float4_size]);
117
+ //calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 13 * float4_size], b[i + 13 * float4_size]);
118
+ //calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 14 * float4_size], b[i + 14 * float4_size]);
119
+ //calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 15 * float4_size], b[i + 15 * float4_size]);
120
+ //calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 16 * float4_size], b[i + 16 * float4_size]);
121
+ //calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 17 * float4_size], b[i + 17 * float4_size]);
122
+ //calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 18 * float4_size], b[i + 18 * float4_size]);
123
+ //calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 19 * float4_size], b[i + 19 * float4_size]);
124
+ //calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 20 * float4_size], b[i + 20 * float4_size]);
125
+ //calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 21 * float4_size], b[i + 21 * float4_size]);
126
+ //calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 22 * float4_size], b[i + 22 * float4_size]);
127
+ //calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 23 * float4_size], b[i + 23 * float4_size]);
128
+ //calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 24 * float4_size], b[i + 24 * float4_size]);
129
+ //calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 25 * float4_size], b[i + 25 * float4_size]);
130
+ //calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 26 * float4_size], b[i + 26 * float4_size]);
131
+ //calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 27 * float4_size], b[i + 27 * float4_size]);
132
+ //calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 28 * float4_size], b[i + 28 * float4_size]);
133
+ //calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 29 * float4_size], b[i + 29 * float4_size]);
134
+ //calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 30 * float4_size], b[i + 30 * float4_size]);
135
+ //calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 31 * float4_size], b[i + 31 * float4_size]);
136
+ }
137
+
138
+ // calculate any remaining data
139
+ float covarianceRemaining = float4_to_float (covariance );
140
+ float aStdFloat = float4_to_float (aStd );
141
+ float bStdFloat = float4_to_float (bStd );
142
+
143
+ for (; i < sampleSize ; i ++ ) {
144
+ calc_correlation_step (covarianceRemaining , aStdFloat , bStdFloat , aMean , bMean , a [i ], b [i ]);
127
145
}
128
146
129
- return sum_covariance (covariance , sampleSize ) / (sum_stddev (aStd , sampleSize ) * sum_stddev (bStd , sampleSize ));
147
+ return sum_covariance (covarianceRemaining , sampleSize ) / (sum_stddev (aStdFloat , sampleSize ) * sum_stddev (bStdFloat , sampleSize )); ;
130
148
}
131
149
132
150
void sum_channels (float * data , long samples , int channels ) {
@@ -198,28 +216,30 @@ void correlate(
198
216
}
199
217
}
200
218
201
- // narrow down exact correlation from previous results
202
- aOffsetStart = max (* bestSampleOffset - initialGranularity * 2 , 0 );
203
- aOffsetEnd = min (* bestSampleOffset + initialGranularity * 2 , aSamples - sampleSize );
204
-
205
- aSum = sum_for_mean (a , aOffsetStart , aOffsetStart + sampleSize );
206
-
207
- for (long aOffset = aOffsetStart ; aOffset < aOffsetEnd ; aOffset ++ ) {
208
- float aMean = aSum / sampleSize ;
209
- // shift mean sum up one element
210
- aSum -= (double ) a [aOffset ];
211
- aSum += (double ) a [aOffset + sampleSize ];
212
-
213
- correlation = calc_correlation (a + aOffset , b , aMean , bMean , sampleSize );
214
-
215
- if (* bestCorrelation < correlation ) {
216
- bestAMean = aMean ;
217
- * bestCorrelation = correlation ;
218
- * bestSampleOffset = aOffset ;
219
+ if (initialGranularity > 1 ) {
220
+ // narrow down exact correlation from previous results
221
+ aOffsetStart = max (* bestSampleOffset - initialGranularity * initialGranularity , 0 );
222
+ aOffsetEnd = min (* bestSampleOffset + initialGranularity * initialGranularity , aSamples - sampleSize );
223
+
224
+ aSum = sum_for_mean (a , aOffsetStart , aOffsetStart + sampleSize );
225
+
226
+ for (long aOffset = aOffsetStart ; aOffset < aOffsetEnd ; aOffset ++ ) {
227
+ float aMean = aSum / sampleSize ;
228
+ // shift mean sum up one element
229
+ aSum -= (double ) a [aOffset ];
230
+ aSum += (double ) a [aOffset + sampleSize ];
231
+
232
+ correlation = calc_correlation (a + aOffset , b , aMean , bMean , sampleSize );
233
+
234
+ if (* bestCorrelation < correlation ) {
235
+ bestAMean = aMean ;
236
+ * bestCorrelation = correlation ;
237
+ * bestSampleOffset = aOffset ;
238
+ }
219
239
}
240
+
241
+ long bOffsetStart = 0 ;
242
+ long bOffsetEnd = sampleSize ;
243
+ float bMeanLength = bOffsetEnd ;
220
244
}
221
-
222
- long bOffsetStart = 0 ;
223
- long bOffsetEnd = sampleSize ;
224
- float bMeanLength = bOffsetEnd ;
225
245
}
0 commit comments