Skip to content

Commit f8fb1d4

Browse files
authored
Merge pull request #2 from eshaz/better-multi-threading
Better multi threading
2 parents d31bf09 + eb96d51 commit f8fb1d4

File tree

7 files changed

+577
-217
lines changed

7 files changed

+577
-217
lines changed

.github/FUNDING.yml

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
github: eshaz

Makefile

+12
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,12 @@ correlate-scalar:
2626
-O4 \
2727
--reorder-functions \
2828
--reorder-locals \
29+
--coalesce-locals \
30+
--optimize-instructions \
31+
--optimize-added-constants-propagate \
32+
--const-hoisting \
33+
--simplify-globals \
34+
--simplify-locals \
2935
--strip-producers \
3036
--vacuum \
3137
--converge \
@@ -52,6 +58,12 @@ correlate-simd:
5258
-O4 \
5359
--reorder-functions \
5460
--reorder-locals \
61+
--coalesce-locals \
62+
--optimize-instructions \
63+
--optimize-added-constants-propagate \
64+
--const-hoisting \
65+
--simplify-globals \
66+
--simplify-locals \
5567
--strip-producers \
5668
--vacuum \
5769
--converge \

package-lock.json

+6-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "synaudio",
3-
"version": "0.1.0",
3+
"version": "0.2.0",
44
"description": "Library that finds the synchronization point between two similar audio clips.",
55
"files": [
66
"index.js",
@@ -36,6 +36,10 @@
3636
},
3737
"type": "module",
3838
"homepage": "https://github.com/eshaz/synaudio#readme",
39+
"funding": {
40+
"type": "individual",
41+
"url": "https://github.com/sponsors/eshaz"
42+
},
3943
"devDependencies": {
4044
"@types/jest": "^27.5.1",
4145
"jest": "^27.5.1",

src/SynAudio.js

+162-114
Large diffs are not rendered by default.

src/correlate.c

+90-70
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,22 @@
1919
#define min(a, b) a < b ? a : b
2020
#define max(a, b) a > b ? a : b
2121

22+
23+
#define calc_covariance(cov, a, b, aMean, bMean) cov += (a - aMean) * (b - bMean)
24+
#define calc_stddev(std, data, dataMean) std += (data - dataMean) * (data - dataMean)
25+
#define sum_covariance(cov, sampleSize) cov / ((float) sampleSize - 1)
26+
#define sum_stddev(std, sampleSize) __builtin_sqrt(std / ((float) sampleSize - 1))
27+
2228
#ifdef WASM_SIMD
2329

2430
#include <wasm_simd128.h>
2531

2632
typedef float float4 __attribute__((__vector_size__(16)));
2733
#define float4_size 4
2834
#define new_float4 wasm_f32x4_splat(0)
35+
#define float4_to_float(vec) (wasm_f32x4_extract_lane(vec, 0) + wasm_f32x4_extract_lane(vec, 1) + wasm_f32x4_extract_lane(vec, 2) + wasm_f32x4_extract_lane(vec, 3))
2936

30-
#define calc_covariance(vec, a, b, aMean, bMean) vec = \
37+
#define calc_covariance_float4(vec, a, b, aMean, bMean) vec = \
3138
wasm_f32x4_add(\
3239
vec, \
3340
wasm_f32x4_mul( \
@@ -41,7 +48,7 @@ typedef float float4 __attribute__((__vector_size__(16)));
4148
) \
4249
) \
4350
)
44-
#define calc_stddev(vec, data, dataMean) vec = \
51+
#define calc_stddev_float4(vec, data, dataMean) vec = \
4552
wasm_f32x4_add( \
4653
vec, \
4754
wasm_f32x4_mul( \
@@ -55,78 +62,89 @@ typedef float float4 __attribute__((__vector_size__(16)));
5562
) \
5663
) \
5764
)
58-
59-
#define vec_to_float(vec) (wasm_f32x4_extract_lane(vec, 0) + wasm_f32x4_extract_lane(vec, 1) + wasm_f32x4_extract_lane(vec, 2) + wasm_f32x4_extract_lane(vec, 3))
60-
61-
#define sum_covariance(cov, sampleSize) vec_to_float(cov) / ((float) sampleSize - 1)
62-
#define sum_stddev(std, sampleSize) __builtin_sqrt(vec_to_float(std) / ((float) sampleSize - 1))
65+
#define sum_covariance_float4(cov, sampleSize) float4_to_float(cov) / ((float) sampleSize - 1)
66+
#define sum_stddev_float4(std, sampleSize) __builtin_sqrt(float4_to_float(std) / ((float) sampleSize - 1))
6367

6468
#else
6569

6670
typedef float float4;
6771
#define float4_size 1
6872
#define new_float4 0
73+
#define float4_to_float(f) (float) f
6974

70-
#define calc_stddev(std, data, dataMean) std += (data - dataMean) * (data - dataMean)
71-
#define calc_covariance(cov, a, b, aMean, bMean) cov += (a - aMean) * (b - bMean)
72-
73-
#define sum_covariance(cov, sampleSize) cov / ((float) sampleSize - 1)
74-
#define sum_stddev(std, sampleSize) __builtin_sqrt(std / ((float) sampleSize - 1))
75+
#define calc_covariance_float4 calc_covariance
76+
#define calc_stddev_float4 calc_stddev
77+
#define sum_covariance_float4 sum_covariance
78+
#define sum_stddev_float4 sum_stddev
7579

7680
#endif
7781

82+
#define calc_correlation_step_float4(cov, aStd, bStd, aMean, bMean, a, b) \
83+
calc_stddev_float4(aStd, a, aMean); \
84+
calc_stddev_float4(bStd, b, bMean); \
85+
calc_covariance_float4(covariance, a, b, aMean, bMean);
86+
7887
#define calc_correlation_step(cov, aStd, bStd, aMean, bMean, a, b) \
7988
calc_stddev(aStd, a, aMean); \
8089
calc_stddev(bStd, b, bMean); \
8190
calc_covariance(covariance, a, b, aMean, bMean);
8291

83-
8492
float calc_correlation(float *a, float *b, float aMean, float bMean, long sampleSize) {
85-
int loopUnroll = 1*float4_size;
93+
int loopUnroll = 4*float4_size;
8694
float4 covariance = new_float4;
8795
float4 aStd = new_float4;
8896
float4 bStd = new_float4;
8997

98+
int i;
9099
for (
91-
int i = 0;
100+
i = 0;
92101
i < sampleSize - loopUnroll;
93102
i+=loopUnroll
94103
) {
95-
calc_correlation_step(covariance, aStd, bStd, aMean, bMean, a[i], b[i] );
96-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 1 * float4_size], b[i + 1 * float4_size]);
97-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 2 * float4_size], b[i + 2 * float4_size]);
98-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 3 * float4_size], b[i + 3 * float4_size]);
99-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 4 * float4_size], b[i + 4 * float4_size]);
100-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 5 * float4_size], b[i + 5 * float4_size]);
101-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 6 * float4_size], b[i + 6 * float4_size]);
102-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 7 * float4_size], b[i + 7 * float4_size]);
103-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 8 * float4_size], b[i + 8 * float4_size]);
104-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 9 * float4_size], b[i + 9 * float4_size]);
105-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 10 * float4_size], b[i + 10 * float4_size]);
106-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 11 * float4_size], b[i + 11 * float4_size]);
107-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 12 * float4_size], b[i + 12 * float4_size]);
108-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 13 * float4_size], b[i + 13 * float4_size]);
109-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 14 * float4_size], b[i + 14 * float4_size]);
110-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 15 * float4_size], b[i + 15 * float4_size]);
111-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 16 * float4_size], b[i + 16 * float4_size]);
112-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 17 * float4_size], b[i + 17 * float4_size]);
113-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 18 * float4_size], b[i + 18 * float4_size]);
114-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 19 * float4_size], b[i + 19 * float4_size]);
115-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 20 * float4_size], b[i + 20 * float4_size]);
116-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 21 * float4_size], b[i + 21 * float4_size]);
117-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 22 * float4_size], b[i + 22 * float4_size]);
118-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 23 * float4_size], b[i + 23 * float4_size]);
119-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 24 * float4_size], b[i + 24 * float4_size]);
120-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 25 * float4_size], b[i + 25 * float4_size]);
121-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 26 * float4_size], b[i + 26 * float4_size]);
122-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 27 * float4_size], b[i + 27 * float4_size]);
123-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 28 * float4_size], b[i + 28 * float4_size]);
124-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 29 * float4_size], b[i + 29 * float4_size]);
125-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 30 * float4_size], b[i + 30 * float4_size]);
126-
//calc_correlation_part(covariance, aStd, bStd, aMean, bMean, a[i + 31 * float4_size], b[i + 31 * float4_size]);
104+
calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i], b[i] );
105+
calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 1 * float4_size], b[i + 1 * float4_size]);
106+
calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 2 * float4_size], b[i + 2 * float4_size]);
107+
calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 3 * float4_size], b[i + 3 * float4_size]);
108+
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 4 * float4_size], b[i + 4 * float4_size]);
109+
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 5 * float4_size], b[i + 5 * float4_size]);
110+
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 6 * float4_size], b[i + 6 * float4_size]);
111+
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 7 * float4_size], b[i + 7 * float4_size]);
112+
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 8 * float4_size], b[i + 8 * float4_size]);
113+
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 9 * float4_size], b[i + 9 * float4_size]);
114+
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 10 * float4_size], b[i + 10 * float4_size]);
115+
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 11 * float4_size], b[i + 11 * float4_size]);
116+
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 12 * float4_size], b[i + 12 * float4_size]);
117+
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 13 * float4_size], b[i + 13 * float4_size]);
118+
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 14 * float4_size], b[i + 14 * float4_size]);
119+
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 15 * float4_size], b[i + 15 * float4_size]);
120+
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 16 * float4_size], b[i + 16 * float4_size]);
121+
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 17 * float4_size], b[i + 17 * float4_size]);
122+
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 18 * float4_size], b[i + 18 * float4_size]);
123+
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 19 * float4_size], b[i + 19 * float4_size]);
124+
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 20 * float4_size], b[i + 20 * float4_size]);
125+
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 21 * float4_size], b[i + 21 * float4_size]);
126+
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 22 * float4_size], b[i + 22 * float4_size]);
127+
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 23 * float4_size], b[i + 23 * float4_size]);
128+
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 24 * float4_size], b[i + 24 * float4_size]);
129+
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 25 * float4_size], b[i + 25 * float4_size]);
130+
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 26 * float4_size], b[i + 26 * float4_size]);
131+
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 27 * float4_size], b[i + 27 * float4_size]);
132+
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 28 * float4_size], b[i + 28 * float4_size]);
133+
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 29 * float4_size], b[i + 29 * float4_size]);
134+
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 30 * float4_size], b[i + 30 * float4_size]);
135+
//calc_correlation_step_float4(covariance, aStd, bStd, aMean, bMean, a[i + 31 * float4_size], b[i + 31 * float4_size]);
136+
}
137+
138+
// calculate any remaining data
139+
float covarianceRemaining = float4_to_float(covariance);
140+
float aStdFloat = float4_to_float(aStd);
141+
float bStdFloat = float4_to_float(bStd);
142+
143+
for (; i < sampleSize; i++) {
144+
calc_correlation_step(covarianceRemaining, aStdFloat, bStdFloat, aMean, bMean, a[i], b[i]);
127145
}
128146

129-
return sum_covariance(covariance, sampleSize) / (sum_stddev(aStd, sampleSize) * sum_stddev(bStd, sampleSize));
147+
return sum_covariance(covarianceRemaining, sampleSize) / (sum_stddev(aStdFloat, sampleSize) * sum_stddev(bStdFloat, sampleSize));;
130148
}
131149

132150
void sum_channels(float *data, long samples, int channels) {
@@ -198,28 +216,30 @@ void correlate(
198216
}
199217
}
200218

201-
// narrow down exact correlation from previous results
202-
aOffsetStart = max(*bestSampleOffset - initialGranularity * 2, 0);
203-
aOffsetEnd = min(*bestSampleOffset + initialGranularity * 2, aSamples - sampleSize);
204-
205-
aSum = sum_for_mean(a, aOffsetStart, aOffsetStart + sampleSize);
206-
207-
for (long aOffset = aOffsetStart; aOffset < aOffsetEnd; aOffset++) {
208-
float aMean = aSum / sampleSize;
209-
// shift mean sum up one element
210-
aSum -= (double) a[aOffset];
211-
aSum += (double) a[aOffset + sampleSize];
212-
213-
correlation = calc_correlation(a + aOffset, b, aMean, bMean, sampleSize);
214-
215-
if (*bestCorrelation < correlation) {
216-
bestAMean = aMean;
217-
*bestCorrelation = correlation;
218-
*bestSampleOffset = aOffset;
219+
if (initialGranularity > 1) {
220+
// narrow down exact correlation from previous results
221+
aOffsetStart = max(*bestSampleOffset - initialGranularity * initialGranularity, 0);
222+
aOffsetEnd = min(*bestSampleOffset + initialGranularity * initialGranularity, aSamples - sampleSize);
223+
224+
aSum = sum_for_mean(a, aOffsetStart, aOffsetStart + sampleSize);
225+
226+
for (long aOffset = aOffsetStart; aOffset < aOffsetEnd; aOffset++) {
227+
float aMean = aSum / sampleSize;
228+
// shift mean sum up one element
229+
aSum -= (double) a[aOffset];
230+
aSum += (double) a[aOffset + sampleSize];
231+
232+
correlation = calc_correlation(a + aOffset, b, aMean, bMean, sampleSize);
233+
234+
if (*bestCorrelation < correlation) {
235+
bestAMean = aMean;
236+
*bestCorrelation = correlation;
237+
*bestSampleOffset = aOffset;
238+
}
219239
}
240+
241+
long bOffsetStart = 0;
242+
long bOffsetEnd = sampleSize;
243+
float bMeanLength = bOffsetEnd;
220244
}
221-
222-
long bOffsetStart = 0;
223-
long bOffsetEnd = sampleSize;
224-
float bMeanLength = bOffsetEnd;
225245
}

0 commit comments

Comments
 (0)