Skip to content
This repository was archived by the owner on Sep 11, 2024. It is now read-only.

Commit f4a43cd

Browse files
authored
Merge pull request #6436 from matrix-org/travis/voice-messages/large
Use a MediaElementSourceAudioNode to process large audio files
2 parents c4b08d7 + 6b54f83 commit f4a43cd

File tree

1 file changed

+70
-30
lines changed

1 file changed

+70
-30
lines changed

src/voice/Playback.ts

Lines changed: 70 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -59,9 +59,10 @@ export class Playback extends EventEmitter implements IDestroyable {
5959
public readonly thumbnailWaveform: number[];
6060

6161
private readonly context: AudioContext;
62-
private source: AudioBufferSourceNode;
62+
private source: AudioBufferSourceNode | MediaElementAudioSourceNode;
6363
private state = PlaybackState.Decoding;
6464
private audioBuf: AudioBuffer;
65+
private element: HTMLAudioElement;
6566
private resampledWaveform: number[];
6667
private waveformObservable = new SimpleObservable<number[]>();
6768
private readonly clock: PlaybackClock;
@@ -129,41 +130,64 @@ export class Playback extends EventEmitter implements IDestroyable {
129130
this.removeAllListeners();
130131
this.clock.destroy();
131132
this.waveformObservable.close();
133+
if (this.element) {
134+
URL.revokeObjectURL(this.element.src);
135+
this.element.remove();
136+
}
132137
}
133138

134139
public async prepare() {
135-
// Safari compat: promise API not supported on this function
136-
this.audioBuf = await new Promise((resolve, reject) => {
137-
this.context.decodeAudioData(this.buf, b => resolve(b), async e => {
138-
try {
139-
// This error handler is largely for Safari as well, which doesn't support Opus/Ogg
140-
// very well.
141-
console.error("Error decoding recording: ", e);
142-
console.warn("Trying to re-encode to WAV instead...");
143-
144-
const wav = await decodeOgg(this.buf);
145-
146-
// noinspection ES6MissingAwait - not needed when using callbacks
147-
this.context.decodeAudioData(wav, b => resolve(b), e => {
148-
console.error("Still failed to decode recording: ", e);
140+
// The point where we use an audio element is fairly arbitrary, though we don't want
141+
// it to be too low. As of writing, voice messages want to show a waveform but audio
142+
// messages do not. Using an audio element means we can't show a waveform preview, so
143+
// we try to target the difference between a voice message file and large audio file.
144+
// Overall, the point of this is to avoid memory-related issues due to storing a massive
145+
// audio buffer in memory, as that can balloon to far greater than the input buffer's
146+
// byte length.
147+
if (this.buf.byteLength > 5 * 1024 * 1024) { // 5mb
148+
console.log("Audio file too large: processing through <audio /> element");
149+
this.element = document.createElement("AUDIO") as HTMLAudioElement;
150+
const prom = new Promise((resolve, reject) => {
151+
this.element.onloadeddata = () => resolve(null);
152+
this.element.onerror = (e) => reject(e);
153+
});
154+
this.element.src = URL.createObjectURL(new Blob([this.buf]));
155+
await prom; // make sure the audio element is ready for us
156+
} else {
157+
// Safari compat: promise API not supported on this function
158+
this.audioBuf = await new Promise((resolve, reject) => {
159+
this.context.decodeAudioData(this.buf, b => resolve(b), async e => {
160+
try {
161+
// This error handler is largely for Safari as well, which doesn't support Opus/Ogg
162+
// very well.
163+
console.error("Error decoding recording: ", e);
164+
console.warn("Trying to re-encode to WAV instead...");
165+
166+
const wav = await decodeOgg(this.buf);
167+
168+
// noinspection ES6MissingAwait - not needed when using callbacks
169+
this.context.decodeAudioData(wav, b => resolve(b), e => {
170+
console.error("Still failed to decode recording: ", e);
171+
reject(e);
172+
});
173+
} catch (e) {
174+
console.error("Caught decoding error:", e);
149175
reject(e);
150-
});
151-
} catch (e) {
152-
console.error("Caught decoding error:", e);
153-
reject(e);
154-
}
176+
}
177+
});
155178
});
156-
});
157179

158-
// Update the waveform to the real waveform once we have channel data to use. We don't
159-
// exactly trust the user-provided waveform to be accurate...
160-
const waveform = Array.from(this.audioBuf.getChannelData(0));
161-
this.resampledWaveform = makePlaybackWaveform(waveform);
180+
// Update the waveform to the real waveform once we have channel data to use. We don't
181+
// exactly trust the user-provided waveform to be accurate...
182+
const waveform = Array.from(this.audioBuf.getChannelData(0));
183+
this.resampledWaveform = makePlaybackWaveform(waveform);
184+
}
185+
162186
this.waveformObservable.update(this.resampledWaveform);
163187

164188
this.emit(PlaybackState.Stopped); // signal that we're not decoding anymore
165189
this.clock.flagLoadTime(); // must happen first because setting the duration fires a clock update
166-
this.clock.durationSeconds = this.audioBuf.duration;
190+
this.clock.durationSeconds = this.element ? this.element.duration : this.audioBuf.duration;
167191
}
168192

169193
private onPlaybackEnd = async () => {
@@ -176,7 +200,11 @@ export class Playback extends EventEmitter implements IDestroyable {
176200
if (this.state === PlaybackState.Stopped) {
177201
this.disconnectSource();
178202
this.makeNewSourceBuffer();
179-
this.source.start();
203+
if (this.element) {
204+
await this.element.play();
205+
} else {
206+
(this.source as AudioBufferSourceNode).start();
207+
}
180208
}
181209

182210
// We use the context suspend/resume functions because it allows us to pause a source
@@ -187,13 +215,21 @@ export class Playback extends EventEmitter implements IDestroyable {
187215
}
188216

189217
private disconnectSource() {
218+
if (this.element) return; // leave connected, we can (and must) re-use it
190219
this.source?.disconnect();
191220
this.source?.removeEventListener("ended", this.onPlaybackEnd);
192221
}
193222

194223
private makeNewSourceBuffer() {
195-
this.source = this.context.createBufferSource();
196-
this.source.buffer = this.audioBuf;
224+
if (this.element && this.source) return; // leave connected, we can (and must) re-use it
225+
226+
if (this.element) {
227+
this.source = this.context.createMediaElementSource(this.element);
228+
} else {
229+
this.source = this.context.createBufferSource();
230+
this.source.buffer = this.audioBuf;
231+
}
232+
197233
this.source.addEventListener("ended", this.onPlaybackEnd);
198234
this.source.connect(this.context.destination);
199235
}
@@ -246,7 +282,11 @@ export class Playback extends EventEmitter implements IDestroyable {
246282
// when it comes time to the user hitting play. After a couple jumps, the user
247283
// will have desynced the clock enough to be about 10-15 seconds off, while this
248284
// keeps it as close to perfect as humans can perceive.
249-
this.source.start(now, timeSeconds);
285+
if (this.element) {
286+
this.element.currentTime = timeSeconds;
287+
} else {
288+
(this.source as AudioBufferSourceNode).start(now, timeSeconds);
289+
}
250290

251291
// Dev note: it's critical that the code gap between `this.source.start()` and
252292
// `this.pause()` is as small as possible: we do not want to delay *anything*

0 commit comments

Comments
 (0)