Skip to content
This repository was archived by the owner on May 10, 2023. It is now read-only.

Commit e09eb64

Browse files
committed
yFFT: Fix SEGV caused by the alignment issue of lookup tables
Tested with: rustc 1.32.0-nightly (25a42b2ce 2018-11-07). jemalloc was recently removed from the standard library (<rust-lang/rust#55238>). As a result, we started seeing cases where the alignment enforced on allocated regions is weaker than before, and some code stopped working because it erroneously relied on jemalloc's larger alignment values. This commit fixes the issue by introducing `AlignedVec`, a wrapper of `Vec` that exposes the portion of the `Vec` satisfying the alignment required by SIMD operations, and by modifying the affected code to use `AlignedVec` in place of `Vec`.
1 parent 01c9e6c commit e09eb64

File tree

5 files changed

+84
-6
lines changed

5 files changed

+84
-6
lines changed

src/aligned.rs

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
//
2+
// Copyright 2018 yvt, all rights reserved.
3+
//
4+
// This source code is a part of Nightingales.
5+
//
6+
use std::{
7+
fmt,
8+
mem::size_of,
9+
ops::{Deref, DerefMut},
10+
};
11+
12+
/// The alignment value, in bytes, guaranteed by `AlignedVec`.
///
/// Must be a power of two because `ptr_lsbs` uses `ALIGN - 1` as a bit mask.
const ALIGN: usize = 32;

/// Returns the bits of the address `x` that lie below the `ALIGN` boundary.
///
/// The result is zero exactly when `x` is a multiple of `ALIGN`.
fn ptr_lsbs(x: usize) -> usize {
    x & (ALIGN - 1)
}

/// Provides a subset of `Vec`'s interface while providing a minimum alignment
/// guarantee that is convenient for SIMD operations.
///
/// The backing `Vec` is over-allocated and starts with `offset` padding
/// elements, chosen so that the element at index `offset` is located at an
/// address that is a multiple of `ALIGN`.
pub struct AlignedVec<T> {
    /// Backing storage: `offset` padding elements followed by the entries.
    storage: Vec<T>,
    /// Number of padding elements at the start of `storage`.
    offset: usize,
}

impl<T: Copy + Default> AlignedVec<T> {
    /// Creates an empty `AlignedVec` that can hold at least `i` elements.
    ///
    /// # Panics
    ///
    /// Panics if `T` is zero-sized, if the size of `T` does not evenly divide
    /// `ALIGN`, if the required capacity overflows `usize`, or if the
    /// allocation cannot be brought to an `ALIGN` boundary by skipping a whole
    /// number of elements. These checks are enforced in release builds too,
    /// so a violated invariant never results in a silently misaligned table.
    pub fn with_capacity(i: usize) -> Self {
        let elem_size = size_of::<T>();

        assert!(
            elem_size != 0,
            "AlignedVec does not support zero-sized types"
        );
        assert!(
            elem_size <= ALIGN && ALIGN % elem_size == 0,
            "the element size must evenly divide the alignment"
        );

        // Worst case, the allocation starts one element past a boundary, so
        // reserve room for that many padding elements up front.
        let max_padding = ALIGN / elem_size - 1;
        let capacity = i.checked_add(max_padding).expect("capacity overflow");
        let mut storage: Vec<T> = Vec::with_capacity(capacity);

        // Distance in bytes from the start of the allocation to the next
        // `ALIGN` boundary (zero if the allocation is already aligned).
        let pad_bytes = (ALIGN - ptr_lsbs(storage.as_ptr() as usize)) % ALIGN;
        assert!(
            pad_bytes % elem_size == 0,
            "the allocation cannot be aligned by skipping whole elements"
        );
        let offset = pad_bytes / elem_size;

        // `offset <= max_padding`, so this fits in the reserved capacity and
        // does not reallocate.
        storage.resize(offset, T::default());

        Self { storage, offset }
    }

    /// Appends `x` to the end of the collection.
    ///
    /// # Panics
    ///
    /// Panics if the backing storage has no spare capacity. The storage is
    /// never grown, so its buffer stays at the address whose alignment was
    /// established by `with_capacity`.
    pub fn push(&mut self, x: T) {
        assert!(
            self.storage.len() < self.storage.capacity(),
            "collection is full"
        );
        self.storage.push(x);
    }
}
52+
53+
impl<T: fmt::Debug> fmt::Debug for AlignedVec<T> {
54+
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
55+
fmt.debug_struct("AlignedVec")
56+
.field("offset", &self.offset)
57+
.field("entries", &&self[..])
58+
.finish()
59+
}
60+
}
61+
62+
impl<T> Deref for AlignedVec<T> {
63+
type Target = [T];
64+
65+
fn deref(&self) -> &Self::Target {
66+
&self.storage[self.offset..]
67+
}
68+
}
69+
70+
impl<T> DerefMut for AlignedVec<T> {
71+
fn deref_mut(&mut self) -> &mut Self::Target {
72+
&mut self.storage[self.offset..]
73+
}
74+
}

src/kernel/x86/x86avxf32realfft.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use std::f32;
1212
use std::mem;
1313
use std::ptr::{read_unaligned, write_unaligned};
1414

15+
use aligned::AlignedVec;
1516
use simdutils::{avx_f32x8_bitxor, avx_f32x8_complex_mul_riri};
1617
use Num;
1718

@@ -39,7 +40,7 @@ where
3940
#[derive(Debug)]
4041
struct AvxF32RealFFTPrePostProcessKernel {
4142
len: usize,
42-
table: [Vec<f32>; 2],
43+
table: [AlignedVec<f32>; 2],
4344
inverse: bool,
4445
}
4546

src/kernel/x86/x86sse1realfft.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use std::f32;
1212
use std::mem;
1313
use std::ptr::{read_unaligned, write_unaligned};
1414

15+
use aligned::AlignedVec;
1516
use simdutils::{f32x4_bitxor, f32x4_complex_mul_rrii};
1617
use {mul_pos_i, Complex, Num};
1718

@@ -34,10 +35,10 @@ where
3435
})
3536
}
3637

37-
pub(super) fn new_real_fft_coef_table(len: usize, inverse: bool) -> [Vec<f32>; 2] {
38+
pub(super) fn new_real_fft_coef_table(len: usize, inverse: bool) -> [AlignedVec<f32>; 2] {
3839
assert!(len % 2 == 0);
39-
let mut table_a = Vec::with_capacity(len);
40-
let mut table_b = Vec::with_capacity(len);
40+
let mut table_a = AlignedVec::with_capacity(len);
41+
let mut table_b = AlignedVec::with_capacity(len);
4142
for i in 0..(len / 2) {
4243
let c = Complex::new(0f32, (i as f32) * -f32::consts::PI / (len / 2) as f32).exp();
4344

@@ -61,7 +62,7 @@ pub(super) fn new_real_fft_coef_table(len: usize, inverse: bool) -> [Vec<f32>; 2
6162
#[derive(Debug)]
6263
struct SseRealFFTPrePostProcessKernel {
6364
len: usize,
64-
table: [Vec<f32>; 2],
65+
table: [AlignedVec<f32>; 2],
6566
inverse: bool,
6667
}
6768

src/kernel/x86/x86sse3f32realfft.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use std::f32;
1212
use std::mem;
1313
use std::ptr::{read_unaligned, write_unaligned};
1414

15+
use aligned::AlignedVec;
1516
use simdutils::{f32x4_bitxor, sse3_f32x4_complex_mul_riri};
1617
use Num;
1718

@@ -39,7 +40,7 @@ where
3940
#[derive(Debug)]
4041
struct Sse3F32RealFFTPrePostProcessKernel {
4142
len: usize,
42-
table: [Vec<f32>; 2],
43+
table: [AlignedVec<f32>; 2],
4344
inverse: bool,
4445
}
4546

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ use num_complex::Complex;
4242

4343
#[macro_use]
4444
mod simdutils;
45+
mod aligned;
4546
mod env;
4647
mod kernel;
4748
mod setup;

0 commit comments

Comments
 (0)