Skip to content

Commit 0c3a47e

Browse files
committed
Convert drfti1 to a safe version
I am still not satisfied with this, but at least it can be a decent starting point for improvements. My points are: 1. Variable names are still unintelligible, mainly because I do not have a clear vision of the algorithm. Maybe someone better than me can be able to grasp the meaning of each variable and to git them a decent name. 2. The first loop is still unidiomatic. Everything I thought involves an additional check to exit the outer loop, and at the current state (no benchmarks) I am not sure about the best approch. Nevertheless, cast checks that were not present in the original code should be helpful to find strange edge cases, and they should have a negligible cost due to the cpu branch predictor.
1 parent 2f044e0 commit 0c3a47e

File tree

1 file changed

+64
-95
lines changed

1 file changed

+64
-95
lines changed

fft/src/smallft.rs

Lines changed: 64 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@
88
unused_mut
99
)]
1010

11-
use std::os::raw::{c_double, c_float, c_int};
11+
use std::{
12+
convert::TryInto,
13+
os::raw::{c_double, c_float, c_int},
14+
};
1215

1316
/* *******************************************************************
1417
* *
@@ -82,112 +85,78 @@ last mod: $Id: smallft.c,v 1.19 2003/10/08 05:12:37 jm Exp $
8285
* it follows R_0, R_1, I_1, R_2, I_2 ... R_n-1, I_n-1, I_n like the
8386
* FORTRAN version
8487
*/
85-
unsafe extern "C" fn drfti1(wa: &mut [f32], ifac: &mut [i32]) {
86-
static mut ntryh: [c_int; 4] =
87-
[4 as c_int, 2 as c_int, 3 as c_int, 5 as c_int];
88-
static mut tpi: c_float = 6.28318530717958648f32;
88+
extern "C" fn drfti1(wa: &mut [f32], ifac: &mut [i32]) {
89+
const NTRYH: [i32; 4] = [4, 2, 3, 5];
90+
const TPI: f32 = 6.283_185_5;
8991

90-
let mut n = wa.len() as i32;
91-
let mut wa = wa.as_mut_ptr();
92-
let mut ifac = ifac.as_mut_ptr();
92+
let n = wa.len().try_into().unwrap();
93+
let ifac: &mut [i32; 32] = ifac.try_into().unwrap();
9394

94-
let mut arg: c_float = 0.;
95-
let mut argh: c_float = 0.;
96-
let mut argld: c_float = 0.;
97-
let mut fi: c_float = 0.;
98-
let mut ntry: c_int = 0 as c_int;
99-
let mut i: c_int = 0;
100-
let mut j: c_int = -(1 as c_int);
101-
let mut k1: c_int = 0;
102-
let mut l1: c_int = 0;
103-
let mut l2: c_int = 0;
104-
let mut ib: c_int = 0;
105-
let mut ld: c_int = 0;
106-
let mut ii: c_int = 0;
107-
let mut ip: c_int = 0;
108-
let mut is: c_int = 0;
109-
let mut nq: c_int = 0;
110-
let mut nr: c_int = 0;
111-
let mut ido: c_int = 0;
112-
let mut ipm: c_int = 0;
113-
let mut nfm1: c_int = 0;
114-
let mut nl: c_int = n;
115-
let mut nf: c_int = 0 as c_int;
116-
'c_10244: loop {
117-
j += 1;
118-
if j < 4 as c_int {
119-
ntry = ntryh[j as usize]
120-
} else {
121-
ntry += 2 as c_int
122-
}
95+
let mut nf = 0;
96+
let mut nl = n;
97+
98+
'out: for ntry in NTRYH
99+
.iter()
100+
.copied()
101+
.chain(((NTRYH.last().unwrap() + 2)..).step_by(2))
102+
{
123103
loop {
124-
nq = nl / ntry;
125-
nr = nl - ntry * nq;
126-
if nr != 0 as c_int {
104+
let nq = nl / ntry;
105+
let nr = nl - ntry * nq;
106+
if nr != 0 {
127107
break;
128108
}
109+
129110
nf += 1;
130-
*ifac.offset((nf + 1 as c_int) as isize) = ntry;
131-
nl = nq;
132-
if !(ntry != 2 as c_int) {
133-
if !(nf == 1 as c_int) {
134-
i = 1 as c_int;
135-
while i < nf {
136-
ib = nf - i + 1 as c_int;
137-
*ifac.offset((ib + 1 as c_int) as isize) =
138-
*ifac.offset(ib as isize);
139-
i += 1
140-
}
141-
*ifac.offset(2 as c_int as isize) = 2 as c_int
142-
}
111+
ifac[nf + 1] = ntry;
112+
if ntry == 2 && nf != 1 {
113+
ifac[1..nf]
114+
.rchunks_exact_mut(2)
115+
.for_each(|chunk| chunk[1] = chunk[0]);
116+
ifac[2] = 2;
143117
}
144-
if !(nl != 1 as c_int) {
145-
break 'c_10244;
118+
119+
nl = nq;
120+
if nl == 1 {
121+
break 'out;
146122
}
147123
}
148124
}
149-
*ifac.offset(0 as c_int as isize) = n;
150-
*ifac.offset(1 as c_int as isize) = nf;
151-
argh = tpi / n as c_float;
152-
is = 0 as c_int;
153-
nfm1 = nf - 1 as c_int;
154-
l1 = 1 as c_int;
155-
if nfm1 == 0 as c_int {
125+
let nf = nf;
126+
127+
ifac[0] = n;
128+
ifac[1] = nf.try_into().unwrap();
129+
if nf == 0 {
156130
return;
157131
}
158-
k1 = 0 as c_int;
159-
while k1 < nfm1 {
160-
ip = *ifac.offset((k1 + 2 as c_int) as isize);
161-
ld = 0 as c_int;
162-
l2 = l1 * ip;
163-
ido = n / l2;
164-
ipm = ip - 1 as c_int;
165-
j = 0 as c_int;
166-
while j < ipm {
167-
ld += l1;
168-
i = is;
169-
argld = ld as c_float * argh;
170-
fi = 0.0f32;
171-
ii = 2 as c_int;
172-
while ii < ido {
173-
fi += 1.0f32;
174-
arg = fi * argld;
175-
let fresh0 = i;
176-
i = i + 1;
177-
*wa.offset(fresh0 as isize) =
178-
f64::cos(arg as c_double) as c_float;
179-
let fresh1 = i;
180-
i = i + 1;
181-
*wa.offset(fresh1 as isize) =
182-
f64::sin(arg as c_double) as c_float;
183-
ii += 2 as c_int
184-
}
185-
is += ido;
186-
j += 1
187-
}
188-
l1 = l2;
189-
k1 += 1
190-
}
132+
133+
let argh = TPI / wa.len() as f32;
134+
ifac[2..=nf].iter().map(|&ip| ip.try_into().unwrap()).fold(
135+
(0, 1),
136+
|(is, l1), ip: usize| {
137+
let l2 = l1 * ip;
138+
let ido = wa.len() / l2;
139+
let ipm = ip - 1;
140+
141+
wa[is..]
142+
.chunks_exact_mut(ido)
143+
.zip((l1..).step_by(l1).map(|ld| ld as f32 * argh))
144+
.take(ipm)
145+
.for_each(|(chunk, argld)| {
146+
chunk[..ido.saturating_sub(2)].chunks_exact_mut(2).fold(
147+
1f32,
148+
|fi, chunk| {
149+
let arg = (fi * argld) as f64;
150+
chunk[0] = f64::cos(arg) as f32;
151+
chunk[1] = f64::sin(arg) as f32;
152+
fi + 1.
153+
},
154+
);
155+
});
156+
157+
(is + ipm * ido, l2)
158+
},
159+
);
191160
}
192161
unsafe extern "C" fn fdrffti(
193162
n: usize,

0 commit comments

Comments
 (0)