Skip to content
This repository has been archived by the owner on May 10, 2023. It is now read-only.

Commit

Permalink
yFFT: Fix SEGV caused by the alignment issue of lookup tables
Browse files Browse the repository at this point in the history
Tested with: rustc 1.32.0-nightly (25a42b2ce 2018-11-07)

jemalloc was removed from the standard library recently:
<rust-lang/rust#55238>
As a result, we started seeing cases where the alignment requirements
enforced on allocated regions are weaker than before. Some code ceased
working because it erroneously relied on jemalloc's larger alignment
values.

This commit fixes this issue by introducing `AlignedVec`, a wrapper around
`Vec` that exposes only the portion of the `Vec` that meets the alignment
required by SIMD operations, and by modifying the code in question to use
`AlignedVec` in place of `Vec`.
  • Loading branch information
yvt committed Nov 8, 2018
1 parent 01c9e6c commit e09eb64
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 6 deletions.
74 changes: 74 additions & 0 deletions src/aligned.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
//
// Copyright 2018 yvt, all rights reserved.
//
// This source code is a part of Nightingales.
//
use std::{
fmt,
mem::size_of,
ops::{Deref, DerefMut},
};

/// Byte alignment that every `AlignedVec` guarantees for its first element.
const ALIGN: usize = 32;

/// Returns the low-order part of the address `x` that must be zero for `x`
/// to be a multiple of `ALIGN`; the result is `0` exactly when `x` is aligned.
fn ptr_lsbs(x: usize) -> usize {
    // `ALIGN` is a power of two, so the remainder equals the masked low bits.
    x % ALIGN
}

/// Provides a subset of `Vec`'s interface while providing a minimum alignment
/// guarantee that is convenient for SIMD operations.
///
/// The elements are exposed as a slice through `Deref`/`DerefMut`, which skip
/// the first `offset` elements of the backing storage.
pub struct AlignedVec<T> {
// Backing buffer: `offset` padding elements followed by the visible elements.
storage: Vec<T>,
// Number of leading elements of `storage` that `Deref`/`DerefMut` skip;
// `with_capacity` sets it to the count of padding elements it pushed.
offset: usize,
}

impl<T: Copy + Default> AlignedVec<T> {
    /// Creates an empty `AlignedVec` with room for at least `i` elements whose
    /// first visible element sits at an address that is a multiple of `ALIGN`.
    ///
    /// The backing `Vec` is over-allocated by `ALIGN / size_of::<T>() - 1`
    /// elements, and default-valued padding elements are pushed in front until
    /// the next free slot is aligned.
    ///
    /// # Panics
    ///
    /// Panics if `T` is zero-sized, if `size_of::<T>()` exceeds or does not
    /// divide `ALIGN`, if the padded capacity overflows `usize`, or if the
    /// allocation cannot be aligned within the reserved padding.
    pub fn with_capacity(i: usize) -> Self {
        let elem_size = size_of::<T>();

        // These preconditions depend only on `T`, not on run-time data, so
        // they are enforced in every build profile. Without them the padding
        // loop below could push past the reserved capacity, reallocate, and
        // never converge.
        assert!(
            elem_size != 0,
            "AlignedVec does not support zero-sized element types"
        );
        assert!(
            elem_size <= ALIGN,
            "AlignedVec element size must not exceed the guaranteed alignment"
        );
        assert!(
            ALIGN % elem_size == 0,
            "AlignedVec element size must divide the guaranteed alignment"
        );

        // Worst case: the allocation starts one element past an aligned
        // address, so this many padding elements are needed.
        let max_padding = ALIGN / elem_size - 1;
        let capacity = i.checked_add(max_padding).expect("capacity overflow");

        let mut storage: Vec<T> = Vec::with_capacity(capacity);
        let mut offset = 0;

        // Increase the padding until the next free slot is aligned.
        while ptr_lsbs(storage.as_ptr().wrapping_add(offset) as usize) != 0 {
            // Never push beyond the reserved padding: doing so could force a
            // reallocation, which would invalidate the alignment computed so
            // far.
            assert!(
                offset < max_padding,
                "AlignedVec could not align its storage within the reserved padding"
            );
            storage.push(T::default());
            offset += 1;
        }

        Self { storage, offset }
    }

    /// Appends `x` after the elements pushed so far.
    ///
    /// # Panics
    ///
    /// Panics with "collection is full" when the backing storage has no spare
    /// capacity. Growing the `Vec` would reallocate, and the new buffer is
    /// not guaranteed to keep the alignment established by `with_capacity`.
    pub fn push(&mut self, x: T) {
        assert!(
            self.storage.len() < self.storage.capacity(),
            "collection is full"
        );
        self.storage.push(x);
    }
}

impl<T: fmt::Debug> fmt::Debug for AlignedVec<T> {
    /// Formats the value as a struct named `AlignedVec` with two fields: the
    /// padding `offset` and the `entries` visible through `Deref`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Deref coercion yields the slice that starts after the padding.
        let entries: &[T] = self;

        let mut out = f.debug_struct("AlignedVec");
        out.field("offset", &self.offset);
        out.field("entries", &entries);
        out.finish()
    }
}

impl<T> Deref for AlignedVec<T> {
    type Target = [T];

    /// Borrows the elements of the backing storage from index `offset` on,
    /// i.e. everything after the leading padding.
    fn deref(&self) -> &Self::Target {
        let first_visible = self.offset;
        &self.storage.as_slice()[first_visible..]
    }
}

impl<T> DerefMut for AlignedVec<T> {
    /// Mutably borrows the elements of the backing storage from index
    /// `offset` on, i.e. everything after the leading padding.
    fn deref_mut(&mut self) -> &mut Self::Target {
        let first_visible = self.offset;
        &mut self.storage.as_mut_slice()[first_visible..]
    }
}
3 changes: 2 additions & 1 deletion src/kernel/x86/x86avxf32realfft.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use std::f32;
use std::mem;
use std::ptr::{read_unaligned, write_unaligned};

use aligned::AlignedVec;
use simdutils::{avx_f32x8_bitxor, avx_f32x8_complex_mul_riri};
use Num;

Expand Down Expand Up @@ -39,7 +40,7 @@ where
#[derive(Debug)]
struct AvxF32RealFFTPrePostProcessKernel {
len: usize,
table: [Vec<f32>; 2],
table: [AlignedVec<f32>; 2],
inverse: bool,
}

Expand Down
9 changes: 5 additions & 4 deletions src/kernel/x86/x86sse1realfft.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use std::f32;
use std::mem;
use std::ptr::{read_unaligned, write_unaligned};

use aligned::AlignedVec;
use simdutils::{f32x4_bitxor, f32x4_complex_mul_rrii};
use {mul_pos_i, Complex, Num};

Expand All @@ -34,10 +35,10 @@ where
})
}

pub(super) fn new_real_fft_coef_table(len: usize, inverse: bool) -> [Vec<f32>; 2] {
pub(super) fn new_real_fft_coef_table(len: usize, inverse: bool) -> [AlignedVec<f32>; 2] {
assert!(len % 2 == 0);
let mut table_a = Vec::with_capacity(len);
let mut table_b = Vec::with_capacity(len);
let mut table_a = AlignedVec::with_capacity(len);
let mut table_b = AlignedVec::with_capacity(len);
for i in 0..(len / 2) {
let c = Complex::new(0f32, (i as f32) * -f32::consts::PI / (len / 2) as f32).exp();

Expand All @@ -61,7 +62,7 @@ pub(super) fn new_real_fft_coef_table(len: usize, inverse: bool) -> [Vec<f32>; 2
#[derive(Debug)]
struct SseRealFFTPrePostProcessKernel {
len: usize,
table: [Vec<f32>; 2],
table: [AlignedVec<f32>; 2],
inverse: bool,
}

Expand Down
3 changes: 2 additions & 1 deletion src/kernel/x86/x86sse3f32realfft.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use std::f32;
use std::mem;
use std::ptr::{read_unaligned, write_unaligned};

use aligned::AlignedVec;
use simdutils::{f32x4_bitxor, sse3_f32x4_complex_mul_riri};
use Num;

Expand Down Expand Up @@ -39,7 +40,7 @@ where
#[derive(Debug)]
struct Sse3F32RealFFTPrePostProcessKernel {
len: usize,
table: [Vec<f32>; 2],
table: [AlignedVec<f32>; 2],
inverse: bool,
}

Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ use num_complex::Complex;

#[macro_use]
mod simdutils;
mod aligned;
mod env;
mod kernel;
mod setup;
Expand Down

0 comments on commit e09eb64

Please sign in to comment.