Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Potential performance improvements #16

Merged
merged 18 commits into from
Jul 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,37 @@ fn generate_srgb_lookup() -> [f32; 256] {
table
}

fn main() {
/// Writes the generated sRGB lookup table to `f` as a single line of Rust
/// source declaring `static SRGB_LOOKUP: [f32; 256]`.
///
/// # Panics
///
/// Panics if writing to `f` fails.
fn write_srgb(f: &mut std::fs::File) {
    let lookup = generate_srgb_lookup();
    // Render the whole declaration first, then push it out in one call.
    let declaration = format!("static SRGB_LOOKUP: [f32; 256] = {:?};\n", lookup);
    f.write_all(declaration.as_bytes()).unwrap();
}

/// Writes the base83 lookup tables to `f` as Rust source.
///
/// Three constants are emitted, one per line:
/// - `CHARACTERS`: the 83-byte alphabet, indexed by digit value.
/// - `CHARACTERS_INV`: the inverse table, indexed by byte value and holding
///   the digit for that byte. Its length is the largest alphabet byte plus
///   one, so larger bytes must be range-checked by the reader.
/// - `CHARACTERS_INV_INVALID`: the sentinel stored in `CHARACTERS_INV` for
///   bytes that are not part of the alphabet.
///
/// # Panics
///
/// Panics if writing to `f` fails.
fn write_base83(f: &mut std::fs::File) {
    const CHARACTERS: &[u8; 83] =
        b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz#$%*+,-.:;=?@[]^_{|}~";
    writeln!(f, "const CHARACTERS: [u8; 83] = {:?};", CHARACTERS).unwrap();

    // One past the largest alphabet byte: used both as the inverse table's
    // length and as the "not a base83 character" marker inside it.
    let sentinel = CHARACTERS.iter().max().unwrap() + 1;

    // Start with every slot marked invalid, then fill in the real digits.
    let mut inverse = vec![sentinel; sentinel as usize];
    for (digit, &ch) in CHARACTERS.iter().enumerate() {
        inverse[ch as usize] = digit as u8;
    }

    writeln!(
        f,
        "const CHARACTERS_INV: [u8; {}] = {:?};",
        sentinel, inverse
    )
    .unwrap();
    writeln!(f, "const CHARACTERS_INV_INVALID: u8 = {};", sentinel).unwrap();
}

fn main() {
let out_dir = std::env::var("OUT_DIR").unwrap();
let out_dir = std::path::PathBuf::from(out_dir);

let mut f = std::fs::File::create(out_dir.join("srgb_lookup.rs")).unwrap();
writeln!(f, "static SRGB_LOOKUP: [f32; 256] = {:?};", table).unwrap();
write_srgb(&mut f);

let mut f = std::fs::File::create(out_dir.join("base83_lookup.rs")).unwrap();
write_base83(&mut f);
}
41 changes: 22 additions & 19 deletions src/base83.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,12 @@
use crate::Error;

static CHARACTERS: [u8; 83] = [
b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'A', b'B', b'C', b'D', b'E', b'F',
b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T', b'U', b'V',
b'W', b'X', b'Y', b'Z', b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l',
b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'#', b'$',
b'%', b'*', b'+', b',', b'-', b'.', b':', b';', b'=', b'?', b'@', b'[', b']', b'^', b'_', b'{',
b'|', b'}', b'~',
];

pub fn encode(value: u32, length: u32) -> String {
let mut result = String::new();
include!(concat!(env!("OUT_DIR"), "/base83_lookup.rs"));

pub fn encode_into(value: u32, length: u32, s: &mut String) {
for i in 1..=length {
let digit: u32 = (value / u32::pow(83, length - i)) % 83;
result.push(CHARACTERS[digit as usize] as char);
s.push(CHARACTERS[digit as usize] as char);
}

result
}

pub fn decode(str: &str) -> Result<u64, Error> {
Expand All @@ -28,10 +17,13 @@ pub fn decode(str: &str) -> Result<u64, Error> {
let mut value = 0;

for byte in str.as_bytes() {
let digit: usize = CHARACTERS
.iter()
.position(|r| r == byte)
.ok_or(Error::InvalidBase83(*byte))?;
if *byte as usize >= CHARACTERS_INV.len() {
return Err(Error::InvalidBase83(*byte));
}
let digit = CHARACTERS_INV[*byte as usize];
if digit == CHARACTERS_INV_INVALID {
return Err(Error::InvalidBase83(*byte));
}
value = value * 83 + digit as u64;
}

Expand All @@ -40,7 +32,13 @@ pub fn decode(str: &str) -> Result<u64, Error> {

#[cfg(test)]
mod tests {
use super::{decode, encode};
use super::{decode, encode_into};

/// Test-only convenience wrapper: encodes `value` as `length` base83 digits
/// into a fresh `String` by delegating to `encode_into`.
fn encode(value: u32, length: u32) -> String {
    let mut encoded = String::new();
    encode_into(value, length, &mut encoded);
    encoded
}

#[test]
fn encode83() {
Expand All @@ -54,6 +52,11 @@ mod tests {
assert_eq!(v, 6869);
}

#[test]
fn decode83_too_large() {
    // "€" is encoded as multiple non-ASCII bytes, none of which belong to the
    // base83 alphabet, so decoding must report an error.
    let outcome = decode("€");
    assert!(outcome.is_err());
}

#[test]
#[should_panic]
fn decode83_too_long() {
Expand Down
6 changes: 3 additions & 3 deletions src/dc.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use super::util::{linear_to_srgb, srgb_to_linear};

pub fn encode(value: [f32; 3]) -> u32 {
let rounded_r = linear_to_srgb(value[0]);
let rounded_g = linear_to_srgb(value[1]);
let rounded_b = linear_to_srgb(value[2]);
let rounded_r = linear_to_srgb(value[0]) as u32;
let rounded_g = linear_to_srgb(value[1]) as u32;
let rounded_b = linear_to_srgb(value[2]) as u32;
(rounded_r << 16) + (rounded_g << 8) + rounded_b
}

Expand Down
116 changes: 87 additions & 29 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@
return Err(Error::ComponentsOutOfRange);
}

let mut factors: Vec<[f32; 3]> = Vec::new();
let mut factors: Vec<[f32; 3]> =
Vec::with_capacity(components_x as usize * components_y as usize);

for y in 0..components_y {
for x in 0..components_x {
Expand All @@ -56,39 +57,45 @@
let dc = factors[0];
let ac = &factors[1..];

let mut blurhash = String::new();
let mut blurhash = String::with_capacity(
// 1 byte for size flag
1
// 1 byte for maximum value
+ 1
// 4 bytes for DC
+ 4
// 2 bytes for each AC
+ 2 * ac.len(),
);

let size_flag = (components_x - 1) + (components_y - 1) * 9;
blurhash.push_str(&base83::encode(size_flag, 1));
base83::encode_into(size_flag, 1, &mut blurhash);

let maximum_value: f32;
if !ac.is_empty() {
let mut actualmaximum_value = 0.0;
for i in 0..components_y * components_x - 1 {
actualmaximum_value = f32::max(f32::abs(ac[i as usize][0]), actualmaximum_value);
actualmaximum_value = f32::max(f32::abs(ac[i as usize][1]), actualmaximum_value);
actualmaximum_value = f32::max(f32::abs(ac[i as usize][2]), actualmaximum_value);
}
let actualmaximum_value = ac
.iter()
.flatten()
.map(|x| f32::abs(*x))
.reduce(f32::max)
.unwrap_or(0.0);

let quantised_maximum_value = f32::max(

Check warning on line 83 in src/lib.rs

View workflow job for this annotation

GitHub Actions / clippy

clamp-like pattern without using clamp function
0.,
f32::min(82., f32::floor(actualmaximum_value * 166. - 0.5)),
) as u32;

maximum_value = (quantised_maximum_value + 1) as f32 / 166.;
blurhash.push_str(&base83::encode(quantised_maximum_value, 1));
base83::encode_into(quantised_maximum_value, 1, &mut blurhash);
} else {
maximum_value = 1.;
blurhash.push_str(&base83::encode(0, 1));
base83::encode_into(0, 1, &mut blurhash);
}

blurhash.push_str(&base83::encode(dc::encode(dc), 4));
base83::encode_into(dc::encode(dc), 4, &mut blurhash);

for i in 0..components_y * components_x - 1 {
blurhash.push_str(&base83::encode(
ac::encode(ac[i as usize], maximum_value),
2,
));
base83::encode_into(ac::encode(ac[i as usize], maximum_value), 2, &mut blurhash);
}

Ok(blurhash)
Expand All @@ -111,10 +118,22 @@

let bytes_per_row = width * 4;

let pi_cx_over_width = PI * component_x as f32 / width as f32;
let pi_cy_over_height = PI * component_y as f32 / height as f32;

let mut cos_pi_cx_over_width = vec![0.; width as usize];
for x in 0..width {
cos_pi_cx_over_width[x as usize] = f32::cos(pi_cx_over_width * x as f32);
}

let mut cos_pi_cy_over_height = vec![0.; height as usize];
for y in 0..height {
cos_pi_cy_over_height[y as usize] = f32::cos(pi_cy_over_height * y as f32);
}

for y in 0..height {
for x in 0..width {
let basis = f32::cos(PI * component_x as f32 * x as f32 / width as f32)
* f32::cos(PI * component_y as f32 * y as f32 / height as f32);
let basis = cos_pi_cx_over_width[x as usize] * cos_pi_cy_over_height[y as usize];
r += basis * srgb_to_linear(rgb[(4 * x + y * bytes_per_row) as usize]);
g += basis * srgb_to_linear(rgb[(4 * x + 1 + y * bytes_per_row) as usize]);
b += basis * srgb_to_linear(rgb[(4 * x + 2 + y * bytes_per_row) as usize]);
Expand Down Expand Up @@ -164,17 +183,56 @@
}
}

let bytes_per_row = width * 4;
let colors: Vec<_> = colors.chunks(num_x).collect();

let bytes_per_row = width as usize * 4;

let pi_over_height = PI / height as f32;
let pi_over_width = PI / width as f32;

// Precompute the cosines
let mut cos_i_pi_x_over_width = vec![0.; width as usize * num_x];
let mut cos_j_pi_y_over_height = vec![0.; height as usize * num_y];

for x in 0..width {
let pi_x_over_width = x as f32 * pi_over_width;
for i in 0..num_x {
cos_i_pi_x_over_width[x as usize * num_x + i] = f32::cos(pi_x_over_width * i as f32);
}
}

for y in 0..height {
for x in 0..width {
let pi_y_over_height = y as f32 * pi_over_height;
for j in 0..num_y {
cos_j_pi_y_over_height[y as usize * num_y + j] = f32::cos(j as f32 * pi_y_over_height);
}
}

// Hint to the optimizer that the length of the slices is correct
assert!(height as usize * num_y == cos_j_pi_y_over_height.len());
assert!(width as usize * num_x == cos_i_pi_x_over_width.len());

for y in 0..height as usize {
let pixels = &mut pixels[y * bytes_per_row..][..bytes_per_row];

// More optimizer hints.
assert!(y * num_y + num_y <= cos_j_pi_y_over_height.len());

for x in 0..width as usize {
let mut pixel = [0.; 3];

for j in 0..num_y {
for i in 0..num_x {
let basis = f32::cos((PI * x as f32 * i as f32) / width as f32)
* f32::cos((PI * y as f32 * j as f32) / height as f32);
let color = &colors[i + j * num_x];
let cos_j_pi_y_over_height = &cos_j_pi_y_over_height[y * num_y..][..num_y];
let cos_i_pi_x_over_width = &cos_i_pi_x_over_width[x * num_x..][..num_x];

assert_eq!(cos_j_pi_y_over_height.len(), colors.len());
assert_eq!(cos_j_pi_y_over_height.len(), num_y);

for (cos_j, colors) in cos_j_pi_y_over_height.iter().zip(colors.iter()) {
assert_eq!(cos_i_pi_x_over_width.len(), colors.len());
assert_eq!(cos_i_pi_x_over_width.len(), num_x);

for (cos_i, color) in cos_i_pi_x_over_width.iter().zip(colors.iter()) {
let basis = cos_i * cos_j;

pixel[0] += color[0] * basis;
pixel[1] += color[1] * basis;
Expand All @@ -186,11 +244,11 @@
let int_g = linear_to_srgb(pixel[1]);
let int_b = linear_to_srgb(pixel[2]);

let pixels = &mut pixels[((4 * x + y * bytes_per_row) as usize)..][..4];
let pixels = &mut pixels[4 * x as usize..][..4];

Check warning on line 247 in src/lib.rs

View workflow job for this annotation

GitHub Actions / clippy

casting to the same type is unnecessary (`usize` -> `usize`)

pixels[0] = int_r as u8;
pixels[1] = int_g as u8;
pixels[2] = int_b as u8;
pixels[0] = int_r;
pixels[1] = int_g;
pixels[2] = int_b;
pixels[3] = 255u8;
}
}
Expand Down
19 changes: 7 additions & 12 deletions src/util.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
include!(concat!(env!("OUT_DIR"), "/srgb_lookup.rs"));

/// linear 0.0-1.0 floating point to srgb 0-255 integer conversion.
pub fn linear_to_srgb(value: f32) -> u32 {
pub fn linear_to_srgb(value: f32) -> u8 {
let v = f32::max(0., f32::min(1., value));

Check warning on line 5 in src/util.rs

View workflow job for this annotation

GitHub Actions / clippy

clamp-like pattern without using clamp function
if v <= 0.003_130_8 {
(v * 12.92 * 255. + 0.5).round() as u32
(v * 12.92 * 255. + 0.5).round() as u8
} else {
((1.055 * f32::powf(v, 1. / 2.4) - 0.055) * 255. + 0.5).round() as u32
// The original C implementation uses this formula:
// ((1.055 * f32::powf(v, 1. / 2.4) - 0.055) * 255. + 0.5).round() as u8
// But we can distribute the latter multiplication, to reduce the number of operations:
((1.055 * 255.) * f32::powf(v, 1. / 2.4) - (0.055 * 255. - 0.5)).round() as u8
}
}

Expand All @@ -15,14 +18,6 @@
SRGB_LOOKUP[value as usize]
}

fn sign(n: f32) -> f32 {
if n < 0. {
-1.
} else {
1.
}
}

pub fn sign_pow(val: f32, exp: f32) -> f32 {
sign(val) * f32::powf(val.abs(), exp)
f32::copysign(f32::powf(val.abs(), exp), val)
}
Loading