From 539a3ed8650b645727d6d10227803da3c0dba918 Mon Sep 17 00:00:00 2001 From: Ruben De Smet Date: Sun, 7 Jul 2024 17:48:16 +0200 Subject: [PATCH 01/18] Preallocate factors vector --- src/lib.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 984b010..1c00f57 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -44,7 +44,8 @@ pub fn encode( return Err(Error::ComponentsOutOfRange); } - let mut factors: Vec<[f32; 3]> = Vec::new(); + let mut factors: Vec<[f32; 3]> = + Vec::with_capacity(components_x as usize * components_y as usize); for y in 0..components_y { for x in 0..components_x { From a2496c768d805efba1e438955da6e7322161ee64 Mon Sep 17 00:00:00 2001 From: Ruben De Smet Date: Sun, 7 Jul 2024 17:54:37 +0200 Subject: [PATCH 02/18] Preallocate whole blurhash string --- src/base83.rs | 16 +++++++++------- src/lib.rs | 24 +++++++++++++++--------- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/src/base83.rs b/src/base83.rs index eb1e756..069970f 100644 --- a/src/base83.rs +++ b/src/base83.rs @@ -9,15 +9,11 @@ static CHARACTERS: [u8; 83] = [ b'|', b'}', b'~', ]; -pub fn encode(value: u32, length: u32) -> String { - let mut result = String::new(); - +pub fn encode_into(value: u32, length: u32, s: &mut String) { for i in 1..=length { let digit: u32 = (value / u32::pow(83, length - i)) % 83; - result.push(CHARACTERS[digit as usize] as char); + s.push(CHARACTERS[digit as usize] as char); } - - result } pub fn decode(str: &str) -> Result { @@ -40,7 +36,13 @@ pub fn decode(str: &str) -> Result { #[cfg(test)] mod tests { - use super::{decode, encode}; + use super::{decode, encode_into}; + + fn encode(value: u32, length: u32) -> String { + let mut s = String::new(); + encode_into(value, length, &mut s); + s + } #[test] fn encode83() { diff --git a/src/lib.rs b/src/lib.rs index 1c00f57..a8cad2f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -57,10 +57,19 @@ pub fn encode( let dc = factors[0]; let ac = &factors[1..]; - let mut blurhash = String::new(); + let mut blurhash = String::with_capacity( + // 1 byte for size flag + 1 + // 1 byte for maximum value + + 1 + // 4 bytes for DC + + 4 + // 2 bytes for each AC + + 2 * ac.len(), + ); let size_flag = (components_x - 1) + (components_y - 1) * 9; - blurhash.push_str(&base83::encode(size_flag, 1)); + base83::encode_into(size_flag, 1, &mut blurhash); let maximum_value: f32; if !ac.is_empty() { @@ -77,19 +86,16 @@ pub fn encode( ) as u32; maximum_value = (quantised_maximum_value + 1) as f32 / 166.; - blurhash.push_str(&base83::encode(quantised_maximum_value, 1)); + base83::encode_into(quantised_maximum_value, 1, &mut blurhash); } else { maximum_value = 1.; - blurhash.push_str(&base83::encode(0, 1)); + base83::encode_into(0, 1, &mut blurhash); } - blurhash.push_str(&base83::encode(dc::encode(dc), 4)); + base83::encode_into(dc::encode(dc), 4, &mut blurhash); for i in 0..components_y * components_x - 1 { - blurhash.push_str(&base83::encode( - ac::encode(ac[i as usize], maximum_value), - 2, - )); + base83::encode_into(ac::encode(ac[i as usize], maximum_value), 2, &mut blurhash); } Ok(blurhash) From 9f4697a1fb1f950aa084b47aac0d23ca38568e60 Mon Sep 17 00:00:00 2001 From: Ruben De Smet Date: Sun, 7 Jul 2024 18:11:24 +0200 Subject: [PATCH 03/18] build: Move write_srgb in a function --- build.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/build.rs b/build.rs index e45cae3..4291665 100644 --- a/build.rs +++ b/build.rs @@ -18,11 +18,15 @@ fn generate_srgb_lookup() -> [f32; 256] { table } -fn main() { +fn write_srgb(f: &mut std::fs::File) { let table = generate_srgb_lookup(); + writeln!(f, "static SRGB_LOOKUP: [f32; 256] = {:?};", table).unwrap(); +} +fn main() { let out_dir = std::env::var("OUT_DIR").unwrap(); let out_dir = std::path::PathBuf::from(out_dir); + let mut f = std::fs::File::create(out_dir.join("srgb_lookup.rs")).unwrap(); - writeln!(f, "static SRGB_LOOKUP: [f32; 256] = {:?};", table).unwrap(); + write_srgb(&mut f); } From fdcd7cd3dd6b03a38902c56e9447c3042cff163e Mon Sep 17 00:00:00 2001 From: Ruben De Smet Date: Sun, 7 Jul 2024 18:14:26 +0200 Subject: [PATCH 04/18] Write base83 characters list through build script --- build.rs | 9 +++++++++ src/base83.rs | 9 +-------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/build.rs b/build.rs index 4291665..3e75c80 100644 --- a/build.rs +++ b/build.rs @@ -23,10 +23,19 @@ fn write_srgb(f: &mut std::fs::File) { writeln!(f, "static SRGB_LOOKUP: [f32; 256] = {:?};", table).unwrap(); } +fn write_base83(f: &mut std::fs::File) { + const CHARACTERS: &[u8; 83] = + b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz#$%*+,-.:;=?@[]^_{|}~"; + writeln!(f, "const CHARACTERS: [u8; 83] = {:?};", CHARACTERS).unwrap(); +} + fn main() { let out_dir = std::env::var("OUT_DIR").unwrap(); let out_dir = std::path::PathBuf::from(out_dir); let mut f = std::fs::File::create(out_dir.join("srgb_lookup.rs")).unwrap(); write_srgb(&mut f); + + let mut f = std::fs::File::create(out_dir.join("base83_lookup.rs")).unwrap(); + write_base83(&mut f); } diff --git a/src/base83.rs b/src/base83.rs index 069970f..7fbaf27 100644 --- a/src/base83.rs +++ b/src/base83.rs @@ -1,13 +1,6 @@ use crate::Error; -static CHARACTERS: [u8; 83] = [ - b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'A', b'B', b'C', b'D', b'E', b'F', - b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T', b'U', b'V', - b'W', b'X', b'Y', b'Z', b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', - b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'#', b'$', - b'%', b'*', b'+', b',', b'-', b'.', b':', b';', b'=', b'?', b'@', b'[', b']', b'^', b'_', b'{', - b'|', b'}', b'~', -]; +include!(concat!(env!("OUT_DIR"), "/base83_lookup.rs")); pub fn encode_into(value: u32, length: u32, s: &mut String) { for i in 1..=length { From 71417afb1657d8c55891d87df88a98271cb89cbd Mon Sep 17 00:00:00 2001 From: Ruben De Smet Date: Sun, 7 Jul 2024 18:30:21 +0200 Subject: [PATCH 05/18] Precompute height/width inverses --- src/lib.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index a8cad2f..8b38615 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -173,14 +173,17 @@ pub fn decode_into( let bytes_per_row = width * 4; + let height_inv = 1. / height as f32; + let width_inv = 1. / width as f32; + for y in 0..height { for x in 0..width { let mut pixel = [0.; 3]; for j in 0..num_y { for i in 0..num_x { - let basis = f32::cos((PI * x as f32 * i as f32) / width as f32) - * f32::cos((PI * y as f32 * j as f32) / height as f32); + let basis = f32::cos((PI * x as f32 * i as f32) * width_inv) + * f32::cos(PI * y as f32 * j as f32 * height_inv); let color = &colors[i + j * num_x]; pixel[0] += color[0] * basis; From 5e18d529c016e795e31e8d2560431a5a7ccfc297 Mon Sep 17 00:00:00 2001 From: Ruben De Smet Date: Sun, 7 Jul 2024 18:56:58 +0200 Subject: [PATCH 06/18] Pull pi*x/width and pi*y/height out of hot loop --- src/lib.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 8b38615..0e4f0cf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -173,17 +173,19 @@ pub fn decode_into( let bytes_per_row = width * 4; - let height_inv = 1. / height as f32; - let width_inv = 1. / width as f32; + let pi_over_height = PI / height as f32; + let pi_over_width = PI / width as f32; for y in 0..height { + let pi_y_over_height = PI * y as f32 * pi_over_height; for x in 0..width { + let pi_x_over_width = PI * x as f32 * pi_over_width; let mut pixel = [0.; 3]; for j in 0..num_y { for i in 0..num_x { - let basis = f32::cos((PI * x as f32 * i as f32) * width_inv) - * f32::cos(PI * y as f32 * j as f32 * height_inv); + let basis = f32::cos(i as f32 * pi_x_over_width) + * f32::cos(j as f32 * pi_y_over_height); let color = &colors[i + j * num_x]; pixel[0] += color[0] * basis; From 7ad8f2e1fd0759fc5ec95741cbed86fb41b73458 Mon Sep 17 00:00:00 2001 From: Ruben De Smet Date: Sun, 7 Jul 2024 19:01:25 +0200 Subject: [PATCH 07/18] Generate base83 inverse character map --- build.rs | 13 +++++++++++++ src/base83.rs | 16 ++++++++++++---- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/build.rs b/build.rs index 3e75c80..7601ba3 100644 --- a/build.rs +++ b/build.rs @@ -27,6 +27,19 @@ fn write_base83(f: &mut std::fs::File) { const CHARACTERS: &[u8; 83] = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz#$%*+,-.:;=?@[]^_{|}~"; writeln!(f, "const CHARACTERS: [u8; 83] = {:?};", CHARACTERS).unwrap(); + + let max_plus_one = CHARACTERS.iter().max().unwrap() + 1; + let mut inv_map: [u8; 256] = [max_plus_one; 256]; + for (i, &c) in CHARACTERS.iter().enumerate() { + inv_map[c as usize] = i as u8; + } + writeln!( + f, + "const CHARACTERS_INV: [u8; {max_plus_one}] = {:?};", + &inv_map[0..max_plus_one as usize] + ) + .unwrap(); + writeln!(f, "const CHARACTERS_INV_INVALID: u8 = {};", max_plus_one).unwrap(); } fn main() { diff --git a/src/base83.rs b/src/base83.rs index 7fbaf27..1593679 100644 --- a/src/base83.rs +++ b/src/base83.rs @@ -17,10 +17,13 @@ pub fn decode(str: &str) -> Result { let mut value = 0; for byte in str.as_bytes() { - let digit: usize = CHARACTERS - .iter() - .position(|r| r == byte) - .ok_or(Error::InvalidBase83(*byte))?; + if *byte as usize >= CHARACTERS_INV.len() { + return Err(Error::InvalidBase83(*byte)); + } + let digit = CHARACTERS_INV[*byte as usize]; + if digit == CHARACTERS_INV_INVALID { + return Err(Error::InvalidBase83(*byte)); + } value = value * 83 + digit as u64; } @@ -49,6 +52,11 @@ mod tests { assert_eq!(v, 6869); } + #[test] + fn decode83_too_large() { + assert!(decode("€").is_err()); + } + #[test] #[should_panic] fn decode83_too_long() { From 08a04bc680ee3d3cb54b37a74257aa7727d14bbd Mon Sep 17 00:00:00 2001 From: Ruben De Smet Date: Sun, 7 Jul 2024 19:12:01 +0200 Subject: [PATCH 08/18] Remove a multiplication for sign_pow --- src/util.rs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/util.rs b/src/util.rs index 6e17108..78bb911 100644 --- a/src/util.rs +++ b/src/util.rs @@ -15,14 +15,11 @@ pub fn srgb_to_linear(value: u8) -> f32 { SRGB_LOOKUP[value as usize] } -fn sign(n: f32) -> f32 { - if n < 0. { - -1. +pub fn sign_pow(val: f32, exp: f32) -> f32 { + let t = f32::powf(val.abs(), exp); + if val < 0. { + -t } else { - 1. + t } } - -pub fn sign_pow(val: f32, exp: f32) -> f32 { - sign(val) * f32::powf(val.abs(), exp) -} From f26590c945ac985c5a54fadd4f6f9e972e3bad62 Mon Sep 17 00:00:00 2001 From: Ruben De Smet Date: Sun, 7 Jul 2024 19:12:33 +0200 Subject: [PATCH 09/18] Reduce return size for linear_to_srgb --- src/dc.rs | 6 +++--- src/lib.rs | 6 +++--- src/util.rs | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/dc.rs b/src/dc.rs index 8681b5c..6414dad 100644 --- a/src/dc.rs +++ b/src/dc.rs @@ -1,9 +1,9 @@ use super::util::{linear_to_srgb, srgb_to_linear}; pub fn encode(value: [f32; 3]) -> u32 { - let rounded_r = linear_to_srgb(value[0]); - let rounded_g = linear_to_srgb(value[1]); - let rounded_b = linear_to_srgb(value[2]); + let rounded_r = linear_to_srgb(value[0]) as u32; + let rounded_g = linear_to_srgb(value[1]) as u32; + let rounded_b = linear_to_srgb(value[2]) as u32; (rounded_r << 16) + (rounded_g << 8) + rounded_b } diff --git a/src/lib.rs b/src/lib.rs index 0e4f0cf..79d563b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -200,9 +200,9 @@ pub fn decode_into( let pixels = &mut pixels[((4 * x + y * bytes_per_row) as usize)..][..4]; - pixels[0] = int_r as u8; - pixels[1] = int_g as u8; - pixels[2] = int_b as u8; + pixels[0] = int_r; + pixels[1] = int_g; + pixels[2] = int_b; pixels[3] = 255u8; } } diff --git a/src/util.rs b/src/util.rs index 78bb911..0fc5647 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,12 +1,12 @@ include!(concat!(env!("OUT_DIR"), "/srgb_lookup.rs")); /// linear 0.0-1.0 floating point to srgb 0-255 integer conversion. -pub fn linear_to_srgb(value: f32) -> u32 { +pub fn linear_to_srgb(value: f32) -> u8 { let v = f32::max(0., f32::min(1., value)); if v <= 0.003_130_8 { - (v * 12.92 * 255. + 0.5).round() as u32 + (v * 12.92 * 255. + 0.5).round() as u8 } else { - ((1.055 * f32::powf(v, 1. / 2.4) - 0.055) * 255. + 0.5).round() as u32 + ((1.055 * f32::powf(v, 1. / 2.4) - 0.055) * 255. + 0.5).round() as u8 } } From c44d6058267ff2461aa8739b50c15aeea8de022a Mon Sep 17 00:00:00 2001 From: Ruben De Smet Date: Mon, 8 Jul 2024 10:14:26 +0200 Subject: [PATCH 10/18] Precompute cosines outside of hot loop --- src/lib.rs | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 79d563b..e1b7dc1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -176,16 +176,26 @@ pub fn decode_into( let pi_over_height = PI / height as f32; let pi_over_width = PI / width as f32; + let mut pi_x_over_width = vec![0.; width as usize]; + for x in 0..width { + pi_x_over_width[x as usize] = x as f32 * pi_over_width as f32; + } + + let mut cos_i_pi_x_over_width = vec![0.; num_x]; + for y in 0..height { let pi_y_over_height = PI * y as f32 * pi_over_height; for x in 0..width { - let pi_x_over_width = PI * x as f32 * pi_over_width; let mut pixel = [0.; 3]; + for i in 0..num_x { + cos_i_pi_x_over_width[i] = f32::cos(pi_x_over_width[x as usize] * i as f32); + } + for j in 0..num_y { + let cos_j_pi_y_over_height = f32::cos(j as f32 * pi_y_over_height); for i in 0..num_x { - let basis = f32::cos(i as f32 * pi_x_over_width) - * f32::cos(j as f32 * pi_y_over_height); + let basis = cos_i_pi_x_over_width[i] * cos_j_pi_y_over_height; let color = &colors[i + j * num_x]; pixel[0] += color[0] * basis; From 428d553b7d75532b1102abb2e560603f832472b1 Mon Sep 17 00:00:00 2001 From: Ruben De Smet Date: Mon, 8 Jul 2024 10:40:25 +0200 Subject: [PATCH 11/18] Pull pi_x_width/pi_y_height out of hot loop --- src/lib.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index e1b7dc1..ce31b07 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -118,10 +118,13 @@ fn multiply_basis_function( let bytes_per_row = width * 4; + let pi_cx_over_width = PI * component_x as f32 / width as f32; + let pi_cy_over_height = PI * component_y as f32 / height as f32; + for y in 0..height { for x in 0..width { - let basis = f32::cos(PI * component_x as f32 * x as f32 / width as f32) - * f32::cos(PI * component_y as f32 * y as f32 / height as f32); + let basis = + f32::cos(pi_cx_over_width * x as f32) * f32::cos(pi_cy_over_height * y as f32); r += basis * srgb_to_linear(rgb[(4 * x + y * bytes_per_row) as usize]); g += basis * srgb_to_linear(rgb[(4 * x + 1 + y * bytes_per_row) as usize]); b += basis * srgb_to_linear(rgb[(4 * x + 2 + y * bytes_per_row) as usize]); From bd7d2f8e4c7fad535c440374705f357b5b7ccb1c Mon Sep 17 00:00:00 2001 From: Ruben De Smet Date: Mon, 8 Jul 2024 10:40:36 +0200 Subject: [PATCH 12/18] Use precomputation tables for cosines in decode --- src/lib.rs | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index ce31b07..00837f4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -179,26 +179,32 @@ pub fn decode_into( let pi_over_height = PI / height as f32; let pi_over_width = PI / width as f32; - let mut pi_x_over_width = vec![0.; width as usize]; + // Precompute the cosines + let mut cos_i_pi_x_over_width = vec![0.; width as usize * num_x]; + let mut cos_j_pi_y_over_height = vec![0.; height as usize * num_y]; + for x in 0..width { - pi_x_over_width[x as usize] = x as f32 * pi_over_width as f32; + let pi_x_over_width = x as f32 * pi_over_width; + for i in 0..num_x { + cos_i_pi_x_over_width[x as usize * num_x + i] = f32::cos(pi_x_over_width * i as f32); + } } - let mut cos_i_pi_x_over_width = vec![0.; num_x]; + for y in 0..height { + let pi_y_over_height = y as f32 * pi_over_height; + for j in 0..num_y { + cos_j_pi_y_over_height[y as usize * num_y + j] = f32::cos(j as f32 * pi_y_over_height); + } + } for y in 0..height { - let pi_y_over_height = PI * y as f32 * pi_over_height; for x in 0..width { let mut pixel = [0.; 3]; - for i in 0..num_x { - cos_i_pi_x_over_width[i] = f32::cos(pi_x_over_width[x as usize] * i as f32); - } - for j in 0..num_y { - let cos_j_pi_y_over_height = f32::cos(j as f32 * pi_y_over_height); for i in 0..num_x { - let basis = cos_i_pi_x_over_width[i] * cos_j_pi_y_over_height; + let basis = cos_i_pi_x_over_width[x as usize * num_x + i] + * cos_j_pi_y_over_height[y as usize * num_y + j]; let color = &colors[i + j * num_x]; pixel[0] += color[0] * basis; From ab0520e1a35685c900aaef3e73816aa0c4825169 Mon Sep 17 00:00:00 2001 From: Ruben De Smet Date: Mon, 8 Jul 2024 11:04:45 +0200 Subject: [PATCH 13/18] Use subslice for accessing cosine table Co-authored-by: Thibaut Vandervelden --- src/lib.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 00837f4..07e106f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -201,11 +201,14 @@ pub fn decode_into( for x in 0..width { let mut pixel = [0.; 3]; + let cos_i_pi_x_over_width = &cos_i_pi_x_over_width[x as usize * num_x..][..num_x]; + let cos_j_pi_y_over_height = &cos_j_pi_y_over_height[y as usize * num_y..][..num_y]; for j in 0..num_y { + let colors = &colors[j * num_x..][..num_x]; + for i in 0..num_x { - let basis = cos_i_pi_x_over_width[x as usize * num_x + i] - * cos_j_pi_y_over_height[y as usize * num_y + j]; - let color = &colors[i + j * num_x]; + let basis = cos_i_pi_x_over_width[i] * cos_j_pi_y_over_height[j]; + let color = &colors[i]; pixel[0] += color[0] * basis; pixel[1] += color[1] * basis; From 8efa5cfe32cdec5efa4a40fe312da6c8dd2ea152 Mon Sep 17 00:00:00 2001 From: Ruben De Smet Date: Mon, 8 Jul 2024 11:12:52 +0200 Subject: [PATCH 14/18] Use f32::copysign instead of manual branched assignment --- src/util.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/util.rs b/src/util.rs index 0fc5647..53fd5c7 100644 --- a/src/util.rs +++ b/src/util.rs @@ -16,10 +16,5 @@ pub fn srgb_to_linear(value: u8) -> f32 { } pub fn sign_pow(val: f32, exp: f32) -> f32 { - let t = f32::powf(val.abs(), exp); - if val < 0. { - -t - } else { - t - } + f32::copysign(f32::powf(val.abs(), exp), val) } From f49f21d249fb5e12965671cf76b5beea6df77c4d Mon Sep 17 00:00:00 2001 From: Ruben De Smet Date: Mon, 8 Jul 2024 11:18:42 +0200 Subject: [PATCH 15/18] Optimization in linear_to_srgb --- src/util.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/util.rs b/src/util.rs index 53fd5c7..b79e13c 100644 --- a/src/util.rs +++ b/src/util.rs @@ -6,7 +6,10 @@ pub fn linear_to_srgb(value: f32) -> u8 { if v <= 0.003_130_8 { (v * 12.92 * 255. + 0.5).round() as u8 } else { - ((1.055 * f32::powf(v, 1. / 2.4) - 0.055) * 255. + 0.5).round() as u8 + // The original C implementation uses this formula: + // ((1.055 * f32::powf(v, 1. / 2.4) - 0.055) * 255. + 0.5).round() as u8 + // But we can distribute the latter multiplication, to reduce the number of operations: + ((1.055 * 255.) * f32::powf(v, 1. / 2.4) - (0.055 * 255. - 0.5)).round() as u8 } } From 1cca59ddb3cd8836e81b89feffcca0c87f216b5f Mon Sep 17 00:00:00 2001 From: Ruben De Smet Date: Mon, 8 Jul 2024 11:29:29 +0200 Subject: [PATCH 16/18] Faster computation of maximum --- src/lib.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 07e106f..cf411c9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -73,12 +73,12 @@ pub fn encode( let maximum_value: f32; if !ac.is_empty() { - let mut actualmaximum_value = 0.0; - for i in 0..components_y * components_x - 1 { - actualmaximum_value = f32::max(f32::abs(ac[i as usize][0]), actualmaximum_value); - actualmaximum_value = f32::max(f32::abs(ac[i as usize][1]), actualmaximum_value); - actualmaximum_value = f32::max(f32::abs(ac[i as usize][2]), actualmaximum_value); - } + let actualmaximum_value = ac + .iter() + .flatten() + .map(|x| f32::abs(*x)) + .reduce(f32::max) + .unwrap_or(0.0); let quantised_maximum_value = f32::max( 0., From a89f1316e9b89e4941ebe0c5c9e2560f50ee3e8c Mon Sep 17 00:00:00 2001 From: Ruben De Smet Date: Mon, 8 Jul 2024 11:34:54 +0200 Subject: [PATCH 17/18] Encode: precompute cosines --- src/lib.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index cf411c9..f7a9efb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -121,10 +121,19 @@ fn multiply_basis_function( let pi_cx_over_width = PI * component_x as f32 / width as f32; let pi_cy_over_height = PI * component_y as f32 / height as f32; + let mut cos_pi_cx_over_width = vec![0.; width as usize]; + for x in 0..width { + cos_pi_cx_over_width[x as usize] = f32::cos(pi_cx_over_width * x as f32); + } + + let mut cos_pi_cy_over_height = vec![0.; height as usize]; + for y in 0..height { + cos_pi_cy_over_height[y as usize] = f32::cos(pi_cy_over_height * y as f32); + } + for y in 0..height { for x in 0..width { - let basis = - f32::cos(pi_cx_over_width * x as f32) * f32::cos(pi_cy_over_height * y as f32); + let basis = cos_pi_cx_over_width[x as usize] * cos_pi_cy_over_height[y as usize]; r += basis * srgb_to_linear(rgb[(4 * x + y * bytes_per_row) as usize]); g += basis * srgb_to_linear(rgb[(4 * x + 1 + y * bytes_per_row) as usize]); b += basis * srgb_to_linear(rgb[(4 * x + 2 + y * bytes_per_row) as usize]); From 73fa250bff1f6b753cc33b2baff84ce7f46e3aa8 Mon Sep 17 00:00:00 2001 From: Ruben De Smet Date: Mon, 8 Jul 2024 14:10:13 +0200 Subject: [PATCH 18/18] asserts as hints for the optimizer, use zip as iterator --- src/lib.rs | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index f7a9efb..5975b0a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -183,7 +183,9 @@ pub fn decode_into( } } - let bytes_per_row = width * 4; + let colors: Vec<_> = colors.chunks(num_x).collect(); + + let bytes_per_row = width as usize * 4; let pi_over_height = PI / height as f32; let pi_over_width = PI / width as f32; @@ -206,18 +208,31 @@ pub fn decode_into( } } - for y in 0..height { - for x in 0..width { + // Hint to the optimizer that the length of the slices is correct + assert!(height as usize * num_y == cos_j_pi_y_over_height.len()); + assert!(width as usize * num_x == cos_i_pi_x_over_width.len()); + + for y in 0..height as usize { + let pixels = &mut pixels[y * bytes_per_row..][..bytes_per_row]; + + // More optimizer hints. + assert!(y * num_y + num_y <= cos_j_pi_y_over_height.len()); + + for x in 0..width as usize { let mut pixel = [0.; 3]; - let cos_i_pi_x_over_width = &cos_i_pi_x_over_width[x as usize * num_x..][..num_x]; - let cos_j_pi_y_over_height = &cos_j_pi_y_over_height[y as usize * num_y..][..num_y]; - for j in 0..num_y { - let colors = &colors[j * num_x..][..num_x]; + let cos_j_pi_y_over_height = &cos_j_pi_y_over_height[y * num_y..][..num_y]; + let cos_i_pi_x_over_width = &cos_i_pi_x_over_width[x * num_x..][..num_x]; + + assert_eq!(cos_j_pi_y_over_height.len(), colors.len()); + assert_eq!(cos_j_pi_y_over_height.len(), num_y); + + for (cos_j, colors) in cos_j_pi_y_over_height.iter().zip(colors.iter()) { + assert_eq!(cos_i_pi_x_over_width.len(), colors.len()); + assert_eq!(cos_i_pi_x_over_width.len(), num_x); - for i in 0..num_x { - let basis = cos_i_pi_x_over_width[i] * cos_j_pi_y_over_height[j]; - let color = &colors[i]; + for (cos_i, color) in cos_i_pi_x_over_width.iter().zip(colors.iter()) { + let basis = cos_i * cos_j; pixel[0] += color[0] * basis; pixel[1] += color[1] * basis; @@ -229,7 +244,7 @@ pub fn decode_into( let int_g = linear_to_srgb(pixel[1]); let int_b = linear_to_srgb(pixel[2]); - let pixels = &mut pixels[((4 * x + y * bytes_per_row) as usize)..][..4]; + let pixels = &mut pixels[4 * x as usize..][..4]; pixels[0] = int_r; pixels[1] = int_g;