Skip to content

Commit 8c19ae9

Browse files
authored
Merge pull request #416 from Kirk-Fox/lookup
Add `FromLinear` and `IntoLinear` lookup table creation to build script
2 parents f509ebd + 544e40f commit 8c19ae9

File tree

14 files changed

+70251
-901
lines changed

14 files changed

+70251
-901
lines changed

codegen/src/lut.rs

+266
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,266 @@
1+
use anyhow::Result;
2+
use proc_macro2::{Ident, TokenStream};
3+
use quote::{format_ident, quote};
4+
5+
use crate::{codegen_file::CodegenFile, lut::model::LinearModel};
6+
7+
mod model;
8+
9+
pub fn generate() -> Result<()> {
10+
let mut file = CodegenFile::create("palette/src/encoding/lut/codegen.rs")?;
11+
12+
let transfer_fn_u8 = vec![
13+
LutEntryU8::new(
14+
"srgb",
15+
"SRGB",
16+
TransferFn::new_with_linear(12.92, 0.0031308, 2.4),
17+
),
18+
LutEntryU8::new(
19+
"rec_standards",
20+
"REC_OETF",
21+
TransferFn::new_with_linear(4.5, 0.018053968510807, 1.0 / 0.45),
22+
),
23+
LutEntryU8::new(
24+
"adobe",
25+
"ADOBE_RGB",
26+
TransferFn::new_pure_gamma(563.0 / 256.0),
27+
),
28+
LutEntryU8::new("p3", "P3_GAMMA", TransferFn::new_pure_gamma(2.6)),
29+
];
30+
31+
let transfer_fn_u16 = vec![LutEntryU16::new(
32+
"prophoto",
33+
"PROPHOTO_RGB",
34+
TransferFn::new_with_linear(16.0, 0.001953125, 1.8),
35+
)];
36+
37+
for LutEntryU8 {
38+
module,
39+
fn_type_uppercase,
40+
transfer_fn,
41+
} in transfer_fn_u8
42+
{
43+
let u8_to_float = build_u8_to_float_lut(&fn_type_uppercase, &transfer_fn);
44+
let float_to_u8 = build_float_to_u8_lut(&fn_type_uppercase, &transfer_fn);
45+
46+
file.append(quote! {
47+
pub mod #module {
48+
#u8_to_float
49+
50+
#float_to_u8
51+
}
52+
})?;
53+
}
54+
55+
for LutEntryU16 {
56+
module,
57+
fn_type_uppercase,
58+
transfer_fn,
59+
} in transfer_fn_u16
60+
{
61+
let u16_to_float = build_u16_to_float_lut(&fn_type_uppercase, &transfer_fn);
62+
let float_to_u8 = build_float_to_u16_lut(&fn_type_uppercase, &transfer_fn);
63+
64+
file.append(quote! {
65+
#[cfg(feature = "gamma_lut_u16")]
66+
pub mod #module {
67+
#u16_to_float
68+
69+
#float_to_u8
70+
}
71+
})?;
72+
}
73+
74+
Ok(())
75+
}
76+
77+
/// A model of a transfer function, as needed for generating lookup tables.
///
/// A transfer function has an optional linear segment, which applies to
/// linear values below `beta`, and a power-law segment governed by `gamma`.
/// When a linear segment exists, `alpha` is derived so that the two segments
/// meet, which keeps the function continuous.
struct TransferFn {
    /// Converts an encoded value into its linear counterpart.
    into_linear: Box<dyn Fn(f64) -> f64>,
    /// Slope of the linear segment, or `None` if there is no linear segment.
    linear_scale: Option<f64>,
    alpha: f64,
    beta: f64,
    gamma: f64,
}

impl TransferFn {
    /// Models a transfer function with a linear segment of slope
    /// `linear_scale` that ends at the linear value `linear_end`, followed
    /// by a power-law segment with exponent `gamma`.
    fn new_with_linear(linear_scale: f64, linear_end: f64, gamma: f64) -> Self {
        let alpha = (linear_scale * linear_end - 1.0) / (linear_end.powf(gamma.recip()) - 1.0);
        let beta = linear_end;
        // Largest encoded value that still belongs to the linear segment.
        let encoded_cutoff = linear_scale * beta;

        let decode = move |encoded: f64| -> f64 {
            if encoded <= encoded_cutoff {
                return encoded / linear_scale;
            }
            ((encoded + alpha - 1.0) / alpha).powf(gamma)
        };

        Self {
            into_linear: Box::new(decode),
            linear_scale: Some(linear_scale),
            alpha,
            beta,
            gamma,
        }
    }

    /// Models a transfer function that is a plain power function with
    /// exponent `gamma` and has no linear segment.
    fn new_pure_gamma(gamma: f64) -> Self {
        let decode = move |encoded: f64| -> f64 { encoded.powf(gamma) };

        Self {
            into_linear: Box::new(decode),
            linear_scale: None,
            alpha: 1.0,
            beta: 0.0,
            gamma,
        }
    }
}
120+
121+
struct LutEntryU8 {
122+
module: Ident,
123+
fn_type_uppercase: String,
124+
transfer_fn: TransferFn,
125+
}
126+
127+
struct LutEntryU16 {
128+
module: Ident,
129+
fn_type_uppercase: String,
130+
transfer_fn: TransferFn,
131+
}
132+
133+
impl LutEntryU8 {
134+
fn new(module: &str, fn_type_uppercase: &str, transfer_fn: TransferFn) -> Self {
135+
Self {
136+
module: format_ident!("{module}"),
137+
fn_type_uppercase: fn_type_uppercase.to_owned(),
138+
transfer_fn,
139+
}
140+
}
141+
}
142+
143+
impl LutEntryU16 {
144+
fn new(module: &str, fn_type_uppercase: &str, transfer_fn: TransferFn) -> Self {
145+
Self {
146+
module: format_ident!("{module}"),
147+
fn_type_uppercase: fn_type_uppercase.to_owned(),
148+
transfer_fn,
149+
}
150+
}
151+
}
152+
153+
fn build_u8_to_float_lut(fn_type_uppercase: &str, transfer_fn: &TransferFn) -> TokenStream {
154+
let table = (0..=u8::MAX).map(|i| (transfer_fn.into_linear)((i as f64) / 255.0));
155+
let table_ident = format_ident!("{fn_type_uppercase}_U8_TO_F64");
156+
let table_f32 = table.clone().map(|f| f as f32);
157+
let table_f32_ident = format_ident!("{fn_type_uppercase}_U8_TO_F32");
158+
quote! {
159+
pub const #table_ident: [f64; 256] = [
160+
#(#table),*
161+
];
162+
163+
pub const #table_f32_ident: [f32; 256] = [
164+
#(#table_f32),*
165+
];
166+
}
167+
}
168+
169+
fn build_u16_to_float_lut(fn_type_uppercase: &str, transfer_fn: &TransferFn) -> TokenStream {
170+
let table = (0..=u16::MAX).map(|i| (transfer_fn.into_linear)((i as f64) / 65535.0));
171+
let table_ident = format_ident!("{fn_type_uppercase}_U16_TO_F64");
172+
quote! {
173+
pub static #table_ident: [f64; 65536] = [
174+
#(#table),*
175+
];
176+
}
177+
}
178+
179+
/// This algorithm is an adaptation of [this C++ code](<https://gist.github.com/rygorous/2203834>)
180+
/// by Fabian "ryg" Giesen, which utilizes simple linear regression on
181+
/// sub-intervals of the transfer function's domain and stores the resulting
182+
/// models' scales and biases into a lookup table.
183+
///
184+
/// The algorithm linked above calculates the transfer function for every
185+
/// potential `f32` input and feeds that into the regression model. In
186+
/// contrast, this algorithm replaces the discrete sums in the model with
187+
/// continuous integrals in order to reduce the time it takes to generate
188+
/// the tables. We are able to do this since transfer functions follow a
189+
/// predictable pattern for which the anti-derivative is known.
190+
fn build_float_to_u8_lut(fn_type_uppercase: &str, transfer_fn: &TransferFn) -> TokenStream {
191+
// 1.0 - f32::EPSILON
192+
const MAX_FLOAT_BITS: u32 = 0x3f7fffff;
193+
// The number of mantissa bits used to index into the lookup table
194+
const MAN_INDEX_WIDTH: u32 = 3;
195+
// The number of bits in the remainder of the mantissa
196+
const BUCKET_INDEX_WIDTH: u32 = 20;
197+
const BUCKET_SIZE: u32 = 1 << BUCKET_INDEX_WIDTH;
198+
// Any input less than or equal to this maps to 0
199+
let min_float_bits =
200+
(((transfer_fn.into_linear)(0.5 / 255.0) as f32).to_bits() - 1) & 0xff800000;
201+
202+
let exp_table_size = ((MAX_FLOAT_BITS - min_float_bits) >> 23) + 1;
203+
let table_size = exp_table_size << MAN_INDEX_WIDTH;
204+
205+
let table = (0..table_size).map(|i| {
206+
let start = min_float_bits + (i << BUCKET_INDEX_WIDTH);
207+
let end = start + BUCKET_SIZE;
208+
209+
LinearModel::new(transfer_fn, start, end, MAN_INDEX_WIDTH, 8).into_u8_lookup()
210+
});
211+
212+
let table_ident = format_ident!("TO_{fn_type_uppercase}_U8");
213+
let table_size_usize = table_size as usize;
214+
215+
let float_const_ident = format_ident!("{fn_type_uppercase}_MIN_FLOAT");
216+
quote! {
217+
pub const #float_const_ident: u32 = #min_float_bits;
218+
219+
pub const #table_ident: [u32; #table_size_usize] = [
220+
#(#table),*
221+
];
222+
}
223+
}
224+
225+
fn build_float_to_u16_lut(fn_type_uppercase: &str, transfer_fn: &TransferFn) -> TokenStream {
226+
// 1.0 - f32::EPSILON
227+
const MAX_FLOAT_BITS: u32 = 0x3f7fffff;
228+
// The number of mantissa bits used to index into the lookup table
229+
const MAN_INDEX_WIDTH: u32 = 7;
230+
// The number of bits in the remainder of the mantissa
231+
const BUCKET_INDEX_WIDTH: i32 = 16;
232+
const BUCKET_SIZE: u32 = 1 << BUCKET_INDEX_WIDTH;
233+
let TransferFn {
234+
into_linear,
235+
linear_scale,
236+
beta,
237+
..
238+
} = transfer_fn;
239+
let min_float_bits = (*beta as f32)
240+
.to_bits()
241+
.max((into_linear(0.5 / 65535.0) as f32).to_bits() - 1)
242+
& 0xff800000;
243+
let exp_table_size = ((MAX_FLOAT_BITS - min_float_bits) >> 23) + 1;
244+
let table_size = exp_table_size << MAN_INDEX_WIDTH;
245+
let table = (0..table_size).map(|i| {
246+
let start = min_float_bits + (i << BUCKET_INDEX_WIDTH);
247+
let end = start + BUCKET_SIZE;
248+
249+
LinearModel::new(transfer_fn, start, end, MAN_INDEX_WIDTH, 16).into_u16_lookup()
250+
});
251+
252+
let table_ident = format_ident!("TO_{fn_type_uppercase}_U16");
253+
let table_size_usize = table_size as usize;
254+
let linear_scale = 65535.0 * (linear_scale.unwrap_or_default() as f32);
255+
256+
let float_const_ident = format_ident!("{fn_type_uppercase}_MIN_FLOAT");
257+
let linear_scale_ident = format_ident!("{fn_type_uppercase}_LINEAR_SCALE");
258+
quote! {
259+
pub const #float_const_ident: u32 = #min_float_bits;
260+
pub const #linear_scale_ident: f32 = #linear_scale;
261+
262+
pub const #table_ident: [u64; #table_size_usize] = [
263+
#(#table),*
264+
];
265+
}
266+
}

codegen/src/lut/model.rs

+129
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
use super::TransferFn;
2+
3+
/// This struct contains the scale and bias for a linear
4+
/// regression model of a transfer function on a given interval.
5+
///
6+
/// This model is calculated by using simple linear regression with
7+
/// integration instead of summation.
8+
pub(super) struct LinearModel {
9+
scale: f64,
10+
bias: f64,
11+
}
12+
13+
impl LinearModel {
14+
pub(super) fn new(
15+
transfer_fn: &TransferFn,
16+
start: u32,
17+
end: u32,
18+
man_index_width: u32,
19+
t_width: u32,
20+
) -> Self {
21+
let TransferFn {
22+
linear_scale,
23+
alpha,
24+
beta,
25+
gamma,
26+
..
27+
} = *transfer_fn;
28+
29+
let beta_bits = (beta as f32).to_bits();
30+
// Corresponds to the scale between differentials. Specifically,
31+
// `dx = exp_scale * dt`
32+
let exp_scale = f32::from_bits(((start >> 23) - man_index_width - t_width) << 23) as f64;
33+
let start_x = f32::from_bits(start) as f64;
34+
let end_x = f32::from_bits(end) as f64;
35+
36+
// If the transfer function is purely linear on a given interval,
37+
// integration is unnecessary.
38+
if let Some(linear_scale) = linear_scale {
39+
if end <= beta_bits {
40+
return Self {
41+
scale: linear_scale * exp_scale,
42+
bias: linear_scale * start_x,
43+
};
44+
}
45+
}
46+
47+
let max_t = 2.0f64.powi(t_width as i32);
48+
49+
let (integral_y, integral_ty) = match linear_scale {
50+
Some(linear_scale) if start < beta_bits => {
51+
let beta_t =
52+
(beta_bits << (9 + man_index_width)) as f64 * 2.0f64.powi(t_width as i32 - 32);
53+
let int_linear =
54+
integrate_linear((start_x, beta), (0.0, beta_t), linear_scale, exp_scale);
55+
let int_exponential =
56+
integrate_exponential((beta, end_x), (beta_t, max_t), alpha, gamma, exp_scale);
57+
(
58+
int_linear.0 + int_exponential.0,
59+
int_linear.1 + int_exponential.1,
60+
)
61+
}
62+
_ => integrate_exponential((start_x, end_x), (0.0, max_t), alpha, gamma, exp_scale),
63+
};
64+
let max_t2 = max_t * max_t;
65+
let integral_t = max_t2 * 0.5;
66+
let integral_t2 = max_t2 * max_t / 3.0;
67+
68+
let scale = (max_t * integral_ty - integral_t * integral_y)
69+
/ (max_t * integral_t2 - integral_t * integral_t);
70+
Self {
71+
scale,
72+
bias: (integral_y - scale * integral_t) / max_t,
73+
}
74+
}
75+
76+
pub(super) fn into_u8_lookup(self) -> u32 {
77+
let scale_uint = (255.0 * self.scale * 65536.0 + 0.5) as u32;
78+
let bias_uint = (((255.0 * self.bias + 0.5) * 128.0 + 0.5) as u32) << 9;
79+
(bias_uint << 7) | scale_uint
80+
}
81+
82+
pub(super) fn into_u16_lookup(self) -> u64 {
83+
let scale_uint = (65535.0 * self.scale * 4294967296.0 + 0.5) as u64;
84+
let bias_uint = (((65535.0 * self.bias + 0.5) * 32768.0 + 0.5) as u64) << 17;
85+
(bias_uint << 15) | scale_uint
86+
}
87+
}
88+
89+
/// Integrates the linear segment `y = linear_scale * x` with respect to `t`,
/// where `dx = exp_scale * dt`.
///
/// Returns the integral of `y` and the integral of `t * y`, in that order.
/// Both are evaluated as the difference of the anti-derivative at
/// `(end_x, end_t)` and at `(start_x, start_t)`.
fn integrate_linear(
    (start_x, end_x): (f64, f64),
    (start_t, end_t): (f64, f64),
    linear_scale: f64,
    exp_scale: f64,
) -> (f64, f64) {
    let half_slope = 0.5 * linear_scale;

    // Anti-derivative of `y`.
    let primitive_y = |x: f64| half_slope * x * x / exp_scale;
    // Anti-derivative of `t * y`.
    let primitive_ty =
        |x: f64, t: f64| half_slope * x * x * (t - x / (3.0 * exp_scale)) / exp_scale;

    let integral_y = primitive_y(end_x) - primitive_y(start_x);
    let integral_ty = primitive_ty(end_x, end_t) - primitive_ty(start_x, start_t);

    (integral_y, integral_ty)
}
104+
105+
/// Integrates the power-law segment `y = alpha * x^(1 / gamma) + (1 - alpha)`
/// with respect to `t`, where `dx = exp_scale * dt`.
///
/// Returns the integral of `y` and the integral of `t * y`, in that order.
/// Both are evaluated as the difference of the anti-derivative at
/// `(end_x, end_t)` and at `(start_x, start_t)`.
fn integrate_exponential(
    (start_x, end_x): (f64, f64),
    (start_t, end_t): (f64, f64),
    alpha: f64,
    gamma: f64,
    exp_scale: f64,
) -> (f64, f64) {
    let exponent = 1.0 + gamma.recip();
    let alpha_gamma = alpha * gamma;
    let outer_divisor = exp_scale * (1.0 + gamma);
    let inner_divisor = exp_scale * (1.0 + 2.0 * gamma);
    let offset = 1.0 - alpha;
    let half_offset = 0.5 * offset;

    // Anti-derivative of `y`.
    let primitive_y =
        |x: f64, t: f64| alpha_gamma * x.powf(exponent) / outer_divisor + offset * t;
    // Anti-derivative of `t * y`.
    let primitive_ty = |x: f64, t: f64| {
        alpha_gamma * x.powf(exponent) * (t - gamma * x / inner_divisor) / outer_divisor
            + half_offset * t * t
    };

    let integral_y = primitive_y(end_x, end_t) - primitive_y(start_x, start_t);
    let integral_ty = primitive_ty(end_x, end_t) - primitive_ty(start_x, start_t);

    (integral_y, integral_ty)
}

0 commit comments

Comments
 (0)