From c49c6c40c05fc97ee0109fcffa41a67db35fc78b Mon Sep 17 00:00:00 2001 From: David Michael Barr Date: Sun, 10 May 2020 08:41:43 +0900 Subject: [PATCH] Optimize chroma quantizer offsets for subset3 4:4:4 Numeric analysis indicated 401 +/- 2 in Q12. Gradient for 4:2:2 is estimated by the midpoint with 4:2:0, rounded down. Partial AWCY results on derf-720p-444 indicate luma BD-rates from -30% to -37% and CIEDE2000 BD-rates from -13% to -17%. --- src/rate.rs | 43 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/src/rate.rs b/src/rate.rs index ee4e746531..c665ecdb86 100644 --- a/src/rate.rs +++ b/src/rate.rs @@ -7,6 +7,7 @@ // Media Patent License 1.0 was not distributed with this source code in the // PATENTS file, you can obtain it at www.aomedia.org/license/patent. +use crate::api::color::ChromaSampling; use crate::api::ContextInner; use crate::encoder::TEMPORAL_DELIMITER; use crate::quantize::{ac_q, dc_q, select_ac_qi, select_dc_qi}; @@ -676,10 +677,18 @@ const Q57_SQUARE_EXP_SCALE: f64 = (2.0 * ::std::f64::consts::LN_2) / ((1i64 << 57) as f64); // Daala style log-offset for chroma quantizers -fn chroma_offset(log_target_q: i64) -> (i64, i64) { +// TODO: Optimal offsets for more configurations than just BT.709 +fn chroma_offset( + log_target_q: i64, chroma_sampling: ChromaSampling, +) -> (i64, i64) { let x = log_target_q.max(0); - // Gradient 0.266 optimized for CIEDE2000+PSNR on subset3 - let y = (x >> 2) + (x >> 6); + // Gradient optimized for CIEDE2000+PSNR on subset3 + let y = match chroma_sampling { + ChromaSampling::Cs400 => unimplemented!(), + ChromaSampling::Cs420 => (x >> 2) + (x >> 6), // 0.266 + ChromaSampling::Cs422 => (x >> 3) + (x >> 4) - (x >> 7), // 0.180 + ChromaSampling::Cs444 => (x >> 4) + (x >> 5) + (x >> 8), // 0.098 + }; // blog64(7) - blog64(4); blog64(5) - blog64(4) (0x19D_5D9F_D501_0B37 - y, 0xA4_D3C2_5E68_DC58 - y) } @@ -687,10 +696,11 @@ fn chroma_offset(log_target_q: i64) -> (i64, i64) { impl QuantizerParameters { fn new_from_log_q( log_base_q: i64, log_target_q: i64, bit_depth: usize, + chroma_sampling: ChromaSampling, ) -> QuantizerParameters { let scale = q57(QSCALE + bit_depth as i32 - 8); let quantizer = bexp64(log_target_q + scale); - let (offset_u, offset_v) = chroma_offset(log_target_q); + let (offset_u, offset_v) = chroma_offset(log_target_q, chroma_sampling); let log_target_q_u = log_target_q + offset_u; let log_target_q_v = log_target_q + offset_v; let quantizer_u = bexp64(log_target_q_u + scale); @@ -858,7 +868,7 @@ impl RCState { } pub(crate) fn select_first_pass_qi( - &self, bit_depth: usize, fti: usize, + &self, bit_depth: usize, fti: usize, chroma_sampling: ChromaSampling, ) -> QuantizerParameters { // Adjust the quantizer for the frame type, result is Q57: let log_q = ((self.pass1_log_base_q + (1i64 << 11)) >> 12) @@ -868,6 +878,7 @@ impl RCState { self.pass1_log_base_q, log_q, bit_depth, + chroma_sampling, ) } @@ -885,6 +896,7 @@ impl RCState { // parameterize a "quality" configuration parameter). let base_qi = ctx.config.quantizer; let bit_depth = ctx.config.bit_depth; + let chroma_sampling = ctx.config.chroma_sampling; // We use the AC quantizer as the source quantizer since its quantizer // tables have unique entries, while the DC tables do not. let ac_quantizer = ac_q(base_qi as u8, 0, bit_depth) as i64; @@ -899,7 +911,12 @@ impl RCState { // Adjust the quantizer for the frame type, result is Q57: let log_q = ((log_base_q + (1i64 << 11)) >> 12) * (MQP_Q12[fti] as i64) + DQP_Q57[fti]; - QuantizerParameters::new_from_log_q(log_base_q, log_q, bit_depth) + QuantizerParameters::new_from_log_q( + log_base_q, + log_q, + bit_depth, + chroma_sampling, + ) } else { let mut nframes: [i32; FRAME_NSUBTYPES + 1] = [0; FRAME_NSUBTYPES + 1]; let mut log_scale: [i64; FRAME_NSUBTYPES] = self.log_scale; @@ -909,7 +926,11 @@ impl RCState { match self.twopass_state { // First pass of 2-pass mode: use a fixed base quantizer. PASS_1 => { - return self.select_first_pass_qi(ctx.config.bit_depth, fti); + return self.select_first_pass_qi( + ctx.config.bit_depth, + fti, + ctx.config.chroma_sampling, + ); } // Second pass of 2-pass mode: we know exactly how much of each frame // type there is in the current buffer window, and have estimates for @@ -1064,6 +1085,7 @@ impl RCState { // rate = exp2(log2(scale) - log2(quantizer)*exp) // There's no easy closed form solution, so we bisection searh for it. let bit_depth = ctx.config.bit_depth; + let chroma_sampling = ctx.config.chroma_sampling; // TODO: Proper handling of lossless. let mut log_qlo = blog64(ac_q(self.ac_qi_min, 0, bit_depth) as i64) - q57(QSCALE + bit_depth as i32 - 8); @@ -1173,7 +1195,12 @@ impl RCState { // If that target is unreasonable, oh well; we'll have to drop. } } - QuantizerParameters::new_from_log_q(log_base_q, log_q, bit_depth) + QuantizerParameters::new_from_log_q( + log_base_q, + log_q, + bit_depth, + chroma_sampling, + ) } }