From 934c8d103a5ab568e6685760b6f10fd474e5cb16 Mon Sep 17 00:00:00 2001 From: Stephen Crane Date: Wed, 27 Mar 2024 16:18:51 -0700 Subject: [PATCH] `struct Rav1dFrameContext`: Separate into data and context Separates the frame data into intialized and then rarely updated data (`Rav1dFrameData`) and a few of the fields required to pick tasks before accessing the rest of the frame data to execute the task. --- src/decode.rs | 189 +++++++++++++++----------- src/internal.rs | 44 +++++- src/lib.rs | 64 ++++----- src/obu.rs | 11 +- src/thread_task.rs | 329 ++++++++++++++++++++++++++------------------- 5 files changed, 378 insertions(+), 259 deletions(-) diff --git a/src/decode.rs b/src/decode.rs index 0f6b8a0c8..714554521 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -25,6 +25,7 @@ use crate::src::cdf::rav1d_cdf_thread_init_static; use crate::src::cdf::rav1d_cdf_thread_update; use crate::src::cdf::CdfMvComponent; use crate::src::cdf::CdfMvContext; +use crate::src::cdf::CdfThreadContext; use crate::src::ctx::CaseSet; use crate::src::dequant_tables::dav1d_dq_tbl; use crate::src::disjoint_mut::DisjointMut; @@ -63,6 +64,7 @@ use crate::src::internal::Bxy; use crate::src::internal::Rav1dContext; use crate::src::internal::Rav1dContextTaskType; use crate::src::internal::Rav1dDSPContext; +use crate::src::internal::Rav1dFrameContext; use crate::src::internal::Rav1dFrameData; use crate::src::internal::Rav1dTaskContext; use crate::src::internal::Rav1dTaskContext_scratch_pal; @@ -171,7 +173,6 @@ use std::ffi::c_uint; use std::ffi::c_void; use std::iter; use std::mem; -use std::ptr::addr_of_mut; use std::slice; use std::sync::atomic::AtomicI32; use std::sync::atomic::Ordering; @@ -3648,6 +3649,7 @@ unsafe fn setup_tile( c: &Rav1dContext, ts: &mut Rav1dTileState, f: &Rav1dFrameData, + in_cdf: &CdfThreadContext, data: &[u8], tile_row: usize, tile_col: usize, @@ -3678,7 +3680,7 @@ unsafe fn setup_tile( }; } - rav1d_cdf_thread_copy(&mut ts.cdf, &f.in_cdf); + rav1d_cdf_thread_copy(&mut ts.cdf, in_cdf); ts.last_qidx = frame_hdr.quant.yac; ts.last_delta_lf.fill(0); @@ -3855,7 +3857,7 @@ unsafe fn read_restoration_info( pub(crate) unsafe fn rav1d_decode_tile_sbrow( c: &Rav1dContext, t: &mut Rav1dTaskContext, - f: &mut Rav1dFrameData, + f: &Rav1dFrameData, ) -> Result<(), ()> { let seq_hdr = &***f.seq_hdr.as_ref().unwrap(); let root_bl = if seq_hdr.sb128 != 0 { @@ -4071,8 +4073,11 @@ pub(crate) unsafe fn rav1d_decode_tile_sbrow( pub(crate) unsafe fn rav1d_decode_frame_init( c: &Rav1dContext, - f: &mut Rav1dFrameData, + fc: &Rav1dFrameContext, ) -> Rav1dResult { + let mut f = fc.data.try_write().unwrap(); + let f = &mut *f; + // TODO: Fallible allocation f.lf.start_of_tile_row.resize(f.sbh as usize, 0); @@ -4404,12 +4409,14 @@ pub(crate) unsafe fn rav1d_decode_frame_init( pub(crate) unsafe fn rav1d_decode_frame_init_cdf( c: &Rav1dContext, + fc: &Rav1dFrameContext, f: &mut Rav1dFrameData, + in_cdf: &CdfThreadContext, ) -> Rav1dResult { let frame_hdr = &***f.frame_hdr.as_ref().unwrap(); if frame_hdr.refresh_context != 0 { - rav1d_cdf_thread_copy(&mut f.out_cdf.cdf_write(), &f.in_cdf); + rav1d_cdf_thread_copy(&mut f.out_cdf.cdf_write(), in_cdf); } let uses_2pass = c.n_fc > 1; @@ -4424,7 +4431,7 @@ pub(crate) unsafe fn rav1d_decode_frame_init_cdf( // parse individual tiles per tile group let mut tile_row = 0; let mut tile_col = 0; - f.task_thread.update_set.store(false, Ordering::Relaxed); + fc.task_thread.update_set.store(false, Ordering::Relaxed); for tile in &f.tiles { let start = tile.hdr.start.try_into().unwrap(); let end: usize = tile.hdr.end.try_into().unwrap(); @@ -4465,7 +4472,16 @@ pub(crate) unsafe fn rav1d_decode_frame_init_cdf( }; let (cur_data, rest_data) = data.split_at(tile_sz); - setup_tile(c, ts, f, cur_data, tile_row, tile_col, tile_start_off); + setup_tile( + c, + ts, + f, + in_cdf, + cur_data, + tile_row, + tile_col, + tile_start_off, + ); tile_col += 1; if tile_col == cols { @@ -4473,7 +4489,7 @@ pub(crate) unsafe fn rav1d_decode_frame_init_cdf( tile_row += 1; } if j == tiling.update as usize && frame_hdr.refresh_context != 0 { - f.task_thread.update_set.store(true, Ordering::Relaxed); + fc.task_thread.update_set.store(true, Ordering::Relaxed); } data = rest_data; } @@ -4561,11 +4577,13 @@ unsafe fn rav1d_decode_frame_main(c: &Rav1dContext, f: &mut Rav1dFrameData) -> R pub(crate) unsafe fn rav1d_decode_frame_exit( c: &Rav1dContext, - f: &mut Rav1dFrameData, + fc: &Rav1dFrameContext, retval: Rav1dResult, ) { + let task_thread = &fc.task_thread; + let mut f = fc.data.try_write().unwrap(); if f.sr_cur.p.data.is_some() { - f.task_thread.error = AtomicI32::new(0); + task_thread.error.store(0, Ordering::Relaxed); } let cf = f.frame_thread.cf.get_mut(); if c.n_fc > 1 && retval.is_err() && !cf.is_empty() { @@ -4580,7 +4598,7 @@ pub(crate) unsafe fn rav1d_decode_frame_exit( } rav1d_picture_unref_internal(&mut f.cur); rav1d_thread_picture_unref(&mut f.sr_cur); - let _ = mem::take(&mut f.in_cdf); + let _ = mem::take(&mut *fc.in_cdf.try_write().unwrap()); if let Some(frame_hdr) = &f.frame_hdr { if frame_hdr.refresh_context != 0 { if let Some(progress) = f.out_cdf.progress() { @@ -4599,42 +4617,41 @@ pub(crate) unsafe fn rav1d_decode_frame_exit( let _ = mem::take(&mut f.seq_hdr); let _ = mem::take(&mut f.frame_hdr); f.tiles.clear(); - f.task_thread.finished.store(true, Ordering::SeqCst); - *f.task_thread.retval.try_lock().unwrap() = retval; + task_thread.finished.store(true, Ordering::SeqCst); + *task_thread.retval.try_lock().unwrap() = retval; } -pub(crate) unsafe fn rav1d_decode_frame(c: &Rav1dContext, f: &mut Rav1dFrameData) -> Rav1dResult { +pub(crate) unsafe fn rav1d_decode_frame(c: &Rav1dContext, fc: &Rav1dFrameContext) -> Rav1dResult { assert!(c.n_fc == 1); // if.tc.len() > 1 (but n_fc == 1), we could run init/exit in the task // threads also. Not sure it makes a measurable difference. - let mut res = rav1d_decode_frame_init(c, f); + let mut res = rav1d_decode_frame_init(c, fc); + let mut f = fc.data.try_write().unwrap(); if res.is_ok() { - res = rav1d_decode_frame_init_cdf(c, f); + res = rav1d_decode_frame_init_cdf(c, fc, &mut f, &fc.in_cdf()); } // wait until all threads have completed if res.is_ok() { if c.tc.len() > 1 { - res = rav1d_task_create_tile_sbrow(c, f, 0, 1); - let mut task_thread_lock = (*f.task_thread.ttd).delayed_fg.lock().unwrap(); - (*f.task_thread.ttd).cond.notify_one(); + res = rav1d_task_create_tile_sbrow(c, fc, &f, 0, 1); + drop(f); // release the frame data before waiting for the other threads + let mut task_thread_lock = (*fc.task_thread.ttd).delayed_fg.lock().unwrap(); + (*fc.task_thread.ttd).cond.notify_one(); if res.is_ok() { - while f.task_thread.done[0].load(Ordering::Relaxed) == 0 - // TODO(kkysen) Make `.task_counter` an `AtomicI32`, but that requires recursively removing `impl Copy`s. - || (*(addr_of_mut!(f.task_thread.task_counter) as *mut AtomicI32)) - .load(Ordering::SeqCst) - > 0 + while fc.task_thread.done[0].load(Ordering::Relaxed) == 0 + || fc.task_thread.task_counter.load(Ordering::SeqCst) > 0 { - task_thread_lock = f.task_thread.cond.wait(task_thread_lock).unwrap(); + task_thread_lock = fc.task_thread.cond.wait(task_thread_lock).unwrap(); } } drop(task_thread_lock); - res = *f.task_thread.retval.try_lock().unwrap(); + res = *fc.task_thread.retval.try_lock().unwrap(); } else { - res = rav1d_decode_frame_main(c, f); + res = rav1d_decode_frame_main(c, &mut f); let frame_hdr = &***f.frame_hdr.as_ref().unwrap(); if res.is_ok() && frame_hdr.refresh_context != 0 - && f.task_thread.update_set.load(Ordering::Relaxed) + && fc.task_thread.update_set.load(Ordering::Relaxed) { rav1d_cdf_thread_update( frame_hdr, @@ -4642,9 +4659,10 @@ pub(crate) unsafe fn rav1d_decode_frame(c: &Rav1dContext, f: &mut Rav1dFrameData &(*f.ts.offset(frame_hdr.tiling.update as isize)).cdf, ); } + drop(f); } } - rav1d_decode_frame_exit(c, f, res); + rav1d_decode_frame_exit(c, fc, res); res } @@ -4656,7 +4674,7 @@ fn get_upscale_x0(in_w: c_int, out_w: c_int, step: c_int) -> c_int { pub unsafe fn rav1d_submit_frame(c: &mut Rav1dContext) -> Rav1dResult { // wait for c->out_delayed[next] and move into c->out if visible - let (f, out, _task_thread_lock) = if c.n_fc > 1 { + let (fc, out, _task_thread_lock) = if c.n_fc > 1 { let mut task_thread_lock = c.task_thread.delayed_fg.lock().unwrap(); let next = c.frame_thread.next; c.frame_thread.next += 1; @@ -4664,12 +4682,12 @@ pub unsafe fn rav1d_submit_frame(c: &mut Rav1dContext) -> Rav1dResult { c.frame_thread.next = 0; } - let f = &mut *c.fc.offset(next as isize); - while !f.task_thread.finished.load(Ordering::SeqCst) { - task_thread_lock = f.task_thread.cond.wait(task_thread_lock).unwrap(); + let fc = &(*c.fc.offset(next as isize)); + while !fc.task_thread.finished.load(Ordering::SeqCst) { + task_thread_lock = fc.task_thread.cond.wait(task_thread_lock).unwrap(); } let out_delayed = &mut c.frame_thread.out_delayed[next as usize]; - if out_delayed.p.data.is_some() || f.task_thread.error.load(Ordering::SeqCst) != 0 { + if out_delayed.p.data.is_some() || fc.task_thread.error.load(Ordering::SeqCst) != 0 { let first = c.task_thread.first.load(Ordering::SeqCst); if first + 1 < c.n_fc { c.task_thread.first.fetch_add(1, Ordering::SeqCst); @@ -4688,33 +4706,37 @@ pub unsafe fn rav1d_submit_frame(c: &mut Rav1dContext) -> Rav1dResult { c.task_thread.cur.fetch_sub(1, Ordering::Relaxed); } } - { - let mut error = f.task_thread.retval.try_lock().unwrap(); - if error.is_err() { - c.cached_error = mem::replace(&mut error, Ok(())); - *c.cached_error_props.get_mut().unwrap() = out_delayed.p.m.clone(); - rav1d_thread_picture_unref(out_delayed); - } else if out_delayed.p.data.is_some() { - let progress = out_delayed.progress.as_ref().unwrap()[1].load(Ordering::Relaxed); - if (out_delayed.visible || c.output_invisible_frames) && progress != FRAME_ERROR { - rav1d_thread_picture_ref(&mut c.out, out_delayed); - c.event_flags |= out_delayed.flags.into(); - } - rav1d_thread_picture_unref(out_delayed); + let mut error = fc.task_thread.retval.try_lock().unwrap(); + if error.is_err() { + c.cached_error = mem::replace(&mut *error, Ok(())); + *c.cached_error_props.get_mut().unwrap() = out_delayed.p.m.clone(); + rav1d_thread_picture_unref(out_delayed); + } else if out_delayed.p.data.is_some() { + let progress = out_delayed.progress.as_ref().unwrap()[1].load(Ordering::Relaxed); + if (out_delayed.visible || c.output_invisible_frames) && progress != FRAME_ERROR { + rav1d_thread_picture_ref(&mut c.out, out_delayed); + c.event_flags |= out_delayed.flags.into(); } + rav1d_thread_picture_unref(out_delayed); } - (f, out_delayed as *mut _, Some(task_thread_lock)) + (fc, out_delayed as *mut _, Some(task_thread_lock)) } else { - (&mut *c.fc, &mut c.out as *mut _, None) + (&(*c.fc), &mut c.out as *mut _, None) }; + let mut f = fc.data.try_write().unwrap(); f.seq_hdr = c.seq_hdr.clone(); f.frame_hdr = mem::take(&mut c.frame_hdr); - let seq_hdr = &***f.seq_hdr.as_ref().unwrap(); - - unsafe fn on_error(f: &mut Rav1dFrameData, c: &Rav1dContext, out: *mut Rav1dThreadPicture) { - f.task_thread.error = AtomicI32::new(1); - let _ = mem::take(&mut f.in_cdf); + let seq_hdr = f.seq_hdr.clone().unwrap(); + + unsafe fn on_error( + fc: &Rav1dFrameContext, + f: &mut Rav1dFrameData, + c: &Rav1dContext, + out: *mut Rav1dThreadPicture, + ) { + fc.task_thread.error.store(1, Ordering::Relaxed); + let _ = mem::take(&mut *fc.in_cdf.try_write().unwrap()); if f.frame_hdr.as_ref().unwrap().refresh_context != 0 { let _ = mem::take(&mut f.out_cdf); } @@ -4733,7 +4755,7 @@ pub unsafe fn rav1d_submit_frame(c: &mut Rav1dContext) -> Rav1dResult { *c.cached_error_props.lock().unwrap() = c.in_0.m.clone(); f.tiles.clear(); - f.task_thread.finished.store(true, Ordering::SeqCst); + fc.task_thread.finished.store(true, Ordering::SeqCst); } let bpc = 8 + 2 * seq_hdr.hbd; @@ -4741,7 +4763,7 @@ pub unsafe fn rav1d_submit_frame(c: &mut Rav1dContext) -> Rav1dResult { Some(dsp) => f.dsp = dsp, None => { writeln!(c.logger, "Compiled without support for {bpc}-bit decoding",); - on_error(f, c, out); + on_error(fc, &mut f, c, out); return Err(ENOPROTOOPT); } }; @@ -4751,12 +4773,12 @@ pub unsafe fn rav1d_submit_frame(c: &mut Rav1dContext) -> Rav1dResult { } let mut ref_coded_width = <[i32; 7]>::default(); - let frame_hdr = &***f.frame_hdr.as_ref().unwrap(); + let frame_hdr = f.frame_hdr.as_ref().unwrap().clone(); if frame_hdr.frame_type.is_inter_or_switch() { if frame_hdr.primary_ref_frame != RAV1D_PRIMARY_REF_NONE { let pri_ref = frame_hdr.refidx[frame_hdr.primary_ref_frame as usize] as usize; if c.refs[pri_ref].p.p.data.is_none() { - on_error(f, c, out); + on_error(fc, &mut f, c, out); return Err(EINVAL); } } @@ -4773,7 +4795,7 @@ pub unsafe fn rav1d_submit_frame(c: &mut Rav1dContext) -> Rav1dResult { for j in 0..i { rav1d_thread_picture_unref(&mut f.refp[j]); } - on_error(f, c, out); + on_error(fc, &mut f, c, out); return Err(EINVAL); } rav1d_thread_picture_ref(&mut f.refp[i], &mut c.refs[refidx].p); @@ -4798,23 +4820,28 @@ pub unsafe fn rav1d_submit_frame(c: &mut Rav1dContext) -> Rav1dResult { // setup entropy if frame_hdr.primary_ref_frame == RAV1D_PRIMARY_REF_NONE { - f.in_cdf = rav1d_cdf_thread_init_static(frame_hdr.quant.yac); + *fc.in_cdf.try_write().unwrap() = rav1d_cdf_thread_init_static(frame_hdr.quant.yac); } else { let pri_ref = frame_hdr.refidx[frame_hdr.primary_ref_frame as usize] as usize; - f.in_cdf = c.cdf[pri_ref].clone(); + *fc.in_cdf.try_write().unwrap() = c.cdf[pri_ref].clone(); } if frame_hdr.refresh_context != 0 { let res = rav1d_cdf_thread_alloc(c, (c.n_fc > 1) as c_int); - if res.is_err() { - on_error(f, c, out); + match res { + Err(e) => { + on_error(fc, &mut f, c, out); + return Err(e); + } + Ok(res) => { + f.out_cdf = res; + } } - f.out_cdf = res?; } // FIXME qsort so tiles are in order (for frame threading) f.tiles.clear(); mem::swap(&mut f.tiles, &mut c.tiles); - f.task_thread + fc.task_thread .finished .store(f.tiles.is_empty(), Ordering::SeqCst); @@ -4823,22 +4850,26 @@ pub unsafe fn rav1d_submit_frame(c: &mut Rav1dContext) -> Rav1dResult { // We must take itut_t35 out of the context before the call so borrowck can // see we mutably borrow `c.itut_t35` disjointly from the task thread lock. let itut_t35 = mem::take(&mut c.itut_t35); - let res = rav1d_thread_picture_alloc(c, f, bpc, itut_t35); + let res = rav1d_thread_picture_alloc(c, &mut f, bpc, itut_t35); if res.is_err() { - on_error(f, c, out); + on_error(fc, &mut f, c, out); return res; } - let seq_hdr = &***f.seq_hdr.as_ref().unwrap(); - let frame_hdr = &***f.frame_hdr.as_ref().unwrap(); + let seq_hdr = f.seq_hdr.as_ref().unwrap().clone(); + let frame_hdr = f.frame_hdr.as_ref().unwrap().clone(); if frame_hdr.size.width[0] != frame_hdr.size.width[1] { - let res = rav1d_picture_alloc_copy(c, &mut f.cur, frame_hdr.size.width[0], &mut f.sr_cur.p); + // Re-borrow to allow independent borrows of fields + let f = &mut *f; + let res = rav1d_picture_alloc_copy(c, &mut f.cur, frame_hdr.size.width[0], &f.sr_cur.p); if res.is_err() { - on_error(f, c, out); + on_error(fc, f, c, out); return res; } } else { + // Re-borrow to allow independent borrows of fields + let f = &mut *f; rav1d_picture_ref(&mut f.cur, &mut f.sr_cur.p); } if frame_hdr.size.width[0] != frame_hdr.size.width[1] { @@ -4872,11 +4903,11 @@ pub unsafe fn rav1d_submit_frame(c: &mut Rav1dContext) -> Rav1dResult { f.sbh = f.bh + f.sb_step - 1 >> f.sb_shift; f.b4_stride = (f.bw + 31 & !31) as ptrdiff_t; f.bitdepth_max = (1 << f.cur.p.bpc) - 1; - f.task_thread.error = AtomicI32::new(0); + fc.task_thread.error.store(0, Ordering::Relaxed); let uses_2pass = (c.n_fc > 1) as c_int; let cols = frame_hdr.tiling.cols; let rows = frame_hdr.tiling.rows; - f.task_thread + fc.task_thread .task_counter .store(cols * rows + f.sbh << uses_2pass, Ordering::SeqCst); @@ -4973,7 +5004,7 @@ pub unsafe fn rav1d_submit_frame(c: &mut Rav1dContext) -> Rav1dResult { if frame_hdr.refresh_context != 0 { c.cdf[i] = f.out_cdf.clone(); } else { - c.cdf[i] = f.in_cdf.clone(); + c.cdf[i] = fc.in_cdf.try_read().unwrap().clone(); } c.refs[i].segmap = f.cur_segmap.clone(); @@ -4986,7 +5017,8 @@ pub unsafe fn rav1d_submit_frame(c: &mut Rav1dContext) -> Rav1dResult { } if c.n_fc == 1 { - let res = rav1d_decode_frame(c, f); + drop(f); + let res = rav1d_decode_frame(c, &fc); if res.is_err() { rav1d_thread_picture_unref(&mut c.out); for i in 0..8 { @@ -4999,11 +5031,14 @@ pub unsafe fn rav1d_submit_frame(c: &mut Rav1dContext) -> Rav1dResult { let _ = mem::take(&mut c.refs[i].refmvs); } } - on_error(f, c, out); + let mut f = fc.data.try_write().unwrap(); + on_error(fc, &mut f, c, out); return res; } } else { - rav1d_task_frame_init(c, f); + let index = f.index; + let fc = &mut (*c.fc.offset(index as isize)); + rav1d_task_frame_init(c, fc, index); } Ok(()) diff --git a/src/internal.rs b/src/internal.rs index fee14c59d..df2368b1f 100644 --- a/src/internal.rs +++ b/src/internal.rs @@ -91,6 +91,7 @@ use std::cmp; use std::ffi::c_int; use std::ffi::c_uint; use std::mem; +use std::mem::MaybeUninit; use std::ops::Add; use std::ops::AddAssign; use std::ops::Deref; @@ -98,6 +99,7 @@ use std::ops::Index; use std::ops::IndexMut; use std::ops::Range; use std::ops::Sub; +use std::ptr::addr_of_mut; use std::sync::atomic::AtomicBool; use std::sync::atomic::AtomicI32; use std::sync::atomic::AtomicU32; @@ -107,6 +109,7 @@ use std::sync::Condvar; use std::sync::Mutex; use std::sync::OnceLock; use std::sync::RwLock; +use std::sync::RwLockReadGuard; use std::thread::JoinHandle; #[repr(C)] @@ -182,7 +185,7 @@ pub(crate) struct Rav1dTileGroup { pub hdr: Rav1dTileGroupHeader, } -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum TaskType { Init = 0, InitCdf = 1, @@ -309,7 +312,7 @@ impl Rav1dContextTaskThread { #[repr(C)] pub struct Rav1dContext { - pub(crate) fc: *mut Rav1dFrameData, + pub(crate) fc: *mut Rav1dFrameContext, pub(crate) n_fc: c_uint, /// Worker thread join handles and communication, or main thread task @@ -794,6 +797,29 @@ pub(crate) struct Rav1dFrameContext_frame_thread_progress { pub copy_lpf: RwLock>, } +pub(crate) struct Rav1dFrameContext { + pub data: RwLock, + pub in_cdf: RwLock, + pub task_thread: Rav1dFrameContext_task_thread, + pub frame_thread_progress: Rav1dFrameContext_frame_thread_progress, +} + +impl Rav1dFrameContext { + pub fn in_cdf<'a>(&'a self) -> RwLockReadGuard<'a, CdfThreadContext> { + self.in_cdf.try_read().unwrap() + } + + pub fn frame_hdr(&self) -> Arc> { + self.data + .try_read() + .unwrap() + .frame_hdr + .as_ref() + .unwrap() + .clone() + } +} + #[repr(C)] pub(crate) struct Rav1dFrameData { /// Index in [`Rav1dContext::fc`] @@ -813,7 +839,6 @@ pub(crate) struct Rav1dFrameData { pub refpoc: [c_uint; 7], pub refrefpoc: [[c_uint; 7]; 7], pub gmv_warp_allowed: [u8; 7], - pub in_cdf: CdfThreadContext, pub out_cdf: CdfThreadContext, pub tiles: Vec, @@ -847,13 +872,22 @@ pub(crate) struct Rav1dFrameData { pub bitdepth_max: c_int, pub frame_thread: Rav1dFrameContext_frame_thread, - pub frame_thread_progress: Rav1dFrameContext_frame_thread_progress, pub lf: Rav1dFrameContext_lf, - pub task_thread: Rav1dFrameContext_task_thread, pub lowest_pixel_mem: DisjointMut>, } impl Rav1dFrameData { + pub unsafe fn zeroed() -> Self { + let mut data: MaybeUninit = MaybeUninit::zeroed(); + addr_of_mut!((*data.as_mut_ptr()).out_cdf).write(Default::default()); + addr_of_mut!((*data.as_mut_ptr()).tiles).write(vec![]); + addr_of_mut!((*data.as_mut_ptr()).a).write(vec![]); + addr_of_mut!((*data.as_mut_ptr()).lowest_pixel_mem).write(Default::default()); + addr_of_mut!((*data.as_mut_ptr()).frame_thread).write(Default::default()); + addr_of_mut!((*data.as_mut_ptr()).lf).write(Default::default()); + data.assume_init() + } + pub fn bd_fn(&self) -> &'static Rav1dFrameContext_bd_fn { let bpc = BPC::from_bitdepth_max(self.bitdepth_max); Rav1dFrameContext_bd_fn::get(bpc) diff --git a/src/lib.rs b/src/lib.rs index 4253da7fa..ad04651a6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,6 +29,7 @@ use crate::src::internal::Rav1dContext; use crate::src::internal::Rav1dContextTaskThread; use crate::src::internal::Rav1dContextTaskType; use crate::src::internal::Rav1dDSPContext; +use crate::src::internal::Rav1dFrameContext; use crate::src::internal::Rav1dFrameData; use crate::src::internal::Rav1dTaskContext; use crate::src::internal::Rav1dTaskContext_task_thread; @@ -80,6 +81,7 @@ use std::sync::Arc; use std::sync::Condvar; use std::sync::Mutex; use std::sync::Once; +use std::sync::RwLock; use std::thread; use to_method::To as _; @@ -249,16 +251,16 @@ pub(crate) unsafe fn rav1d_open(c_out: &mut *mut Rav1dContext, s: &Rav1dSettings let NumThreads { n_tc, n_fc } = get_num_threads(s); (*c).n_fc = n_fc as c_uint; (*c).fc = rav1d_alloc_aligned( - ::core::mem::size_of::().wrapping_mul((*c).n_fc as usize), + ::core::mem::size_of::().wrapping_mul((*c).n_fc as usize), 32 as c_int as usize, - ) as *mut Rav1dFrameData; + ) as *mut Rav1dFrameContext; if ((*c).fc).is_null() { return error(c, c_out); } memset( (*c).fc as *mut c_void, 0 as c_int, - ::core::mem::size_of::().wrapping_mul((*c).n_fc as usize), + ::core::mem::size_of::().wrapping_mul((*c).n_fc as usize), ); let ttd = TaskThreadData { cond: Condvar::new(), @@ -282,23 +284,22 @@ pub(crate) unsafe fn rav1d_open(c_out: &mut *mut Rav1dContext, s: &Rav1dSettings addr_of_mut!((*c).cache).write(Default::default()); addr_of_mut!((*c).refs).write(Default::default()); for n in 0..n_fc { - let f: &mut Rav1dFrameData = &mut *((*c).fc).offset(n as isize); + let fc = &mut *((*c).fc).offset(n as isize); + addr_of_mut!(fc.data).write(RwLock::new(Rav1dFrameData::zeroed())); + let f = fc.data.get_mut().unwrap(); f.index = n; - addr_of_mut!(f.tiles).write(Default::default()); - addr_of_mut!(f.task_thread.tasks).write(UnsafeCell::new(Default::default())); - addr_of_mut!(f.task_thread.retval).write(Mutex::new(Ok(()))); - addr_of_mut!(f.task_thread.update_set).write(AtomicBool::new(false)); - addr_of_mut!(f.task_thread.finished).write(AtomicBool::new(true)); - addr_of_mut!(f.frame_thread).write(Default::default()); - addr_of_mut!(f.frame_thread_progress).write(Default::default()); - addr_of_mut!(f.lowest_pixel_mem).write(Default::default()); - addr_of_mut!(f.lf).write(Default::default()); + addr_of_mut!(fc.in_cdf).write(RwLock::new(Default::default())); + addr_of_mut!(fc.task_thread.tasks).write(UnsafeCell::new(Default::default())); + addr_of_mut!(fc.task_thread.retval).write(Mutex::new(Ok(()))); + addr_of_mut!(fc.task_thread.update_set).write(AtomicBool::new(false)); + addr_of_mut!(fc.task_thread.finished).write(AtomicBool::new(true)); + addr_of_mut!(fc.task_thread.ttd).write(Arc::clone(&(*c).task_thread)); + addr_of_mut!(fc.frame_thread_progress).write(Default::default()); if n_tc > 1 { - f.task_thread.lock = Mutex::new(()); - f.task_thread.cond = Condvar::new(); - f.task_thread.pending_tasks = Default::default(); + fc.task_thread.lock = Mutex::new(()); + fc.task_thread.cond = Condvar::new(); + fc.task_thread.pending_tasks = Default::default(); } - (&mut f.task_thread.ttd as *mut Arc).write(Arc::clone(&(*c).task_thread)); f.lf.last_sharpness = -(1 as c_int); rav1d_refmvs_init(&mut f.rf); addr_of_mut!(f.refp).write(Default::default()); @@ -438,13 +439,13 @@ unsafe fn drain_picture(c: &mut Rav1dContext, out: &mut Rav1dPicture) -> Rav1dRe let mut drained = 0; loop { let next: c_uint = c.frame_thread.next; - let f: &mut Rav1dFrameData = &mut *(c.fc).offset(next as isize); + let fc = &(*(c.fc).offset(next as isize)); let mut task_thread_lock = c.task_thread.delayed_fg.lock().unwrap(); - while !f.task_thread.finished.load(Ordering::SeqCst) { - task_thread_lock = f.task_thread.cond.wait(task_thread_lock).unwrap(); + while !fc.task_thread.finished.load(Ordering::SeqCst) { + task_thread_lock = fc.task_thread.cond.wait(task_thread_lock).unwrap(); } let out_delayed = &mut c.frame_thread.out_delayed[next as usize]; - if out_delayed.p.data.is_some() || f.task_thread.error.load(Ordering::SeqCst) != 0 { + if out_delayed.p.data.is_some() || fc.task_thread.error.load(Ordering::SeqCst) != 0 { let first: c_uint = c.task_thread.first.load(Ordering::SeqCst); if first.wrapping_add(1 as c_uint) < c.n_fc { c.task_thread.first.fetch_add(1, Ordering::SeqCst); @@ -471,7 +472,7 @@ unsafe fn drain_picture(c: &mut Rav1dContext, out: &mut Rav1dPicture) -> Rav1dRe c.frame_thread.next = 0 as c_int as c_uint; } drop(task_thread_lock); - let error = mem::replace(&mut *f.task_thread.retval.try_lock().unwrap(), Ok(())); + let error = mem::replace(&mut *fc.task_thread.retval.try_lock().unwrap(), Ok(())); if error.is_err() { *c.cached_error_props.get_mut().unwrap() = out_delayed.p.m.clone(); rav1d_thread_picture_unref(out_delayed); @@ -730,9 +731,9 @@ pub(crate) unsafe fn rav1d_flush(c: *mut Rav1dContext) { if next == (*c).n_fc { next = 0 as c_int as c_uint; } - let f: &mut Rav1dFrameData = &mut *((*c).fc).offset(next as isize); - rav1d_decode_frame_exit(&*c, f, Err(EGeneric)); - *f.task_thread.retval.try_lock().unwrap() = Ok(()); + let fc = &(*((*c).fc).offset(next as isize)); + rav1d_decode_frame_exit(&*c, fc, Err(EGeneric)); + *fc.task_thread.retval.try_lock().unwrap() = Ok(()); let out_delayed = &mut (*c).frame_thread.out_delayed[next as usize]; if out_delayed.p.frame_hdr.is_some() { rav1d_thread_picture_unref(out_delayed); @@ -800,20 +801,23 @@ impl Drop for Rav1dContext { } let mut n_1: c_uint = 0 as c_int as c_uint; while !(self.fc).is_null() && n_1 < self.n_fc { - let f: &mut Rav1dFrameData = &mut *(self.fc).offset(n_1 as isize); + let fc = &mut (*(self.fc).offset(n_1 as isize)); + let f = fc.data.get_mut().unwrap(); if self.n_fc > 1 as c_uint { let _ = mem::take(&mut f.lowest_pixel_mem); // TODO: remove when context is owned } if self.tc.len() > 1 { - let _ = mem::take(&mut f.task_thread.pending_tasks); // TODO: remove when context is owned + let _ = mem::take(&mut fc.task_thread.pending_tasks); // TODO: remove when context is owned } + mem::take(fc.in_cdf.get_mut().unwrap()); // TODO: remove when context is owned + mem::take(fc.frame_thread_progress.frame.get_mut().unwrap()); // TODO: remove when context is owned + mem::take(fc.frame_thread_progress.copy_lpf.get_mut().unwrap()); // TODO: remove when context is owned let _ = mem::take(&mut f.frame_thread); // TODO: remove when context is owned - mem::take(&mut f.frame_thread_progress.frame); // TODO: remove when context is owned - mem::take(&mut f.frame_thread_progress.copy_lpf); // TODO: remove when context is owned - mem::take(&mut f.task_thread.tasks); // TODO: remove when context is owned + mem::take(&mut fc.task_thread.tasks); // TODO: remove when context is owned rav1d_free_aligned(f.ts as *mut c_void); rav1d_free_aligned(f.ipred_edge[0] as *mut c_void); let _ = mem::take(&mut f.a); // TODO: remove when context is owned + let _ = mem::take(&mut f.out_cdf); // TODO: remove when context is owned let _ = mem::take(&mut f.tiles); let _ = mem::take(&mut f.lf.mask); // TODO: remove when context is owned let _ = mem::take(&mut f.lf.lr_mask); // TODO: remove when context is owned diff --git a/src/obu.rs b/src/obu.rs index 8e22c5462..20ae485e1 100644 --- a/src/obu.rs +++ b/src/obu.rs @@ -2547,12 +2547,13 @@ unsafe fn parse_obus( c.frame_thread.next = 0; } - let f = &mut *c.fc.offset(next as isize); - while !f.task_thread.finished.load(Ordering::SeqCst) { - task_thread_lock = f.task_thread.cond.wait(task_thread_lock).unwrap(); + let fc = &(*c.fc.offset(next as isize)); + while !fc.task_thread.finished.load(Ordering::SeqCst) { + task_thread_lock = fc.task_thread.cond.wait(task_thread_lock).unwrap(); } let out_delayed = &mut c.frame_thread.out_delayed[next as usize]; - if out_delayed.p.data.is_some() || f.task_thread.error.load(Ordering::SeqCst) != 0 { + if out_delayed.p.data.is_some() || fc.task_thread.error.load(Ordering::SeqCst) != 0 + { let first = c.task_thread.first.load(Ordering::SeqCst); if first + 1 < c.n_fc { c.task_thread.first.fetch_add(1, Ordering::SeqCst); @@ -2571,7 +2572,7 @@ unsafe fn parse_obus( c.task_thread.cur.fetch_sub(1, Ordering::Relaxed); } } - let mut error = f.task_thread.retval.try_lock().unwrap(); + let mut error = fc.task_thread.retval.try_lock().unwrap(); if error.is_err() { c.cached_error = mem::replace(&mut *error, Ok(())); *c.cached_error_props.get_mut().unwrap() = out_delayed.p.m.clone(); diff --git a/src/thread_task.rs b/src/thread_task.rs index f17cfd26f..2af4b3596 100644 --- a/src/thread_task.rs +++ b/src/thread_task.rs @@ -17,6 +17,8 @@ use crate::src::fg_apply::rav1d_apply_grain_row; use crate::src::fg_apply::rav1d_prep_grain; use crate::src::internal::Rav1dContext; use crate::src::internal::Rav1dDSPContext; +use crate::src::internal::Rav1dFrameContext; +use crate::src::internal::Rav1dFrameContext_task_thread; use crate::src::internal::Rav1dFrameData; use crate::src::internal::Rav1dTaskContext; use crate::src::internal::Rav1dTaskContext_task_thread; @@ -148,7 +150,7 @@ unsafe fn reset_task_cur_async(ttd: &TaskThreadData, mut frame_idx: c_uint, n_fr unsafe fn insert_tasks_between( c: &Rav1dContext, - f: &Rav1dFrameData, + f: &Rav1dFrameContext, first: Rav1dTaskIndex, last: Rav1dTaskIndex, a: Option, @@ -178,7 +180,7 @@ unsafe fn insert_tasks_between( unsafe fn insert_tasks( c: &Rav1dContext, - f: &Rav1dFrameData, + f: &Rav1dFrameContext, first: Rav1dTaskIndex, last: Rav1dTaskIndex, cond_signal: c_int, @@ -250,12 +252,17 @@ unsafe fn insert_tasks( } #[inline] -unsafe fn insert_task(c: &Rav1dContext, f: &Rav1dFrameData, t: Rav1dTaskIndex, cond_signal: c_int) { +unsafe fn insert_task( + c: &Rav1dContext, + f: &Rav1dFrameContext, + t: Rav1dTaskIndex, + cond_signal: c_int, +) { insert_tasks(c, f, t, t, cond_signal); } #[inline] -unsafe fn add_pending(f: &Rav1dFrameData, t: Rav1dTaskIndex) { +unsafe fn add_pending(f: &Rav1dFrameContext, t: Rav1dTaskIndex) { let tasks = &mut *f.task_thread.tasks(); let mut pending_tasks = f.task_thread.pending_tasks.lock().unwrap(); tasks[t].next = None; @@ -269,7 +276,7 @@ unsafe fn add_pending(f: &Rav1dFrameData, t: Rav1dTaskIndex) { } #[inline] -unsafe fn merge_pending_frame(c: &Rav1dContext, f: &Rav1dFrameData) -> c_int { +unsafe fn merge_pending_frame(c: &Rav1dContext, f: &Rav1dFrameContext) -> c_int { let tasks = &*f.task_thread.tasks(); let merge = f.task_thread.pending_tasks_merge.load(Ordering::SeqCst); if merge != 0 { @@ -301,7 +308,8 @@ unsafe fn merge_pending(c: &Rav1dContext) -> c_int { unsafe fn create_filter_sbrow( c: &Rav1dContext, - f: &mut Rav1dFrameData, + fc: &Rav1dFrameContext, + f: &Rav1dFrameData, pass: c_int, ) -> Rav1dResult { let frame_hdr = &***f.frame_hdr.as_ref().unwrap(); @@ -311,22 +319,22 @@ unsafe fn create_filter_sbrow( let has_cdef = seq_hdr.cdef; let has_resize = (frame_hdr.size.width[0] != frame_hdr.size.width[1]) as c_int; let has_lr = f.lf.restore_planes; - let tasks = &mut *f.task_thread.tasks(); + let tasks = &mut *fc.task_thread.tasks(); let uses_2pass = (c.n_fc > 1 as c_uint) as c_int; let num_tasks = (f.sbh * (1 + uses_2pass)) as usize; tasks.grow_tasks(num_tasks); let task_idx = Rav1dTaskIndex::Task((f.sbh * (pass & 1)) as usize); if pass & 1 != 0 { - f.frame_thread_progress.entropy.store(0, Ordering::Relaxed); + fc.frame_thread_progress.entropy.store(0, Ordering::Relaxed); } else { let prog_sz = ((f.sbh + 31 & !(31 as c_int)) >> 5) as usize; - let mut frame = f.frame_thread_progress.frame.try_write().unwrap(); + let mut frame = fc.frame_thread_progress.frame.try_write().unwrap(); frame.clear(); frame.resize_with(prog_sz, || AtomicU32::new(0)); - let mut copy_lpf = f.frame_thread_progress.copy_lpf.try_write().unwrap(); + let mut copy_lpf = fc.frame_thread_progress.copy_lpf.try_write().unwrap(); copy_lpf.clear(); copy_lpf.resize_with(prog_sz, || AtomicU32::new(0)); - f.frame_thread_progress.deblock.store(0, Ordering::SeqCst); + fc.frame_thread_progress.deblock.store(0, Ordering::SeqCst); } f.frame_thread.next_tile_row[(pass & 1) as usize].store(0, Ordering::Relaxed); let t = &mut tasks[task_idx]; @@ -350,11 +358,12 @@ unsafe fn create_filter_sbrow( pub(crate) unsafe fn rav1d_task_create_tile_sbrow( c: &Rav1dContext, - f: &mut Rav1dFrameData, + fc: &Rav1dFrameContext, + f: &Rav1dFrameData, pass: c_int, _cond_signal: c_int, ) -> Rav1dResult { - let tasks = &mut *f.task_thread.tasks(); + let tasks = &mut *fc.task_thread.tasks(); let uses_2pass = (c.n_fc > 1 as c_uint) as c_int; let frame_hdr = &***f.frame_hdr.as_ref().unwrap(); let num_tasks = frame_hdr.tiling.cols * frame_hdr.tiling.rows; @@ -364,11 +373,11 @@ pub(crate) unsafe fn rav1d_task_create_tile_sbrow( tasks.tile_tasks[1] = Some(Rav1dTaskIndex::TileTask(num_tasks as usize)); } let tile_tasks = tasks.tile_tasks[0].map(|t| t + (num_tasks * (pass & 1)) as usize); - let mut pf_t = Some(create_filter_sbrow(c, f, pass)?); + let mut pf_t = Some(create_filter_sbrow(c, fc, f, pass)?); let mut prev_t = None; let mut tile_idx = 0; while tile_idx < num_tasks { - let ts: *mut Rav1dTileState = &mut *(f.ts).offset(tile_idx as isize) as *mut Rav1dTileState; + let ts: *mut Rav1dTileState = &mut *f.ts.offset(tile_idx as isize) as *mut Rav1dTileState; let t_idx = tile_tasks.unwrap() + (tile_idx as usize); let t = &mut tasks[t_idx]; t.sby = (*ts).tiling.row_start >> f.sb_shift; @@ -399,8 +408,8 @@ pub(crate) unsafe fn rav1d_task_create_tile_sbrow( prev_t = pf_t; } tasks[prev_t.unwrap()].next = None; - f.task_thread.done[(pass & 1) as usize].store(0, Ordering::SeqCst); - let mut pending_tasks = f.task_thread.pending_tasks.lock().unwrap(); + fc.task_thread.done[(pass & 1) as usize].store(0, Ordering::SeqCst); + let mut pending_tasks = fc.task_thread.pending_tasks.lock().unwrap(); if !(pending_tasks.head.is_none() || pass == 2) { unreachable!(); } @@ -410,22 +419,24 @@ pub(crate) unsafe fn rav1d_task_create_tile_sbrow( tasks[pending_tasks.tail.unwrap()].next = tile_tasks; } pending_tasks.tail = prev_t; - f.task_thread.pending_tasks_merge.store(1, Ordering::SeqCst); - f.task_thread.init_done.store(1, Ordering::SeqCst); + fc.task_thread + .pending_tasks_merge + .store(1, Ordering::SeqCst); + fc.task_thread.init_done.store(1, Ordering::SeqCst); Ok(()) } -pub(crate) unsafe fn rav1d_task_frame_init(c: &Rav1dContext, f: &mut Rav1dFrameData) { - f.task_thread.init_done.store(0, Ordering::SeqCst); - let tasks = f.task_thread.tasks(); +pub(crate) unsafe fn rav1d_task_frame_init(c: &Rav1dContext, fc: &Rav1dFrameContext, index: usize) { + fc.task_thread.init_done.store(0, Ordering::SeqCst); + let tasks = fc.task_thread.tasks(); let t_idx = Rav1dTaskIndex::Init; let t = &mut (*tasks)[t_idx]; t.type_0 = TaskType::Init; - t.frame_idx = f.index as c_uint; + t.frame_idx = index as c_uint; t.sby = 0 as c_int; t.deblock_progress = 0 as c_int; t.recon_progress = t.deblock_progress; - insert_task(c, f, t_idx, 1 as c_int); + insert_task(c, fc, t_idx, 1 as c_int); } pub(crate) unsafe fn rav1d_task_delayed_fg( @@ -448,7 +459,7 @@ pub(crate) unsafe fn rav1d_task_delayed_fg( #[inline] unsafe fn ensure_progress<'l, 'ttd: 'l>( ttd: &'ttd TaskThreadData, - f: &Rav1dFrameData, + f: &Rav1dFrameContext, t_idx: Rav1dTaskIndex, type_0: TaskType, state: &AtomicI32, @@ -469,27 +480,32 @@ unsafe fn ensure_progress<'l, 'ttd: 'l>( } #[inline] -unsafe fn check_tile(t_idx: Rav1dTaskIndex, f: &Rav1dFrameData, frame_mt: c_int) -> c_int { - let tasks = &mut *f.task_thread.tasks(); +unsafe fn check_tile( + t_idx: Rav1dTaskIndex, + f: &Rav1dFrameData, + task_thread: &Rav1dFrameContext_task_thread, + frame_mt: c_int, +) -> c_int { + let tasks = &mut *task_thread.tasks(); let t = &tasks[t_idx]; let tp = t.type_0 == TaskType::TileEntropy; let tile_idx = (t_idx - tasks.tile_tasks[tp as usize].unwrap()) .raw_index() .expect("t_idx was not a valid tile task"); - let ts: *mut Rav1dTileState = &mut *(f.ts).offset(tile_idx as isize) as *mut Rav1dTileState; + let ts: *mut Rav1dTileState = &mut *f.ts.offset(tile_idx as isize) as *mut Rav1dTileState; let p1 = (*ts).progress[tp as usize].load(Ordering::SeqCst); if p1 < t.sby { return 1; } let mut error = (p1 == TILE_ERROR) as c_int; - error |= f.task_thread.error.fetch_or(error, Ordering::SeqCst); + error |= task_thread.error.fetch_or(error, Ordering::SeqCst); if error == 0 && frame_mt != 0 && !tp { let p2 = (*ts).progress[1].load(Ordering::SeqCst); if p2 <= (*t).sby { return 1; } error = (p2 == TILE_ERROR) as c_int; - error |= f.task_thread.error.fetch_or(error, Ordering::SeqCst); + error |= task_thread.error.fetch_or(error, Ordering::SeqCst); } let frame_hdr = &***f.frame_hdr.as_ref().unwrap(); if error == 0 && frame_mt != 0 && !frame_hdr.frame_type.is_key_or_intra() { @@ -532,7 +548,7 @@ unsafe fn check_tile(t_idx: Rav1dTaskIndex, f: &Rav1dFrameData, frame_mt: c_int) if p3 < lowest { return 1; } - f.task_thread + task_thread .error .fetch_or((p3 == FRAME_ERROR) as c_int, Ordering::SeqCst); } @@ -544,7 +560,7 @@ unsafe fn check_tile(t_idx: Rav1dTaskIndex, f: &Rav1dFrameData, frame_mt: c_int) } #[inline] -unsafe fn get_frame_progress(f: &Rav1dFrameData) -> c_int { +unsafe fn get_frame_progress(fc: &Rav1dFrameContext, f: &Rav1dFrameData) -> c_int { // Note that `progress.is_some() == c.n_fc > 1`. let frame_prog = f .sr_cur @@ -557,7 +573,7 @@ unsafe fn get_frame_progress(f: &Rav1dFrameData) -> c_int { } let mut idx = (frame_prog >> f.sb_shift + 7) as c_int; let mut prog; - let frame = f.frame_thread_progress.frame.try_read().unwrap(); + let frame = fc.frame_thread_progress.frame.try_read().unwrap(); loop { let val: c_uint = !frame[idx as usize].load(Ordering::SeqCst); prog = if val != 0 { ctz(val) } else { 32 as c_int }; @@ -574,8 +590,8 @@ unsafe fn get_frame_progress(f: &Rav1dFrameData) -> c_int { } #[inline] -unsafe fn abort_frame(c: &Rav1dContext, f: &mut Rav1dFrameData, error: Rav1dResult) { - f.task_thread.error.store( +unsafe fn abort_frame(c: &Rav1dContext, fc: &Rav1dFrameContext, error: Rav1dResult) { + fc.task_thread.error.store( if error == Err(EINVAL) { 1 as c_int } else { @@ -583,14 +599,17 @@ unsafe fn abort_frame(c: &Rav1dContext, f: &mut Rav1dFrameData, error: Rav1dResu }, Ordering::SeqCst, ); - f.task_thread.task_counter.store(0, Ordering::SeqCst); - f.task_thread.done[0].store(1, Ordering::SeqCst); - f.task_thread.done[1].store(1, Ordering::SeqCst); - let progress = &**f.sr_cur.progress.as_ref().unwrap(); - progress[0].store(FRAME_ERROR, Ordering::SeqCst); - progress[1].store(FRAME_ERROR, Ordering::SeqCst); - rav1d_decode_frame_exit(c, f, error); - f.task_thread.cond.notify_one(); + fc.task_thread.task_counter.store(0, Ordering::SeqCst); + fc.task_thread.done[0].store(1, Ordering::SeqCst); + fc.task_thread.done[1].store(1, Ordering::SeqCst); + { + let f = fc.data.try_read().unwrap(); + let progress = &**f.sr_cur.progress.as_ref().unwrap(); + progress[0].store(FRAME_ERROR, Ordering::SeqCst); + progress[1].store(FRAME_ERROR, Ordering::SeqCst); + } + rav1d_decode_frame_exit(c, fc, error); + fc.task_thread.cond.notify_one(); } #[inline] @@ -744,15 +763,14 @@ pub unsafe fn rav1d_worker_task(c: &Rav1dContext, task_thread: Arc 1 as c_uint { // run init tasks second 'init_tasks: for i in 0..c.n_fc { let first = ttd.first.load(Ordering::SeqCst); - let f = - &mut *(c.fc).offset(first.wrapping_add(i).wrapping_rem(c.n_fc) as isize); - let tasks = &*f.task_thread.tasks(); - if f.task_thread.init_done.load(Ordering::SeqCst) != 0 { + let fc = &*(c.fc).offset(first.wrapping_add(i).wrapping_rem(c.n_fc) as isize); + let tasks = &*fc.task_thread.tasks(); + if fc.task_thread.init_done.load(Ordering::SeqCst) != 0 { continue 'init_tasks; } let Some(t_idx) = tasks.head else { @@ -760,7 +778,7 @@ pub unsafe fn rav1d_worker_task(c: &Rav1dContext, task_thread: Arc 1 as c_uint) as c_int) == 0 { - break 'found (f, t_idx, prev_t); + if check_tile( + t_idx, + &fc.data.try_read().unwrap(), + &fc.task_thread, + (c.n_fc > 1 as c_uint) as c_int, + ) == 0 + { + break 'found (fc, t_idx, prev_t); } } else if t.recon_progress != 0 { + let f = fc.data.try_read().unwrap(); let p = t.type_0 == TaskType::EntropyProgress; - let error = f.task_thread.error.load(Ordering::SeqCst); - if !(f.task_thread.done[p as usize].load(Ordering::SeqCst) == 0 + let error = fc.task_thread.error.load(Ordering::SeqCst); + if !(fc.task_thread.done[p as usize].load(Ordering::SeqCst) == 0 || error != 0) { unreachable!(); } - let frame_hdr = &***f.frame_hdr.as_ref().unwrap(); + let frame_hdr = fc.frame_hdr(); let tile_row_base = frame_hdr.tiling.cols * f.frame_thread.next_tile_row[p as usize].load(Ordering::Relaxed); if p { - let p1_0 = f.frame_thread_progress.entropy.load(Ordering::SeqCst); + let p1_0 = fc.frame_thread_progress.entropy.load(Ordering::SeqCst); if p1_0 < t.sby { break 'next; } - f.task_thread + fc.task_thread .error .fetch_or((p1_0 == TILE_ERROR) as c_int, Ordering::SeqCst); } for tc_0 in 0..frame_hdr.tiling.cols { - let ts: *mut Rav1dTileState = &mut *(f.ts) - .offset((tile_row_base + tc_0) as isize) - as *mut Rav1dTileState; + let ts: *mut Rav1dTileState = + &mut *f.ts.offset((tile_row_base + tc_0) as isize) + as *mut Rav1dTileState; let p2 = (*ts).progress[p as usize].load(Ordering::SeqCst); if p2 < t.recon_progress { break 'next; } - f.task_thread + fc.task_thread .error .fetch_or((p2 == TILE_ERROR) as c_int, Ordering::SeqCst); } @@ -866,26 +892,26 @@ pub unsafe fn rav1d_worker_task(c: &Rav1dContext, task_thread: Arc> 5) as usize] .load(Ordering::SeqCst); if p1_1 as c_uint & (1 as c_uint) << (t.sby - 1 & 31) != 0 { - break 'found (f, t_idx, prev_t); + break 'found (fc, t_idx, prev_t); } } else { if t.deblock_progress == 0 { unreachable!(); } - let p1_2 = f.frame_thread_progress.deblock.load(Ordering::SeqCst); + let p1_2 = fc.frame_thread_progress.deblock.load(Ordering::SeqCst); if p1_2 >= t.deblock_progress { - f.task_thread + fc.task_thread .error .fetch_or((p1_2 == TILE_ERROR) as c_int, Ordering::SeqCst); - break 'found (f, t_idx, prev_t); + break 'found (fc, t_idx, prev_t); } } } @@ -907,7 +933,7 @@ pub unsafe fn rav1d_worker_task(c: &Rav1dContext, task_thread: Arc 1 as c_uint) { unreachable!(); } - let res = rav1d_decode_frame_init(c, f); - let p1_3 = (if let Some(progress) = f.in_cdf.progress() { + let res = rav1d_decode_frame_init(c, fc); + let p1_3 = (if let Some(progress) = fc.in_cdf().progress() { progress.load(Ordering::SeqCst) } else { 1 as c_int as c_uint @@ -952,14 +978,14 @@ pub unsafe fn rav1d_worker_task(c: &Rav1dContext, task_thread: Arc 1 as c_uint) { unreachable!(); } let mut p_0 = 1; while p_0 <= 2 { - let res_1 = rav1d_task_create_tile_sbrow(c, f, p_0, 0 as c_int); + let f = fc.data.try_read().unwrap(); + let res_1 = + rav1d_task_create_tile_sbrow(c, fc, &f, p_0, 0 as c_int); if res_1.is_err() { assert!( task_thread_lock.is_none(), @@ -1000,11 +1030,11 @@ pub unsafe fn rav1d_worker_task(c: &Rav1dContext, task_thread: Arc { + let f = fc.data.try_read().unwrap(); let p_1 = t.type_0 == TaskType::TileEntropy; let tile_idx = (t_idx - tile_tasks[p_1 as usize].unwrap()) .raw_index() .unwrap(); let ts_0: *mut Rav1dTileState = - &mut *(f.ts).offset(tile_idx as isize) as *mut Rav1dTileState; + &mut *f.ts.offset(tile_idx as isize) as *mut Rav1dTileState; tc.ts = tile_idx; tc.b.y = sby << f.sb_shift; let uses_2pass = (c.n_fc > 1 as c_uint) as c_int; @@ -1057,7 +1090,7 @@ pub unsafe fn rav1d_worker_task(c: &Rav1dContext, task_thread: Arc 0, Err(()) => 1, }; @@ -1065,11 +1098,11 @@ pub unsafe fn rav1d_worker_task(c: &Rav1dContext, task_thread: Arc= 0) { + if !(fc.task_thread.task_counter.load(Ordering::SeqCst) >= 0) { unreachable!(); } if ttd.cond_signaled.fetch_or(1, Ordering::SeqCst) == 0 { @@ -1137,15 +1172,18 @@ pub unsafe fn rav1d_worker_task(c: &Rav1dContext, task_thread: Arc { - if f.task_thread.error.load(Ordering::SeqCst) == 0 { - (f.bd_fn().filter_sbrow_deblock_cols)(c, f, &mut tc, sby); + { + let f = fc.data.try_read().unwrap(); + if fc.task_thread.error.load(Ordering::SeqCst) == 0 { + (f.bd_fn().filter_sbrow_deblock_cols)(c, &f, &mut tc, sby); + } } if ensure_progress( ttd, - f, + fc, t_idx, TaskType::DeblockRows, - &f.frame_thread_progress.deblock, + &fc.frame_thread_progress.deblock, &mut task_thread_lock, ) != 0 { @@ -1155,8 +1193,9 @@ pub unsafe fn rav1d_worker_task(c: &Rav1dContext, task_thread: Arc { - if f.task_thread.error.load(Ordering::SeqCst) == 0 { - (f.bd_fn().filter_sbrow_deblock_rows)(c, f, &mut tc, sby); + let f = fc.data.try_read().unwrap(); + if fc.task_thread.error.load(Ordering::SeqCst) == 0 { + (f.bd_fn().filter_sbrow_deblock_rows)(c, &f, &mut tc, sby); } // signal deblock progress let seq_hdr = &***f.seq_hdr.as_ref().unwrap(); @@ -1164,8 +1203,8 @@ pub unsafe fn rav1d_worker_task(c: &Rav1dContext, task_thread: Arc> 5) as usize] .fetch_or((1 as c_uint) << (sby & 31), Ordering::SeqCst); // CDEF needs the top buffer to be saved by lr_copy_lpf of the @@ -1186,7 +1225,7 @@ pub unsafe fn rav1d_worker_task(c: &Rav1dContext, task_thread: Arc { + let f = fc.data.try_read().unwrap(); let seq_hdr = &***f.seq_hdr.as_ref().unwrap(); if seq_hdr.cdef != 0 { - if f.task_thread.error.load(Ordering::SeqCst) == 0 { - (f.bd_fn().filter_sbrow_cdef)(c, f, &mut tc, sby); + if fc.task_thread.error.load(Ordering::SeqCst) == 0 { + (f.bd_fn().filter_sbrow_cdef)(c, &f, &mut tc, sby); } reset_task_cur_async(ttd, t.frame_idx, c.n_fc); if ttd.cond_signaled.fetch_or(1, Ordering::SeqCst) == 0 { @@ -1214,20 +1254,22 @@ pub unsafe fn rav1d_worker_task(c: &Rav1dContext, task_thread: Arc { + let f = fc.data.try_read().unwrap(); let frame_hdr = &***f.frame_hdr.as_ref().unwrap(); if frame_hdr.size.width[0] != frame_hdr.size.width[1] { - if f.task_thread.error.load(Ordering::SeqCst) == 0 { - (f.bd_fn().filter_sbrow_resize)(c, f, &mut tc, sby); + if fc.task_thread.error.load(Ordering::SeqCst) == 0 { + (f.bd_fn().filter_sbrow_resize)(c, &f, &mut tc, sby); } } task_type = TaskType::LoopRestoration; continue 'fallthrough; } TaskType::LoopRestoration => { - if f.task_thread.error.load(Ordering::SeqCst) == 0 + let f = fc.data.try_read().unwrap(); + if fc.task_thread.error.load(Ordering::SeqCst) == 0 && f.lf.restore_planes != 0 { - (f.bd_fn().filter_sbrow_lr)(c, f, &mut tc, sby); + (f.bd_fn().filter_sbrow_lr)(c, &f, &mut tc, sby); } task_type = TaskType::ReconstructionProgress; continue 'fallthrough; @@ -1245,11 +1287,12 @@ pub unsafe fn rav1d_worker_task(c: &Rav1dContext, task_thread: Arc 1 as c_uint) as c_int; let sbh = f.sbh; let sbsz = f.sb_step * 4; if t.type_0 == TaskType::EntropyProgress { - error_0 = f.task_thread.error.load(Ordering::SeqCst); + error_0 = fc.task_thread.error.load(Ordering::SeqCst); let y: c_uint = if sby + 1 == sbh { u32::MAX } else { @@ -1260,28 +1303,29 @@ pub unsafe fn rav1d_worker_task(c: &Rav1dContext, task_thread: Arctype != DAV1D_TASK_TYPE_ENTROPY_PROGRESS - f.frame_thread_progress.frame.try_read().unwrap()[(sby >> 5) as usize] + fc.frame_thread_progress.frame.try_read().unwrap()[(sby >> 5) as usize] .fetch_or((1 as c_uint) << (sby & 31), Ordering::SeqCst); { - let _task_thread_lock = f.task_thread.lock.lock().unwrap(); - sby = get_frame_progress(f); - error_0 = f.task_thread.error.load(Ordering::SeqCst); + let _task_thread_lock = fc.task_thread.lock.lock().unwrap(); + sby = get_frame_progress(fc, &f); + error_0 = fc.task_thread.error.load(Ordering::SeqCst); let y_0: c_uint = if sby + 1 == sbh { u32::MAX } else { @@ -1319,23 +1363,24 @@ pub unsafe fn rav1d_worker_task(c: &Rav1dContext, task_thread: Arc