From 431afd827ea21b0d0b2b4ab6dfb64de3e2dc4f4d Mon Sep 17 00:00:00 2001
From: Kevin Schweikert <54439512+kevinschweikert@users.noreply.github.com>
Date: Thu, 24 Oct 2024 18:08:33 +0200
Subject: [PATCH] Add seeking (#14)

---
 c_src/xav/reader.c     | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++
 c_src/xav/reader.h     |  3 +++
 c_src/xav/xav_reader.c | 37 ++++++++++++++++++++++++++++---
 lib/reader.ex          | 19 ++++++++++++----
 lib/reader_nif.ex      |  2 ++
 test/reader_test.exs   | 37 +++++++++++++++++++++++++++++++
 6 files changed, 150 insertions(+), 7 deletions(-)

diff --git a/c_src/xav/reader.c b/c_src/xav/reader.c
index 97294e4..1a995d0 100644
--- a/c_src/xav/reader.c
+++ b/c_src/xav/reader.c
@@ -58,6 +58,15 @@ int reader_init(struct Reader *reader, unsigned char *path, size_t path_size, in
     return -2;
   }
 
+  AVStream *stream = reader->fmt_ctx->streams[reader->stream_idx];
+
+  // If avg_frame_rate is valid, use it; otherwise, calculate it from time_base.
+  if (stream->avg_frame_rate.num != 0 && stream->avg_frame_rate.den != 0) {
+    reader->framerate = stream->avg_frame_rate;
+  } else {
+    reader->framerate = av_inv_q(stream->time_base);
+  }
+
   // TODO why is this actually needed?
   if (avcodec_parameters_to_context(reader->c,
                                     reader->fmt_ctx->streams[reader->stream_idx]->codecpar) < 0) {
@@ -175,6 +184,56 @@ int reader_next_frame(struct Reader *reader) {
   return 0;
 }
 
+// Seeks the reader to `time_in_seconds`. Flushes the decoder, seeks with AVSEEK_FLAG_BACKWARD and
+// then sends the packets of the selected stream to the decoder, flagged with
+// AV_PKT_FLAG_DISCARD, until a packet at or after the target timestamp was sent or reading fails.
+// Returns 0 on success, -1 if av_seek_frame fails, or the negative avcodec_send_packet result.
+int reader_seek(struct Reader *reader, double time_in_seconds) {
+  AVRational time_base = reader->fmt_ctx->streams[reader->stream_idx]->time_base;
+
+  // keep floating time precision by multiplying with the internal AV_TIME_BASE (1_000_000)
+  // and convert to the same time_base for the stream we're using in `av_seek_frame` because we're
+  // explicitly specifying the stream index. for further information, see param docs in
+  // [`av_seek_frame`](https://ffmpeg.org/doxygen/7.0/group__lavf__decoding.html#gaa23f7619d8d4ea0857065d9979c75ac8)
+  int64_t seek_pos =
+      av_rescale_q((int64_t)(time_in_seconds * AV_TIME_BASE), AV_TIME_BASE_Q, time_base);
+
+  avcodec_flush_buffers(reader->c);
+
+  if (av_seek_frame(reader->fmt_ctx, reader->stream_idx, seek_pos, AVSEEK_FLAG_BACKWARD) < 0) {
+    // seek_pos is an int64_t, so it is printed with an integer conversion instead of %f
+    XAV_LOG_DEBUG("Error while seeking to position %lld / %f seconds", (long long)seek_pos,
+                  time_in_seconds);
+    return -1;
+  }
+
+  // we have to read frames from the last keyframe until the desired timestamp
+  while (av_read_frame(reader->fmt_ctx, reader->pkt) >= 0) {
+    if (reader->pkt->stream_index != reader->stream_idx) {
+      // packets of other streams are skipped; release them before reading the next one
+      av_packet_unref(reader->pkt);
+      continue;
+    }
+
+    // read the timestamp now, the packet is released right after it was sent
+    int64_t current_pos = reader->pkt->pts != AV_NOPTS_VALUE ? reader->pkt->pts : reader->pkt->dts;
+
+    reader->pkt->flags |= AV_PKT_FLAG_DISCARD;
+    int ret = avcodec_send_packet(reader->c, reader->pkt);
+    // release the packet on success and on failure
+    av_packet_unref(reader->pkt);
+    if (ret < 0) {
+      return ret;
+    }
+
+    if (current_pos >= seek_pos) {
+      break;
+    }
+  }
+
+  return 0;
+}
+
 void reader_free_frame(struct Reader *reader) {
   if (reader->frame != NULL) {
     av_frame_unref(reader->frame);
diff --git a/c_src/xav/reader.h b/c_src/xav/reader.h
index 29b4de6..934d482 100644
--- a/c_src/xav/reader.h
+++ b/c_src/xav/reader.h
@@ -23,6 +23,7 @@ struct Reader {
   const AVInputFormat *input_format;
   AVDictionary *options;
   enum AVMediaType media_type;
+  AVRational framerate;
 };
 
 struct Reader *reader_alloc();
@@ -32,6 +33,8 @@ int reader_init(struct Reader *reader, unsigned char *path, size_t path_size, in
 
 int reader_next_frame(struct Reader *reader);
 
+int reader_seek(struct Reader *reader, double time_in_seconds);
+
 void reader_free_frame(struct Reader *reader);
 
 void reader_free(struct Reader **reader);
diff --git a/c_src/xav/xav_reader.c b/c_src/xav/xav_reader.c
index b8d83a6..108dbae 100644
--- a/c_src/xav/xav_reader.c
+++ b/c_src/xav/xav_reader.c
@@ -112,8 +112,12 @@ ERL_NIF_TERM new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
     ERL_NIF_TERM in_format_term =
         enif_make_atom(env, av_get_pix_fmt_name(xav_reader->reader->c->pix_fmt));
     ERL_NIF_TERM out_format_term = enif_make_atom(env, "rgb");
-    return enif_make_tuple(env, 7, ok_term, xav_term, in_format_term, out_format_term,
-                           bit_rate_term, duration_term, codec_term);
+    ERL_NIF_TERM framerate_num_term = enif_make_int(env, xav_reader->reader->framerate.num);
+    ERL_NIF_TERM framerate_den_term = enif_make_int(env, xav_reader->reader->framerate.den);
+    ERL_NIF_TERM framerate_term = enif_make_tuple(env, 2, framerate_num_term, framerate_den_term);
+
+    return enif_make_tuple(env, 8, ok_term, xav_term, in_format_term, out_format_term,
+                           bit_rate_term, duration_term, codec_term, framerate_term);
   } else {
     return xav_nif_raise(env, "unknown_media_type");
   }
@@ -186,6 +190,32 @@ ERL_NIF_TERM next_frame(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
   return xav_nif_ok(env, frame_term);
 }
 
+// NIF behind seek/2: reads the reader resource and the time (a float, in seconds) from argv and
+// calls reader_seek. Returns the atom ok; every failure is reported through xav_nif_raise.
+ERL_NIF_TERM seek(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
+  if (argc != 2) {
+    return xav_nif_raise(env, "invalid_arg_count");
+  }
+
+  struct XavReader *xav_reader;
+  if (!enif_get_resource(env, argv[0], xav_reader_resource_type, (void **)&xav_reader)) {
+    return xav_nif_raise(env, "couldnt_get_reader_resource");
+  }
+
+  double time_in_seconds;
+  if (!enif_get_double(env, argv[1], &time_in_seconds)) {
+    return xav_nif_raise(env, "invalid_time_in_seconds");
+  }
+
+  int ret = reader_seek(xav_reader->reader, time_in_seconds);
+
+  if (ret < 0) {
+    return xav_nif_raise(env, "failed to seek");
+  }
+
+  return enif_make_atom(env, "ok");
+}
+
 static int init_audio_converter(struct XavReader *xav_reader) {
   xav_reader->ac = audio_converter_alloc();
 
@@ -270,7 +300,8 @@ void free_xav_reader(ErlNifEnv *env, void *obj) {
 }
 
 static ErlNifFunc xav_funcs[] = {{"new", 6, new},
-                                 {"next_frame", 1, next_frame, ERL_NIF_DIRTY_JOB_CPU_BOUND}};
+                                 {"next_frame", 1, next_frame, ERL_NIF_DIRTY_JOB_CPU_BOUND},
+                                 {"seek", 2, seek, ERL_NIF_DIRTY_JOB_CPU_BOUND}};
 
 static int load(ErlNifEnv *env, void **priv, ERL_NIF_TERM load_info) {
 
diff --git a/lib/reader.ex b/lib/reader.ex
index 6c61c3f..67c9a5d 100644
--- a/lib/reader.ex
+++ b/lib/reader.ex
@@ -28,11 +28,13 @@ defmodule Xav.Reader do
           out_channels: integer() | nil,
           bit_rate: integer(),
           duration: integer(),
-          codec: atom()
+          codec: atom(),
+          framerate: {integer(), integer()} | nil
         }
 
   @enforce_keys [:reader, :in_format, :out_format, :bit_rate, :duration, :codec]
-  defstruct @enforce_keys ++ [:in_sample_rate, :out_sample_rate, :in_channels, :out_channels]
+  defstruct @enforce_keys ++
+              [:in_sample_rate, :out_sample_rate, :in_channels, :out_channels, :framerate]
 
   @doc """
   The same as new/1 but raises on error.
@@ -91,7 +93,7 @@ defmodule Xav.Reader do
            codec: to_human_readable(codec)
          }}
 
-      {:ok, reader, in_format, out_format, bit_rate, duration, codec} ->
+      {:ok, reader, in_format, out_format, bit_rate, duration, codec, framerate} ->
         {:ok,
          %__MODULE__{
            reader: reader,
@@ -99,7 +101,8 @@ defmodule Xav.Reader do
            out_format: out_format,
            bit_rate: bit_rate,
            duration: duration,
-           codec: to_human_readable(codec)
+           codec: to_human_readable(codec),
+           framerate: framerate
          }}
 
       {:error, _reason} = err ->
@@ -129,6 +132,14 @@ defmodule Xav.Reader do
     end
   end
 
+  @doc """
+  Seeks the reader to the given time in seconds
+  """
+  @spec seek(t(), float()) :: :ok | {:error, term()}
+  def seek(%__MODULE__{reader: ref}, time_in_seconds) do
+    Xav.Reader.NIF.seek(ref, time_in_seconds)
+  end
+
   @doc """
   Creates a new reader stream.
   """
diff --git a/lib/reader_nif.ex b/lib/reader_nif.ex
index 0161450..2fc7525 100644
--- a/lib/reader_nif.ex
+++ b/lib/reader_nif.ex
@@ -12,4 +12,6 @@ defmodule Xav.Reader.NIF do
       do: :erlang.nif_error(:undef)
 
   def next_frame(_reader), do: :erlang.nif_error(:undef)
+
+  def seek(_reader, _time_in_seconds), do: :erlang.nif_error(:undef)
 end
diff --git a/test/reader_test.exs b/test/reader_test.exs
index 950e892..b624598 100644
--- a/test/reader_test.exs
+++ b/test/reader_test.exs
@@ -17,6 +17,43 @@ defmodule Xav.ReaderTest do
     for _i <- 0..(30 * 5), do: assert({:ok, %Xav.Frame{}} = Xav.Reader.next_frame(r))
   end
 
+  describe "seek/2" do
+    test "works with video" do
+      {:ok, r} = Xav.Reader.new("./test/fixtures/sample_h264.mp4")
+      assert :ok = Xav.Reader.seek(r, 5.0)
+      assert({:ok, %Xav.Frame{}} = Xav.Reader.next_frame(r))
+    end
+
+    test "works with audio" do
+      {:ok, r} = Xav.Reader.new("./test/fixtures/stt/harvard.mp3", read: :audio)
+      assert :ok = Xav.Reader.seek(r, 5.0)
+      assert({:ok, %Xav.Frame{}} = Xav.Reader.next_frame(r))
+    end
+
+    test "negative timestamp just seeks to beginning of file" do
+      {:ok, r} = Xav.Reader.new("./test/fixtures/sample_h264.mp4")
+      assert :ok = Xav.Reader.seek(r, -5.0)
+      assert({:ok, %Xav.Frame{}} = Xav.Reader.next_frame(r))
+    end
+
+    test "timestamp greater than duration" do
+      {:ok, r} = Xav.Reader.new("./test/fixtures/sample_h264.mp4")
+      assert :ok = Xav.Reader.seek(r, (r.duration + 1) * 1.0)
+      assert({:error, :eof} = Xav.Reader.next_frame(r))
+    end
+
+    test "seek back returns same frame" do
+      {:ok, r} = Xav.Reader.new("./test/fixtures/sample_h264.mp4")
+      assert :ok = Xav.Reader.seek(r, 0.0)
+      assert({:ok, %Xav.Frame{} = first} = Xav.Reader.next_frame(r))
+      assert :ok = Xav.Reader.seek(r, 5.0)
+      assert({:ok, %Xav.Frame{}} = Xav.Reader.next_frame(r))
+      assert :ok = Xav.Reader.seek(r, 0.0)
+      assert({:ok, %Xav.Frame{} = other_first} = Xav.Reader.next_frame(r))
+      assert first == other_first
+    end
+  end
+
   test "stream!" do
     Xav.Reader.stream!("./test/fixtures/sample_h264.mp4")
     |> Enum.all?(fn frame -> is_struct(frame, Xav.Frame) end)