Skip to content

Commit

Permalink
S3T3 comparison
Browse files Browse the repository at this point in the history
  • Loading branch information
friendlymatthew committed Oct 9, 2024
1 parent d3360d1 commit c06068d
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 48 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ This repository aims to investigate whether WebCodec's `VideoDecoder` can decode

### Scalable Video Coding (SVC)

Adaptive video streaming is the ability to adapt the video quality to the user's network conditions. Simulcast allows
for adaptive video streaming by independently encoding different versions of a video segment. However, this leads to an
Adaptive video streaming is the ability to adapt the video quality to the user's network conditions. Simulcast does this
by independently encoding different versions of a video segment. However, this leads to an
increased storage penalty due to the redundancy.

SVC reduces this redundancy by encoding a video segment into multiple different spatial and
Expand Down
36 changes: 22 additions & 14 deletions index.html
Original file line number Diff line number Diff line change
Expand Up @@ -102,16 +102,10 @@

<div>
<div style="height: 25px;">
<label for="svc-mode">Scalability mode:</label>

<select id="svc-mode" name="svc-layers">
<option value="true">L3T3</option>
<option value="false">S3T3</option>
</select>
</div>
<div>WebRTC Receiver (Remote Peer)</div>
<video autoplay id="video2" playsinline></video>
<video autoplay id="video2a" playsinline></video>
<video autoplay id="video2a" playsinline style="display: none;"></video>
</div>
<div>
<div class="video-header">
Expand All @@ -138,18 +132,32 @@

<div id="layers">
<p>
*There are actually two remote peers with different scalability modes. Above the WebRTC receiver, you can toggle
video elements.
*There are actually two remote peer connections. One connection uses the scalability mode
<code>L3T3</code> to encode video frames; the other uses <code>S3T3</code>. The WebRTC receiver video
above shows the connection that uses the former scalability mode (<code>L3T3</code>).
</p>
<p>
<code>L3T3</code> and <code>S3T3</code> both have three spatial layers and three temporal layers.
<code>L3T3</code> chunks depend on lower layers while <code>S3T3</code> chunks have no inter-layer
dependency, also known as simulcast.
</p>

<div>
<div>
<p>L3T3 Encoded Frames</p>
<div style="display: flex; ">
<div style="margin-right: 8px">
<div>
<p>S3T3 Encoded Frames</p>
</div>
<div id="s3t3-entries">
</div>
</div>
<div id="l3t3-entries">
<div>
<div>
<p>L3T3 Encoded Frames</p>
</div>
<div id="l3t3-entries">
</div>
</div>
</div>

</div>

<script src="https://webrtc.github.io/adapter/adapter-latest.js"></script>
Expand Down
69 changes: 42 additions & 27 deletions js/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ hangupButton.onclick = hangup;


let startToEnd;
let secondaryStartToEnd; // this is S3T3

let localStream;
// eslint-disable-next-line no-unused-vars
Expand Down Expand Up @@ -127,10 +128,10 @@ function gotStream(stream) {
}


function gotRemoteStream(stream) {
function gotRemoteStream(stream, videoElement) {
// console.log('Received remote stream');
remoteStream = stream;
video2.srcObject = stream;
videoElement.srcObject = stream;
}

async function start() {
Expand All @@ -157,7 +158,7 @@ async function start() {
type: "webrtc",
video: {
contentType: "video/av01.0.04M.08",
scalabilityMode: "L3T3",
scalabilityMode: "S3T3",
width: 1280,
height: 720,
bitrate: 10000,
Expand All @@ -176,7 +177,7 @@ async function start() {


// Here we want to decode the encoded video chunk
function setupSenderTransform(sender) {
function setupSenderTransform(sender, layered) {
if (window.RTCRtpScriptTransform) {
sender.transform = new RTCRtpScriptTransform(worker, {operation: 'encode'});
return;
Expand All @@ -185,13 +186,14 @@ function setupSenderTransform(sender) {
const senderStreams = sender.createEncodedStreams();
const {readable, writable} = senderStreams;
worker.postMessage({
operation: 'encode',
operation: `encode-layered-${layered}`,
readable,
writable,
}, [readable, writable]);
}

function setupReceiverTransform(receiver) {

function setupReceiverTransform(receiver, layered) {
if (window.RTCRtpScriptTransform) {
receiver.transform = new RTCRtpScriptTransform(worker, {operation: 'decode'});
return;
Expand All @@ -203,7 +205,7 @@ function setupReceiverTransform(receiver) {
// console.log(`receiverStreams`, receiverStreams);
const {readable, writable} = receiverStreams;
worker.postMessage({
operation: 'decode',
operation: `decode-layered-${layered}`,
readable,
writable,
}, [readable, writable]);
Expand All @@ -218,29 +220,32 @@ worker.postMessage({
}, [writable]);

const encodedL3T3Frames = new Map();
const encodedS3T3Frames = new Map();

worker.onmessage = ({data}) => {
if (data.operation === 'track-ready') {
video3.srcObject = new MediaStream([mediaStreamTrackGenerator]);
}

if (data.operation === "encoded-frame") {
const {timestamp, spatialIndex, temporalIndex, size, type} = data;
const {layered, timestamp, spatialIndex, temporalIndex, size, type} = data;

let frameMap = layered ? encodedL3T3Frames : encodedS3T3Frames;

if (encodedL3T3Frames.has(timestamp)) {
const layers = encodedL3T3Frames.get(timestamp);
if (frameMap.has(timestamp)) {
const layers = frameMap.get(timestamp);
layers.push({
spatialIndex,
temporalIndex,
size,
type,
});

encodedL3T3Frames.set(timestamp, layers);
updateEncodedFrame(timestamp, layers);
frameMap.set(timestamp, layers);
updateEncodedFrame(timestamp, layers, layered);

} else {
encodedL3T3Frames.set(timestamp, [{
frameMap.set(timestamp, [{
spatialIndex,
temporalIndex,
size,
Expand All @@ -251,14 +256,14 @@ worker.onmessage = ({data}) => {
spatialIndex,
temporalIndex,
size,
type
}])
type,
}], layered)
}
}
};

function updateEncodedFrame(timestamp, frames) {
const entry = document.querySelector(`#entry-${timestamp} ul`);
function updateEncodedFrame(timestamp, frames, layered) {
const entry = document.querySelector(`#entry-${layered}-${timestamp} ul`);
if (entry) {
entry.innerHTML = frames.map(f => `
<li style="padding: 2px; background-color: ${f.type === 'delta' ? 'yellow' : 'lawngreen'};">
Expand All @@ -273,10 +278,10 @@ function updateEncodedFrame(timestamp, frames) {
}
}

function appendEncodedFrame(timestamp, frames) {
const container = document.getElementById('l3t3-entries');
function appendEncodedFrame(timestamp, frames, layered) {
const container = document.getElementById(layered ? 'l3t3-entries' : 's3t3-entries');
const frameEntry = document.createElement('div');
frameEntry.setAttribute('id', `entry-${timestamp}`);
frameEntry.setAttribute('id', `entry-${layered}-${timestamp}`);
frameEntry.innerHTML = `
<div><strong>Timestamp ${timestamp}:</strong></div>
<ul>
Expand All @@ -299,20 +304,29 @@ async function call() {
callButton.disabled = true;
hangupButton.disabled = false;

// console.log('Starting call');
startToEnd = new VideoPipe(localStream, true, true, e => {
setupReceiverTransform(e.receiver, true);

if (!supportsSetCodecPreferences) {
throw new Error(`Codec is not supported`);
}

gotRemoteStream(e.streams[0], video2);
}, 'L3T3');
startToEnd.pc1.getSenders().forEach(s => setupSenderTransform(s, true));
await startToEnd.negotiate();

startToEnd = new VideoPipe(localStream, true, true, e => {
setupReceiverTransform(e.receiver);
secondaryStartToEnd = new VideoPipe(localStream, true, true, e => {
setupReceiverTransform(e.receiver, false);

if (!supportsSetCodecPreferences) {
throw new Error(`Codec is not supported`);
}

gotRemoteStream(e.streams[0]);
});
startToEnd.pc1.getSenders().forEach(setupSenderTransform);
startToEnd.negotiate();
gotRemoteStream(e.streams[0], video2a);
}, 'S3T3');
secondaryStartToEnd.pc1.getSenders().forEach(s => setupSenderTransform(s, false));
await secondaryStartToEnd.negotiate();


// console.log('Video pipes created');
Expand All @@ -323,6 +337,7 @@ function hangup() {

console.log(encodedL3T3Frames);
startToEnd.close();
secondaryStartToEnd.close();
hangupButton.disabled = true;
callButton.disabled = false;
}
Expand Down
4 changes: 2 additions & 2 deletions js/videopipe.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
//
'use strict';

function VideoPipe(stream, forceSend, forceReceive, handler) {
function VideoPipe(stream, forceSend, forceReceive, handler, scalabilityMode) {
this.pc1 = new RTCPeerConnection({
encodedInsertableStreams: forceSend,
});
Expand All @@ -38,7 +38,7 @@ function VideoPipe(stream, forceSend, forceReceive, handler) {
let tr = this.pc1.addTransceiver(track, {
streams: [stream],
sendEncodings: [
{scalabilityMode: "L3T3"}
{scalabilityMode: scalabilityMode}
]
});

Expand Down
51 changes: 48 additions & 3 deletions js/worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ let highestSpatialLayer = 3, highestTemporalLayer = 3;


async function handleTransform(operation, readable, writable) {
if (operation === 'encode') {
if (operation === 'encode-layered-true') {
const transformer = new TransformStream({
async transform(encodedFrame, controller) {
const {temporalIndex, spatialIndex, width, height} = encodedFrame.getMetadata();
Expand All @@ -79,6 +79,7 @@ async function handleTransform(operation, readable, writable) {

postMessage({
operation: 'encoded-frame',
layered: true,
timestamp,
spatialIndex,
temporalIndex,
Expand All @@ -94,7 +95,38 @@ async function handleTransform(operation, readable, writable) {
await readable
.pipeThrough(transformer)
.pipeTo(writable);
} else if (operation === 'decode') {
} else if (operation === "encode-layered-false") {
const transformer = new TransformStream({
async transform(encodedFrame, controller) {
const {temporalIndex, spatialIndex, width, height} = encodedFrame.getMetadata();
const {timestamp, data, type} = encodedFrame;

const size = data.byteLength;

console.log(`${spatialIndex}, ${temporalIndex}`)

postMessage({
operation: 'encoded-frame',
layered: false,
timestamp,
spatialIndex,
temporalIndex,
size,
type,
});


if (spatialIndex === 2) {
controller.enqueue(encodedFrame);
}

}
})

await readable
.pipeThrough(transformer)
.pipeTo(writable);
} else if (operation === 'decode-layered-true') {
const transformer = new TransformStream({
async transform(encodedFrame, controller) {
const {temporalIndex, spatialIndex, width, height} = encodedFrame.getMetadata();
Expand All @@ -119,6 +151,19 @@ async function handleTransform(operation, readable, writable) {
await readable
.pipeThrough(transformer)
.pipeTo(writable);
} else if (operation === "decode-layered-false") {
const transformer = new TransformStream({
async transform(encodedFrame, controller) {
const {temporalIndex, spatialIndex} = encodedFrame.getMetadata();

if (spatialIndex === 2) {
controller.enqueue(encodedFrame);
}
}
})
await readable
.pipeThrough(transformer)
.pipeTo(writable);
}
}

Expand All @@ -134,7 +179,7 @@ onmessage = async ({data}) => {
})
}

if (operation === 'encode' || operation === 'decode') {
if (operation === 'encode-layered-true' || operation === 'encode-layered-false' || operation === 'decode-layered-true' || operation === 'decode-layered-false') {
let {readable, writable} = data;
return await handleTransform(operation, readable, writable);
}
Expand Down

0 comments on commit c06068d

Please sign in to comment.