diff --git a/README.md b/README.md index af4a615..3d5885d 100644 --- a/README.md +++ b/README.md @@ -6,8 +6,8 @@ This repository aims to investigate whether WebCodec's `VideoDecoder` can decode ### Scalable Video Coding (SVC) -Adaptive video streaming is the ability to adapt the video quality to the user's network conditions. Simulcast allows -for adaptive video streaming by independently encoding different versions of a video segment. However, this leads to an +Adaptive video streaming is the ability to adapt the video quality to the user's network conditions. Simulcast does this +by independently encoding different versions of a video segment. However, this leads to an increased storage penalty due to the redundancy. SVC reduces this redundancy by encoding a video segment into multiple different spatial and diff --git a/index.html b/index.html index 569a639..f852c1c 100644 --- a/index.html +++ b/index.html @@ -102,16 +102,10 @@
- - -
WebRTC Receiver (Remote Peer)
- +
@@ -138,18 +132,32 @@

- *There are actually two remote peers with different scalability modes. Above the WebRTC receiver, you can toggle - video elements. + *There are actually two remote computer-to-peer connections. One connection uses the scalability mode + L3T3 to encode video frames. The other uses S3T3. The WebRTC receiver video frame + above uses the former scalability mode. +

+

+ L3T3 and S3T3 both have three spatial layers and three temporal layers. + L3T3 chunks depend on lower layers while S3T3 chunks have no inter-layer + dependency, also known as simulcast.

-
-
-

L3T3 Encoded Frames

+
+
+
+

S3T3 Encoded Frames

+
+
+
-
+
+
+

L3T3 Encoded Frames

+
+
+
-
diff --git a/js/main.js b/js/main.js index c3afd2a..187e4de 100644 --- a/js/main.js +++ b/js/main.js @@ -78,6 +78,7 @@ hangupButton.onclick = hangup; let startToEnd; +let secondaryStartToEnd; // this is S3T3 let localStream; // eslint-disable-next-line no-unused-vars @@ -127,10 +128,10 @@ function gotStream(stream) { } -function gotRemoteStream(stream) { +function gotRemoteStream(stream, videoElement) { // console.log('Received remote stream'); remoteStream = stream; - video2.srcObject = stream; + videoElement.srcObject = stream; } async function start() { @@ -157,7 +158,7 @@ async function start() { type: "webrtc", video: { contentType: "video/av01.0.04M.08", - scalabilityMode: "L3T3", + scalabilityMode: "S3T3", width: 1280, height: 720, bitrate: 10000, @@ -176,7 +177,7 @@ async function start() { // Here we want to decode the encoded video chunk -function setupSenderTransform(sender) { +function setupSenderTransform(sender, layered) { if (window.RTCRtpScriptTransform) { sender.transform = new RTCRtpScriptTransform(worker, {operation: 'encode'}); return; @@ -185,13 +186,14 @@ function setupSenderTransform(sender) { const senderStreams = sender.createEncodedStreams(); const {readable, writable} = senderStreams; worker.postMessage({ - operation: 'encode', + operation: `encode-layered-${layered}`, readable, writable, }, [readable, writable]); } -function setupReceiverTransform(receiver) { + +function setupReceiverTransform(receiver, layered) { if (window.RTCRtpScriptTransform) { receiver.transform = new RTCRtpScriptTransform(worker, {operation: 'decode'}); return; @@ -203,7 +205,7 @@ function setupReceiverTransform(receiver) { // console.log(`receiverStreams`, receiverStreams); const {readable, writable} = receiverStreams; worker.postMessage({ - operation: 'decode', + operation: `decode-layered-${layered}`, readable, writable, }, [readable, writable]); @@ -218,6 +220,7 @@ worker.postMessage({ }, [writable]); const encodedL3T3Frames = new Map(); +const 
encodedS3T3Frames = new Map(); worker.onmessage = ({data}) => { if (data.operation === 'track-ready') { @@ -225,10 +228,12 @@ worker.onmessage = ({data}) => { } if (data.operation === "encoded-frame") { - const {timestamp, spatialIndex, temporalIndex, size, type} = data; + const {layered, timestamp, spatialIndex, temporalIndex, size, type} = data; + + let frameMap = layered ? encodedL3T3Frames : encodedS3T3Frames; - if (encodedL3T3Frames.has(timestamp)) { - const layers = encodedL3T3Frames.get(timestamp); + if (frameMap.has(timestamp)) { + const layers = frameMap.get(timestamp); layers.push({ spatialIndex, temporalIndex, @@ -236,11 +241,11 @@ worker.onmessage = ({data}) => { type, }); - encodedL3T3Frames.set(timestamp, layers); - updateEncodedFrame(timestamp, layers); + frameMap.set(timestamp, layers); + updateEncodedFrame(timestamp, layers, layered); } else { - encodedL3T3Frames.set(timestamp, [{ + frameMap.set(timestamp, [{ spatialIndex, temporalIndex, size, @@ -251,14 +256,14 @@ worker.onmessage = ({data}) => { spatialIndex, temporalIndex, size, - type - }]) + type, + }], layered) } } }; -function updateEncodedFrame(timestamp, frames) { - const entry = document.querySelector(`#entry-${timestamp} ul`); +function updateEncodedFrame(timestamp, frames, layered) { + const entry = document.querySelector(`#entry-${layered}-${timestamp} ul`); if (entry) { entry.innerHTML = frames.map(f => `
  • @@ -273,10 +278,10 @@ function updateEncodedFrame(timestamp, frames) { } } -function appendEncodedFrame(timestamp, frames) { - const container = document.getElementById('l3t3-entries'); +function appendEncodedFrame(timestamp, frames, layered) { + const container = document.getElementById(layered ? 'l3t3-entries' : 's3t3-entries'); const frameEntry = document.createElement('div'); - frameEntry.setAttribute('id', `entry-${timestamp}`); + frameEntry.setAttribute('id', `entry-${layered}-${timestamp}`); frameEntry.innerHTML = `
    Timestamp ${timestamp}:
      @@ -299,20 +304,29 @@ async function call() { callButton.disabled = true; hangupButton.disabled = false; - // console.log('Starting call'); + startToEnd = new VideoPipe(localStream, true, true, e => { + setupReceiverTransform(e.receiver, true); + + if (!supportsSetCodecPreferences) { + throw new Error(`Codec is not supported`); + } + gotRemoteStream(e.streams[0], video2); + }, 'L3T3'); + startToEnd.pc1.getSenders().forEach(s => setupSenderTransform(s, true)); + await startToEnd.negotiate(); - startToEnd = new VideoPipe(localStream, true, true, e => { - setupReceiverTransform(e.receiver); + secondaryStartToEnd = new VideoPipe(localStream, true, true, e => { + setupReceiverTransform(e.receiver, false); if (!supportsSetCodecPreferences) { throw new Error(`Codec is not supported`); } - gotRemoteStream(e.streams[0]); - }); - startToEnd.pc1.getSenders().forEach(setupSenderTransform); - startToEnd.negotiate(); + gotRemoteStream(e.streams[0], video2a); + }, 'S3T3'); + secondaryStartToEnd.pc1.getSenders().forEach(s => setupSenderTransform(s, false)); + await secondaryStartToEnd.negotiate(); // console.log('Video pipes created'); @@ -323,6 +337,7 @@ function hangup() { console.log(encodedL3T3Frames); startToEnd.close(); + secondaryStartToEnd.close(); hangupButton.disabled = true; callButton.disabled = false; } diff --git a/js/videopipe.js b/js/videopipe.js index 4fa1400..7ab14f9 100644 --- a/js/videopipe.js +++ b/js/videopipe.js @@ -21,7 +21,7 @@ // 'use strict'; -function VideoPipe(stream, forceSend, forceReceive, handler) { +function VideoPipe(stream, forceSend, forceReceive, handler, scalabilityMode) { this.pc1 = new RTCPeerConnection({ encodedInsertableStreams: forceSend, }); @@ -38,7 +38,7 @@ function VideoPipe(stream, forceSend, forceReceive, handler) { let tr = this.pc1.addTransceiver(track, { streams: [stream], sendEncodings: [ - {scalabilityMode: "L3T3"} + {scalabilityMode: scalabilityMode} ] }); diff --git a/js/worker.js b/js/worker.js index aa51239..2f7f7af 
100644 --- a/js/worker.js +++ b/js/worker.js @@ -69,7 +69,7 @@ let highestSpatialLayer = 3, highestTemporalLayer = 3; async function handleTransform(operation, readable, writable) { - if (operation === 'encode') { + if (operation === 'encode-layered-true') { const transformer = new TransformStream({ async transform(encodedFrame, controller) { const {temporalIndex, spatialIndex, width, height} = encodedFrame.getMetadata(); @@ -79,6 +79,7 @@ async function handleTransform(operation, readable, writable) { postMessage({ operation: 'encoded-frame', + layered: true, timestamp, spatialIndex, temporalIndex, @@ -94,7 +95,38 @@ async function handleTransform(operation, readable, writable) { await readable .pipeThrough(transformer) .pipeTo(writable); - } else if (operation === 'decode') { + } else if (operation === "encode-layered-false") { + const transformer = new TransformStream({ + async transform(encodedFrame, controller) { + const {temporalIndex, spatialIndex, width, height} = encodedFrame.getMetadata(); + const {timestamp, data, type} = encodedFrame; + + const size = data.byteLength; + + console.log(`${spatialIndex}, ${temporalIndex}`) + + postMessage({ + operation: 'encoded-frame', + layered: false, + timestamp, + spatialIndex, + temporalIndex, + size, + type, + }); + + + if (spatialIndex === 2) { + controller.enqueue(encodedFrame); + } + + } + }) + + await readable + .pipeThrough(transformer) + .pipeTo(writable); + } else if (operation === 'decode-layered-true') { const transformer = new TransformStream({ async transform(encodedFrame, controller) { const {temporalIndex, spatialIndex, width, height} = encodedFrame.getMetadata(); @@ -119,6 +151,19 @@ async function handleTransform(operation, readable, writable) { await readable .pipeThrough(transformer) .pipeTo(writable); + } else if (operation === "decode-layered-false") { + const transformer = new TransformStream({ + async transform(encodedFrame, controller) { + const {temporalIndex, spatialIndex} = 
encodedFrame.getMetadata(); + + if (spatialIndex === 2) { + controller.enqueue(encodedFrame); + } + } + }) + await readable + .pipeThrough(transformer) + .pipeTo(writable); } } @@ -134,7 +179,7 @@ onmessage = async ({data}) => { }) } - if (operation === 'encode' || operation === 'decode') { + if (operation === 'encode-layered-true' || operation === 'encode-layered-false' || operation === 'decode-layered-true' || operation === 'decode-layered-false') { let {readable, writable} = data; return await handleTransform(operation, readable, writable); }