Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: youtube base.js parsing logic #1217

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 86 additions & 36 deletions lib/sig.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ const Cache = require('./cache');
const utils = require('./utils');
const vm = require('vm');


// Becomes true once the "Could not parse n transform function" warning has been
// printed, so that the warning is emitted at most once.
let nTransformWarning = false;

// A shared cache to keep track of html5player js functions.
// `exports.getFunctions` reads through it with `getOrSet`.
exports.cache = new Cache();

Expand All @@ -23,6 +26,49 @@ exports.getFunctions = (html5playerfile, options) => exports.cache.getOrSet(html
return functions;
});

// Patterns (as RegExp source strings) used to locate the name of the signature
// decipher function inside the player script. Every pattern captures the
// function name in group 1.
// eslint-disable-next-line max-len
// https://github.com/TeamNewPipe/NewPipeExtractor/blob/41c8dce452aad278420715c00810b1fed0109adf/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java#L816
const DECIPHER_REGEXPS = [
  '(?:\\b|[^a-zA-Z0-9$])([a-zA-Z0-9$]{2,})\\s*=\\s*function\\(\\s*a\\s*\\)' +
  '\\s*\\{\\s*a\\s*=\\s*a\\.split\\(\\s*""\\s*\\)',
  '\\bm=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(h\\.s\\)\\)',
  '\\bc&&\\(c=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(c\\)\\)',
  '([\\w$]+)\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(""\\)\\s*;',
  '\\b([\\w$]{2,})\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(""\\)\\s*;',
  // The `encodeURIComponent(` prefix has to be a non-capturing group (`(?:`):
  // written as `(:` it required a literal colon and made group 1 hold the
  // prefix text instead of the function name.
  '\\bc\\s*&&\\s*d\\.set\\([^,]+\\s*,\\s*(?:encodeURIComponent\\s*\\()([a-zA-Z0-9$]+)\\(',
];

// Name of the variable the extracted decipher script is called with; the
// signature is stored under this key in the context the script runs in.
const DECIPHER_ARGUMENT = 'sig';
// Name of the variable the extracted n-transform script is called with; the
// `n` query parameter is stored under this key in the context the script runs in.
const N_ARGUMENT = 'ncode';

/**
 * Applies a pattern to a string and hands back its first capture group.
 *
 * @param {string} regex Source text of the regular expression.
 * @param {string} str Text to search.
 * @returns {string} Capture group 1 of the first match.
 * @throws {Error} When the pattern does not match `str`.
 */
const matchGroup1 = (regex, str) => {
  const found = str.match(new RegExp(regex));
  if (found) {
    return found[1];
  }
  throw new Error(`Could not match ${regex}`);
};

/**
 * Finds a function name in the player script using a prioritized list of patterns.
 *
 * Patterns are tried in order and the first one that yields a usable name wins.
 * When a pattern captures an array access such as `abc[0]`, the name is looked
 * up in the literal assigned to that array (`abc=[realName]`).
 *
 * @param {string} body Player script source.
 * @param {Array.<string>} regexps RegExp source strings, highest priority first;
 *   each must capture the name in group 1.
 * @returns {string} The resolved function name.
 * @throws {Error} When no pattern yields a usable name.
 */
const getFuncName = (body, regexps) => {
  for (const regex of regexps) {
    let candidate;
    try {
      candidate = matchGroup1(regex, body);
    } catch (err) {
      // This pattern does not occur in the script; try the next one.
      continue;
    }
    // Group 1 may not have taken part in the match.
    if (typeof candidate !== 'string' || candidate === '') continue;

    const idx = candidate.indexOf('[0]');
    if (idx > -1) {
      // Use everything before `[0]` as the array name (not a fixed-length prefix)
      // and escape it, since names may contain `$`, which is special in a RegExp.
      const arrayName = candidate.slice(0, idx).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      try {
        candidate = matchGroup1(`${arrayName}=\\[([a-zA-Z0-9$\\[\\]]{2,})\\]`, body);
      } catch (err) {
        // The array literal could not be found; a later pattern may still succeed.
        continue;
      }
    }
    // Stop at the first usable name so lower-priority patterns cannot override it.
    if (candidate && !candidate.includes('[')) return candidate;
  }
  throw new Error("Please open an issue on ytdl-core GitHub: Couldn't find fn name");
};

/**
 * Looks up the decipher function name by running `getFuncName` with `DECIPHER_REGEXPS`.
 *
 * @param {string} body Player script source.
 * @returns {string} Whatever `getFuncName` returns for the decipher patterns.
 */
const getDecipherFuncName = body => {
  const decipherName = getFuncName(body, DECIPHER_REGEXPS);
  return decipherName;
};


/**
* Extracts the actions that should be taken to decipher a signature
* and transform the n parameter
Expand All @@ -31,44 +77,45 @@ exports.getFunctions = (html5playerfile, options) => exports.cache.getOrSet(html
* @returns {Array.<string>}
*/
// NOTE(review): this span appears to come from a rendered diff, so removed and
// added lines sit side by side without +/- markers — confirm against the
// repository which statements are live before relying on the notes below.
exports.extractFunctions = body => {
// Remove every line break from the player script before searching it.
body = body.replace(/\n|\r/g, '');
// Collects the extracted script snippets; this array is the return value.
const functions = [];
// Given the source of a caller function, takes the object name that follows
// `a=a.split("");` and returns that object's `var <name>=...` definition, or ''
// when either the name or the definition cannot be found.
// (Assumes utils.between returns the text between the two markers and
// utils.cutAfterJS cuts at the end of the leading JS value — TODO confirm.)
const extractManipulations = caller => {
const functionName = utils.between(caller, `a=a.split("");`, `.`);
if (!functionName) return '';
const functionStart = `var ${functionName}={`;
const ndx = body.indexOf(functionStart);
if (ndx < 0) return '';
const subBody = body.slice(ndx + functionStart.length - 1);
return `var ${functionName}=${utils.cutAfterJS(subBody)}`;
};
// This is a required function, so we can't continue if it's not found.
const extractDecipher = () => {
const functionName = utils.between(body, `a.set("alr","yes");c&&(c=`, `(decodeURIC`);
if (functionName && functionName.length) {
const functionStart = `${functionName}=function(a)`;
const ndx = body.indexOf(functionStart);
if (ndx >= 0) {
const subBody = body.slice(ndx + functionStart.length);
let functionBody = `var ${functionStart}${utils.cutAfterJS(subBody)}`;
functionBody = `${extractManipulations(functionBody)};${functionBody};${functionName}(sig);`;
functions.push(functionBody);
}
// Resolve the decipher function name through the DECIPHER_REGEXPS patterns;
// getDecipherFuncName throws when none of them yields a name.
const decipherFuncName = getDecipherFuncName(body);
try {
// `$` is escaped because the name is spliced into a regular expression.
const functionPattern = `(${decipherFuncName.replace(/\$/g, '\\$')}=function\\([a-zA-Z0-9_]+\\)\\{.+?\\})`;
const decipherFunction = `var ${matchGroup1(functionPattern, body)};`;
// The helper object name is the first `<name>.<method>(` that follows a `;`
// inside the decipher function.
const helperObjectName = matchGroup1(';([A-Za-z0-9_\\$]{2,})\\.\\w+\\(', decipherFunction)
.replace(/\$/g, '\\$');
const helperPattern = `(var ${helperObjectName}=\\{[\\s\\S]+?\\}\\};)`;
const helperObject = matchGroup1(helperPattern, body);
// Resulting script: helper object, decipher function, then a call that passes
// the variable named by DECIPHER_ARGUMENT.
const callerFunction = `${decipherFuncName}(${DECIPHER_ARGUMENT});`;
const resultFunction = helperObject + decipherFunction + callerFunction;
functions.push(resultFunction);
} catch (err) {
throw Error(`Could not parse decipher function: ${err}`);
}
};
const extractNCode = () => {
let functionName = utils.between(body, `&&(b=a.get("n"))&&(b=`, `(b)`);
if (functionName.includes('[')) functionName = utils.between(body, `${functionName.split('[')[0]}=[`, `]`);
if (functionName && functionName.length) {
const functionStart = `${functionName}=function(a)`;
const ndx = body.indexOf(functionStart);
if (ndx >= 0) {
const subBody = body.slice(ndx + functionStart.length);
const functionBody = `var ${functionStart}${utils.cutAfterJS(subBody)};${functionName}(ncode);`;
functions.push(functionBody);
// This is optional, so we can continue if it's not found, but it will bottleneck the download.
const extractNTransform = () => {
// The name is read from between `(b=a.get("n"))&&(b=` and `(b)`.
let nFuncName = utils.between(body, `(b=a.get("n"))&&(b=`, `(b)`);
// When that text contains `[`, it is replaced by what sits between
// `<part before the bracket>=[` and `]`.
if (nFuncName.includes('[')) nFuncName = utils.between(body, `${nFuncName.split('[')[0]}=[`, `]`);
if (nFuncName && nFuncName.length) {
const nBegin = `${nFuncName}=function(a)`;
const nEnd = '.join("")};';
const nFunction = utils.between(body, nBegin, nEnd);
if (nFunction) {
// Resulting script: the function text from nBegin through nEnd, followed by a
// call that passes the variable named by N_ARGUMENT.
const callerFunction = `${nFuncName}(${N_ARGUMENT});`;
const resultFunction = nBegin + nFunction + nEnd + callerFunction;
functions.push(resultFunction);
} else if (!nTransformWarning) {
// Warn only once: the flag is set right after the first warning.
console.warn('Could not parse n transform function, please report it on @distube/ytdl-core GitHub.');
nTransformWarning = true;
}
}
};
extractDecipher();
extractNCode();
extractNTransform();
return functions;
};

Expand All @@ -82,22 +129,25 @@ exports.extractFunctions = body => {
// NOTE(review): this span appears to come from a rendered diff, so removed and
// added lines sit side by side without +/- markers — confirm against the
// repository which statements are live before relying on the notes below.
// Sets `format.url` from the format's own url or its cipher string, applying
// the decipher and n-transform scripts, then removes the cipher fields.
exports.setDownloadURL = (format, decipherScript, nTransformScript) => {
// Parses `url` as a query string; when it carries an `s` field, runs
// decipherScript on the decoded value and stores the result as a query
// parameter of the decoded `url` field. Without `s`, returns `args.url` as is.
const decipher = url => {
const args = querystring.parse(url);
if (!args.s || !decipherScript) return args.url;
if (!args.s) return args.url;
const components = new URL(decodeURIComponent(args.url));
components.searchParams.set(args.sp ? args.sp : 'signature',
decipherScript.runInNewContext({ sig: decodeURIComponent(args.s) }));
// The script reads its input from the variable named by DECIPHER_ARGUMENT.
const context = {};
context[DECIPHER_ARGUMENT] = decodeURIComponent(args.s);
components.searchParams.set(args.sp || 'sig', decipherScript.runInNewContext(context));
return components.toString();
};
const ncode = url => {
// Replaces the `n` query parameter with the output of nTransformScript; the
// url is returned unchanged when there is no `n` or no script.
const nTransform = url => {
const components = new URL(decodeURIComponent(url));
const n = components.searchParams.get('n');
if (!n || !nTransformScript) return url;
components.searchParams.set('n', nTransformScript.runInNewContext({ ncode: n }));
// The script reads its input from the variable named by N_ARGUMENT.
const context = {};
context[N_ARGUMENT] = n;
components.searchParams.set('n', nTransformScript.runInNewContext(context));
return components.toString();
};
// A format without a direct `url` is treated as ciphered and is deciphered
// before the n-transform is applied.
const cipher = !format.url;
const url = format.url || format.signatureCipher || format.cipher;
format.url = cipher ? ncode(decipher(url)) : ncode(url);
format.url = cipher ? nTransform(decipher(url)) : nTransform(url);
// The cipher fields are dropped once `format.url` has been set.
delete format.signatureCipher;
delete format.cipher;
};
Expand Down
6 changes: 3 additions & 3 deletions test/basic-info-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ describe('ytdl.getBasicInfo()', () => {
});

it('Retrieves just enough metainfo without all formats', async() => {
const id = '5qap5aO4i9A';
const id = 'jfKfPfyJRdk';
const expected = require('./files/videos/live-now/expected-info.json');
const scope = nock(id, 'live-now', {
player: false,
Expand All @@ -40,7 +40,7 @@ describe('ytdl.getBasicInfo()', () => {

describe('Use `ytdl.downloadFromInfo()`', () => {
it('Throw error', async() => {
const id = '5qap5aO4i9A';
const id = 'jfKfPfyJRdk';
const scope = nock(id, 'regular', {
watchHtml: false,
player: false,
Expand Down Expand Up @@ -123,7 +123,7 @@ describe('ytdl.getBasicInfo()', () => {

describe('From a live video', () => {
it('Returns correct video metainfo', async() => {
const id = '5qap5aO4i9A';
const id = 'jfKfPfyJRdk';
const scope = nock(id, 'live-now', {
player: false,
dashmpd: false,
Expand Down
6 changes: 3 additions & 3 deletions test/download-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -587,7 +587,7 @@ describe('Download video', () => {

describe('that is broadcasted live', () => {
it('Begins downloading video succesfully', done => {
const testId = '5qap5aO4i9A';
const testId = 'jfKfPfyJRdk';
const scope = nock(testId, 'live-now');
const stream = ytdl(testId, { filter: format => format.isHLS });
stream.on('info', (info, format) => {
Expand Down Expand Up @@ -636,7 +636,7 @@ describe('Download video', () => {

describe('end download early', () => {
it('Stops downloading video', done => {
const testId = '5qap5aO4i9A';
const testId = 'jfKfPfyJRdk';
const scope = nock(testId, 'live-now');
const stream = ytdl(testId);
stream.on('info', () => {
Expand All @@ -653,7 +653,7 @@ describe('Download video', () => {

describe('from a dash-mpd itag', () => {
it('Begins downloading video succesfully', done => {
const testId = '5qap5aO4i9A';
const testId = 'jfKfPfyJRdk';
let dashResponse = fs.readFileSync(path.resolve(__dirname, `files/videos/live-now/dash-manifest.xml`), 'utf8');
const replaceBetweenTags = (tagName, content) => {
const regex = new RegExp(`<${tagName}>(.+?)</${tagName}`, 'g');
Expand Down
2 changes: 1 addition & 1 deletion test/files/refresh.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

const videos = [
{
id: '5qap5aO4i9A',
id: 'jfKfPfyJRdk',
type: 'live-now',
saveInfo: true,
},
Expand Down
Loading