From f516848b99f34e2fc346a094775638145d474d22 Mon Sep 17 00:00:00 2001 From: Kiko Beats Date: Sat, 30 Dec 2023 00:34:41 +0100 Subject: [PATCH] feat(video): improve iframe detection --- packages/metascraper-video/src/index.js | 45 +++++++++-------- packages/metascraper-video/test/inframe.js | 57 ++++++++++++++++++++++ 2 files changed, 81 insertions(+), 21 deletions(-) create mode 100644 packages/metascraper-video/test/inframe.js diff --git a/packages/metascraper-video/src/index.js b/packages/metascraper-video/src/index.js index e359d664b..8bb287a12 100644 --- a/packages/metascraper-video/src/index.js +++ b/packages/metascraper-video/src/index.js @@ -3,13 +3,17 @@ const { $jsonld, $twitter, - loadIframe, findRule, + has, + loadIframe, + normalizeUrl, toRule, url: urlFn, video } = require('@metascraper/helpers') +const pReflect = require('p-reflect') + const { chain, find, isEqual } = require('lodash') const toUrl = toRule(urlFn) @@ -74,26 +78,25 @@ const _getIframe = (url, $, { src }) => const withIframe = (rules, getIframe) => rules.concat( - // async ({ htmlDom: $, url }) => { - // // TODO: write a test embedding a youtube video as iframe - // const srcs = [ - // ...new $('iframe[src^="http"], iframe[src^="/"]') - // .map((_, element) => $(element).attr('src')) - // .get() - // .map(src => normalizeUrl(url, src)) - // ] - // if (srcs.length === 0) return - // return pReflect( - // Promise.any( - // srcs.map(async src => { - // const htmlDom = await getIframe(url, $, { src }) - // const result = await findRule(audioRules, { htmlDom, url }) - // if (!has(result)) throw TypeError('no result') - // return result - // }) - // ) - // ).then(({ value }) => value) - // }, + async ({ htmlDom: $, url }) => { + const srcs = [ + ...new $('iframe[src^="http"], iframe[src^="/"]') + .map((_, element) => $(element).attr('src')) + .get() + .map(src => normalizeUrl(url, src)) + ] + if (srcs.length === 0) return + return pReflect( + Promise.any( + srcs.map(async src => { + const htmlDom = await getIframe(url, $, { src }) + const result = await findRule(videoRules, { htmlDom, url }) + if (!has(result)) throw TypeError('no result') + return result + }) + ) + ).then(({ value }) => value) + }, async ({ htmlDom: $, url }) => { const src = $twitter($, 'twitter:player') return src diff --git a/packages/metascraper-video/test/inframe.js b/packages/metascraper-video/test/inframe.js new file mode 100644 index 000000000..ca5a57b95 --- /dev/null +++ b/packages/metascraper-video/test/inframe.js @@ -0,0 +1,57 @@ +'use strict' + +const { default: listen } = require('async-listen') +const { createServer } = require('http') +const { promisify } = require('util') +const test = require('ava') + +const closeServer = server => promisify(server.close) + +const createMetascraper = (...args) => + require('metascraper')([require('../src')(...args)]) + +test('absolute http', async t => { + const server = createServer((_, res) => { + res.setHeader('Content-Type', 'text/html') + res.end( + '' + ) + }) + + t.teardown(() => closeServer(server)) + const url = (await listen(server, { port: 0, host: '0.0.0.0' })).toString() + const html = `