Skip to content

Commit

Permalink
feat(video): improve iframe detection
Browse files (browse the repository at this point in the history)
  • Loading branch information
Kikobeats committed Dec 29, 2023
1 parent c5b7a01 commit 9f220d8
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 22 deletions.
3 changes: 2 additions & 1 deletion packages/metascraper-video/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
"description": "Get video property from HTML markup",
"homepage": "https://github.com/microlinkhq/metascraper/packages/metascraper-video",
"version": "5.42.5",
"main": "src/index.js",
"types": "src/index.d.ts",
"main": "src/index.js",
"author": {
"email": "hello@microlink.io",
"name": "microlink.io",
Expand All @@ -27,6 +27,7 @@
"lodash": "~4.17.21"
},
"devDependencies": {
"async-listen": "latest",
"ava": "5"
},
"engines": {
Expand Down
45 changes: 24 additions & 21 deletions packages/metascraper-video/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,17 @@
const {
$jsonld,
$twitter,
loadIframe,
findRule,
has,
loadIframe,
normalizeUrl,
toRule,
url: urlFn,
video
} = require('@metascraper/helpers')

const pReflect = require('p-reflect')

const { chain, find, isEqual } = require('lodash')

const toUrl = toRule(urlFn)
Expand Down Expand Up @@ -74,26 +78,25 @@ const _getIframe = (url, $, { src }) =>

const withIframe = (rules, getIframe) =>
rules.concat(
// async ({ htmlDom: $, url }) => {
// // TODO: write a test embedding a youtube video as iframe
// const srcs = [
// ...new $('iframe[src^="http"], iframe[src^="/"]')
// .map((_, element) => $(element).attr('src'))
// .get()
// .map(src => normalizeUrl(url, src))
// ]
// if (srcs.length === 0) return
// return pReflect(
// Promise.any(
// srcs.map(async src => {
// const htmlDom = await getIframe(url, $, { src })
// const result = await findRule(audioRules, { htmlDom, url })
// if (!has(result)) throw TypeError('no result')
// return result
// })
// )
// ).then(({ value }) => value)
// },
async ({ htmlDom: $, url }) => {
const srcs = [
...new $('iframe[src^="http"], iframe[src^="/"]')
.map((_, element) => $(element).attr('src'))
.get()
.map(src => normalizeUrl(url, src))
]
if (srcs.length === 0) return
return pReflect(
Promise.any(
srcs.map(async src => {
const htmlDom = await getIframe(url, $, { src })
const result = await findRule(videoRules, { htmlDom, url })
if (!has(result)) throw TypeError('no result')
return result
})
)
).then(({ value }) => value)
},
async ({ htmlDom: $, url }) => {
const src = $twitter($, 'twitter:player')
return src
Expand Down
57 changes: 57 additions & 0 deletions packages/metascraper-video/test/inframe.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
'use strict'

const { default: listen } = require('async-listen')
const { createServer } = require('http')
const { promisify } = require('util')
const test = require('ava')

/**
 * Close an HTTP server and wait for the shutdown to complete.
 *
 * `server.close` is bound to its server before being promisified, since the
 * method is detached from the instance otherwise, and the promisified
 * function is invoked immediately so callers (e.g. `t.teardown`) receive a
 * promise rather than an uncalled function.
 *
 * @param {import('http').Server} server - Server to shut down.
 * @returns {Promise<void>} Resolves when the close callback fires; rejects
 *   if the callback reports an error.
 */
const closeServer = server => promisify(server.close.bind(server))()

const createMetascraper = (...args) =>
require('metascraper')([require('../src')(...args)])

test('absolute http', async t => {
  // Every request to the local server is answered with a document that
  // declares an absolute og:video URL.
  const respond = (_, res) => {
    res.setHeader('Content-Type', 'text/html')
    res.end(
      '<meta property="og:video" content="https://cdn.microlink.io/file-examples/sample.mp4">'
    )
  }
  const server = createServer(respond)
  t.teardown(() => closeServer(server))

  const address = await listen(server, { port: 0, host: '0.0.0.0' })
  const url = address.toString()

  // The page under test embeds the local server through an absolute iframe src.
  const html = `<iframe src="${url}">`
  const scrape = createMetascraper()
  const { video } = await scrape({ html, url })

  t.is(video, 'https://cdn.microlink.io/file-examples/sample.mp4')
})

test('relative http', async t => {
  // The served document declares a root-relative og:video path.
  const respond = (_, res) => {
    res.setHeader('Content-Type', 'text/html')
    res.end('<meta property="og:video" content="/file-examples/sample.mp4">')
  }
  const server = createServer(respond)
  t.teardown(() => closeServer(server))

  const address = await listen(server, { port: 0, host: '0.0.0.0' })
  const url = address.toString()

  // The iframe src is root-relative as well.
  const scrape = createMetascraper()
  const { video } = await scrape({ html: '<iframe src="/">', url })

  // Expected value is the server URL followed by the path without its
  // leading slash (presumably `url` already ends with "/" — confirm against
  // async-listen's return value).
  t.is(video, `${url}file-examples/sample.mp4`)
})

test('ignore non http urls', async t => {
  // The served document declares an og:video value with a `tg://` scheme.
  const respond = (_, res) => {
    res.setHeader('Content-Type', 'text/html')
    res.end(
      '<meta property="og:video" content="tg://join?invite=n3gS0R7pjFJhMWM0">'
    )
  }
  const server = createServer(respond)
  t.teardown(() => closeServer(server))

  const address = await listen(server, { port: 0, host: '0.0.0.0' })
  const url = address.toString()

  const html = `<iframe src="${url}">`
  const scrape = createMetascraper()
  const { video } = await scrape({ html, url })

  // No video is expected to be reported for the non-http value.
  t.is(video, null)
})

0 comments on commit 9f220d8

Please sign in to comment.