diff --git a/packages/metascraper-amazon/src/index.js b/packages/metascraper-amazon/src/index.js index 6a5bac6d4..40c96d7a4 100644 --- a/packages/metascraper-amazon/src/index.js +++ b/packages/metascraper-amazon/src/index.js @@ -60,5 +60,7 @@ module.exports = () => { rules.test = ({ url }) => test(url) + rules.pkgName = 'metascraper-amazon' + return rules } diff --git a/packages/metascraper-audio/src/index.js b/packages/metascraper-audio/src/index.js index 9ffd9b0c7..451e54e5e 100644 --- a/packages/metascraper-audio/src/index.js +++ b/packages/metascraper-audio/src/index.js @@ -78,7 +78,7 @@ const _getIframe = (url, $, { src }) => loadIframe(url, $.load(``)) module.exports = ({ getIframe = _getIframe } = {}) => { - return { + const rules = { audio: audioRules.concat( async ({ htmlDom: $, url }) => { const srcs = [ @@ -110,4 +110,8 @@ module.exports = ({ getIframe = _getIframe } = {}) => { } ) } + + rules.pkgName = 'metascraper-audio' + + return rules } diff --git a/packages/metascraper-author/src/index.js b/packages/metascraper-author/src/index.js index 59471d34f..00563893a 100644 --- a/packages/metascraper-author/src/index.js +++ b/packages/metascraper-author/src/index.js @@ -24,27 +24,33 @@ const strict = rule => $ => { return REGEX_STRICT.test(value) && value } -module.exports = () => ({ - author: [ - toAuthor($jsonld('author.name')), - toAuthor($jsonld('brand.name')), - toAuthor($ => $('meta[name="author"]').attr('content')), - toAuthor($ => $('meta[property="article:author"]').attr('content')), - toAuthor($ => $filter($, $('[itemprop*="author" i] [itemprop="name"]'))), - toAuthor($ => $filter($, $('[itemprop*="author" i]'))), - toAuthor($ => $filter($, $('[rel="author"]'))), - strict(toAuthor($ => $filter($, $('a[class*="author" i]')))), - strict(toAuthor($ => $filter($, $('[class*="author" i] a')))), - strict(toAuthor($ => $filter($, $('a[href*="/author/" i]')))), - toAuthor($ => $filter($, $('a[class*="screenname" i]'))), - strict(toAuthor($ => $filter($, $('[class*="author" i]')))), - strict( - toAuthor($ => - $filter($, $('[class*="byline" i]'), el => { - const value = $filter.fn(el) - return !date(value) && value - }) +module.exports = () => { + const rules = { + author: [ + toAuthor($jsonld('author.name')), + toAuthor($jsonld('brand.name')), + toAuthor($ => $('meta[name="author"]').attr('content')), + toAuthor($ => $('meta[property="article:author"]').attr('content')), + toAuthor($ => $filter($, $('[itemprop*="author" i] [itemprop="name"]'))), + toAuthor($ => $filter($, $('[itemprop*="author" i]'))), + toAuthor($ => $filter($, $('[rel="author"]'))), + strict(toAuthor($ => $filter($, $('a[class*="author" i]')))), + strict(toAuthor($ => $filter($, $('[class*="author" i] a')))), + strict(toAuthor($ => $filter($, $('a[href*="/author/" i]')))), + toAuthor($ => $filter($, $('a[class*="screenname" i]'))), + strict(toAuthor($ => $filter($, $('[class*="author" i]')))), + strict( + toAuthor($ => + $filter($, $('[class*="byline" i]'), el => { + const value = $filter.fn(el) + return !date(value) && value + }) + ) ) - ) - ] -}) + ] + } + + rules.pkgName = 'metascraper-author' + + return rules +} diff --git a/packages/metascraper-clearbit/src/index.js b/packages/metascraper-clearbit/src/index.js index bdd262cff..69a368e68 100644 --- a/packages/metascraper-clearbit/src/index.js +++ b/packages/metascraper-clearbit/src/index.js @@ -43,8 +43,12 @@ module.exports = opts => { const clearbit = createClearbit(opts) const getClearbit = composeRule(($, url) => clearbit(parseUrl(url).domain)) - return { + const rules = { logo: getClearbit({ from: 'logo' }), publisher: getClearbit({ from: 'name', to: 'publisher' }) } + + rules.pkgName = 'metascraper-clearbit' + + return rules } diff --git a/packages/metascraper-date/src/index.js b/packages/metascraper-date/src/index.js index d79788af3..ac6b878d6 100644 --- a/packages/metascraper-date/src/index.js +++ b/packages/metascraper-date/src/index.js @@ -43,17 +43,19 @@ module.exports = ( dateModified: false } ) => { - const result = { + const rules = { date: dateModifiedRules().concat(datePublishedRules(), dateRules()) } if (datePublished) { - result.datePublished = datePublishedRules() + rules.datePublished = datePublishedRules() } if (dateModified) { - result.dateModified = dateModifiedRules() + rules.dateModified = dateModifiedRules() } - return result + rules.pkgName = 'metascraper-date' + + return rules } diff --git a/packages/metascraper-description/src/index.js b/packages/metascraper-description/src/index.js index ef9f7fad2..6b1679ec2 100644 --- a/packages/metascraper-description/src/index.js +++ b/packages/metascraper-description/src/index.js @@ -5,7 +5,7 @@ const { $jsonld, toRule, description } = require('@metascraper/helpers') module.exports = opts => { const toDescription = toRule(description, opts) - return { + const rules = { description: [ toDescription($ => $('meta[property="og:description"]').attr('content')), toDescription($ => $('meta[name="twitter:description"]').attr('content')), @@ -18,4 +18,8 @@ module.exports = opts => { toDescription($jsonld('description')) ] } + + rules.pkgName = 'metascraper-description' + + return rules } diff --git a/packages/metascraper-feed/src/index.js b/packages/metascraper-feed/src/index.js index c60fcbf87..a6afc3c49 100644 --- a/packages/metascraper-feed/src/index.js +++ b/packages/metascraper-feed/src/index.js @@ -5,11 +5,15 @@ const { toRule, url } = require('@metascraper/helpers') const toUrl = toRule(url) module.exports = () => { - return { + const rules = { feed: [ toUrl($ => $('link[type="application/rss+xml"]').attr('href')), toUrl($ => $('link[type="application/feed+json"]').attr('href')), toUrl($ => $('link[type="application/atom+xml"]').attr('href')) ] } + + rules.pkgName = 'metascraper-feed' + + return rules } diff --git a/packages/metascraper-iframe/src/index.js b/packages/metascraper-iframe/src/index.js index 2f813408c..aa964b75a 100644 --- a/packages/metascraper-iframe/src/index.js +++ b/packages/metascraper-iframe/src/index.js @@ -25,6 +25,8 @@ module.exports = ({ gotOpts } = {}) => { rules.test = ({ url, htmlDom }) => test(url, htmlDom) + rules.pkgName = 'metascraper-iframe' + return rules } diff --git a/packages/metascraper-image/src/index.js b/packages/metascraper-image/src/index.js index 0c4e51996..23c4f61db 100644 --- a/packages/metascraper-image/src/index.js +++ b/packages/metascraper-image/src/index.js @@ -6,22 +6,28 @@ const toImage = toRule(image) const getSrc = el => el.attr('src') -module.exports = () => ({ - image: [ - toImage($ => $('meta[property="og:image:secure_url"]').attr('content')), - toImage($ => $('meta[property="og:image:url"]').attr('content')), - toImage($ => $('meta[property="og:image"]').attr('content')), - toImage($ => $('meta[name="twitter:image:src"]').attr('content')), - toImage($ => $('meta[property="twitter:image:src"]').attr('content')), - toImage($ => $('meta[name="twitter:image"]').attr('content')), - toImage($ => $('meta[property="twitter:image"]').attr('content')), - toImage($ => $('meta[itemprop="image"]').attr('content')), - toImage($jsonld('image.0.url')), - toImage($jsonld('image.url')), - toImage($jsonld('image')), - toImage($ => $filter($, $('article img[src]'), getSrc)), - toImage($ => $filter($, $('#content img[src]'), getSrc)), - toImage($ => $('img[alt*="author" i]').attr('src')), - toImage($ => $('img[src]:not([aria-hidden="true"])').attr('src')) - ] -}) +module.exports = () => { + const rules = { + image: [ + toImage($ => $('meta[property="og:image:secure_url"]').attr('content')), + toImage($ => $('meta[property="og:image:url"]').attr('content')), + toImage($ => $('meta[property="og:image"]').attr('content')), + toImage($ => $('meta[name="twitter:image:src"]').attr('content')), + toImage($ => $('meta[property="twitter:image:src"]').attr('content')), + toImage($ => $('meta[name="twitter:image"]').attr('content')), + toImage($ => $('meta[property="twitter:image"]').attr('content')), + toImage($ => $('meta[itemprop="image"]').attr('content')), + toImage($jsonld('image.0.url')), + toImage($jsonld('image.url')), + toImage($jsonld('image')), + toImage($ => $filter($, $('article img[src]'), getSrc)), + toImage($ => $filter($, $('#content img[src]'), getSrc)), + toImage($ => $('img[alt*="author" i]').attr('src')), + toImage($ => $('img[src]:not([aria-hidden="true"])').attr('src')) + ] + } + + rules.pkgName = 'metascraper-image' + + return rules +} diff --git a/packages/metascraper-instagram/src/index.js b/packages/metascraper-instagram/src/index.js index ad5be32f2..23f464ae0 100644 --- a/packages/metascraper-instagram/src/index.js +++ b/packages/metascraper-instagram/src/index.js @@ -48,5 +48,7 @@ module.exports = () => { rules.test = ({ url }) => test(url) + rules.pkgName = 'metascraper-instagram' + return rules } diff --git a/packages/metascraper-lang/src/index.js b/packages/metascraper-lang/src/index.js index d8d6e43be..b65e5c410 100644 --- a/packages/metascraper-lang/src/index.js +++ b/packages/metascraper-lang/src/index.js @@ -4,10 +4,16 @@ const { lang, toRule } = require('@metascraper/helpers') const toLang = toRule(lang) -module.exports = () => ({ - lang: [ - toLang($ => $('meta[property="og:locale"]').attr('content')), - toLang($ => $('meta[itemprop="inLanguage"]').attr('content')), - toLang($ => $('html').attr('lang')) - ] -}) +module.exports = () => { + const rules = { + lang: [ + toLang($ => $('meta[property="og:locale"]').attr('content')), + toLang($ => $('meta[itemprop="inLanguage"]').attr('content')), + toLang($ => $('html').attr('lang')) + ] + } + + rules.pkgName = 'metascraper-lang' + + return rules +} diff --git a/packages/metascraper-logo-favicon/src/index.js b/packages/metascraper-logo-favicon/src/index.js index 6b2ed8661..a13d39e32 100644 --- a/packages/metascraper-logo-favicon/src/index.js +++ b/packages/metascraper-logo-favicon/src/index.js @@ -237,7 +237,7 @@ module.exports = ({ withGoogle }) const rootFavicon = createRootFavicon({ getLogo, withRootFavicon }) - return { + const rules = { logo: [ toLogo(async ($, url) => { const sizes = getSizes($, sizeSelectors, url) @@ -251,6 +251,10 @@ module.exports = ({ rootFavicon ].filter(Boolean) } + + rules.pkgName = 'metascraper-logo-favicon' + + return rules } module.exports.google = google diff --git a/packages/metascraper-logo/src/index.js b/packages/metascraper-logo/src/index.js index 3f66bc9b1..ae88b70c9 100644 --- a/packages/metascraper-logo/src/index.js +++ b/packages/metascraper-logo/src/index.js @@ -20,7 +20,7 @@ module.exports = ({ filter } = {}) => { const toLogo = toRule(mapper) - return { + const rules = { logo: [ toLogo($ => $('meta[property="og:logo"]').attr('content')), toLogo($ => $('meta[itemprop="logo"]').attr('content')), @@ -35,4 +35,8 @@ module.exports = ({ filter } = {}) => { toLogo($ => toLogoUrl($, 'logo')) ] } + + rules.pkgName = 'metascraper-logo' + + return rules } diff --git a/packages/metascraper-media-provider/src/index.js b/packages/metascraper-media-provider/src/index.js index 223810540..96e9ba1a6 100644 --- a/packages/metascraper-media-provider/src/index.js +++ b/packages/metascraper-media-provider/src/index.js @@ -136,7 +136,7 @@ const getDescription = ({ description }) => descriptionFn(description) module.exports = (opts = {}) => { const getMedia = createGetMedia(opts) - return { + const rules = { audio: async ({ url }) => getAudio(await getMedia(url)), author: async ({ url }) => getAuthor(await getMedia(url)), date: async ({ url }) => getDate(await getMedia(url)), @@ -147,6 +147,10 @@ module.exports = (opts = {}) => { title: async ({ url }) => getTitle(await getMedia(url)), video: async ({ url }) => getVideo(await getMedia(url)) } + + rules.pkgName = 'metascraper-media-provider' + + return rules } module.exports.getAudio = getAudio diff --git a/packages/metascraper-publisher/src/index.js b/packages/metascraper-publisher/src/index.js index 8764b47ec..5aa88c6a0 100644 --- a/packages/metascraper-publisher/src/index.js +++ b/packages/metascraper-publisher/src/index.js @@ -14,35 +14,45 @@ const getFromTitle = (text, regex = REGEX_TITLE) => { return result } -module.exports = () => ({ - publisher: [ - toPublisher($jsonld('publisher.name')), - toPublisher($ => $('meta[property="og:site_name"]').attr('content')), - toPublisher($ => $('meta[name*="application-name" i]').attr('content')), - toPublisher($ => $('meta[name*="app-title" i]').attr('content')), - toPublisher($ => $('meta[property*="app_name" i]').attr('content')), - toPublisher($ => $('meta[name="publisher" i]').attr('content')), - toPublisher($ => $('meta[name="twitter:app:name:iphone"]').attr('content')), - toPublisher($ => - $('meta[property="twitter:app:name:iphone"]').attr('content') - ), - toPublisher($ => $('meta[name="twitter:app:name:ipad"]').attr('content')), - toPublisher($ => - $('meta[property="twitter:app:name:ipad"]').attr('content') - ), - toPublisher($ => - $('meta[name="twitter:app:name:googleplay"]').attr('content') - ), - toPublisher($ => - $('meta[property="twitter:app:name:googleplay"]').attr('content') - ), - toPublisher($ => $filter($, $('#logo'))), - toPublisher($ => $filter($, $('.logo'))), - toPublisher($ => $filter($, $('a[class*="brand" i]'))), - toPublisher($ => $('[class*="logo" i] a img[alt]').attr('alt')), - toPublisher($ => $('[class*="logo" i] img[alt]').attr('alt')), - toPublisher($ => $filter($, $('title'), el => getFromTitle($filter.fn(el)))) - ] -}) +module.exports = () => { + const rules = { + publisher: [ + toPublisher($jsonld('publisher.name')), + toPublisher($ => $('meta[property="og:site_name"]').attr('content')), + toPublisher($ => $('meta[name*="application-name" i]').attr('content')), + toPublisher($ => $('meta[name*="app-title" i]').attr('content')), + toPublisher($ => $('meta[property*="app_name" i]').attr('content')), + toPublisher($ => $('meta[name="publisher" i]').attr('content')), + toPublisher($ => + $('meta[name="twitter:app:name:iphone"]').attr('content') + ), + toPublisher($ => + $('meta[property="twitter:app:name:iphone"]').attr('content') + ), + toPublisher($ => $('meta[name="twitter:app:name:ipad"]').attr('content')), + toPublisher($ => + $('meta[property="twitter:app:name:ipad"]').attr('content') + ), + toPublisher($ => + $('meta[name="twitter:app:name:googleplay"]').attr('content') + ), + toPublisher($ => + $('meta[property="twitter:app:name:googleplay"]').attr('content') + ), + toPublisher($ => $filter($, $('#logo'))), + toPublisher($ => $filter($, $('.logo'))), + toPublisher($ => $filter($, $('a[class*="brand" i]'))), + toPublisher($ => $('[class*="logo" i] a img[alt]').attr('alt')), + toPublisher($ => $('[class*="logo" i] img[alt]').attr('alt')), + toPublisher($ => + $filter($, $('title'), el => getFromTitle($filter.fn(el))) + ) + ] + } + + rules.pkgName = 'metascraper-publisher' + + return rules +} module.exports.getFromTitle = getFromTitle diff --git a/packages/metascraper-readability/src/index.js b/packages/metascraper-readability/src/index.js index 1c39083c0..dd6bdf00d 100644 --- a/packages/metascraper-readability/src/index.js +++ b/packages/metascraper-readability/src/index.js @@ -22,11 +22,15 @@ const readability = memoizeOne((url, html) => { const getReadbility = composeRule(($, url) => readability(url, $.html())) module.exports = () => { - return { + const rules = { author: getReadbility({ from: 'byline', to: 'author' }), description: getReadbility({ from: 'excerpt', to: 'description' }), lang: getReadbility({ from: 'lang' }), publisher: getReadbility({ from: 'siteName', to: 'publisher' }), title: getReadbility({ from: 'title' }) } + + rules.pkgName = 'metascraper-readability' + + return rules } diff --git a/packages/metascraper-soundcloud/src/index.js b/packages/metascraper-soundcloud/src/index.js index 44ff0ba59..7dee4b6c4 100644 --- a/packages/metascraper-soundcloud/src/index.js +++ b/packages/metascraper-soundcloud/src/index.js @@ -24,5 +24,7 @@ module.exports = () => { rules.test = ({ url }) => test(url) + rules.pkgName = 'metascraper-soundcloud' + return rules } diff --git a/packages/metascraper-spotify/src/index.js b/packages/metascraper-spotify/src/index.js index b2d5cdc8f..f717c2323 100644 --- a/packages/metascraper-spotify/src/index.js +++ b/packages/metascraper-spotify/src/index.js @@ -72,6 +72,8 @@ module.exports = ({ gotOpts, keyvOpts } = {}) => { rules.test = ({ url }) => test(url) + rules.pkgName = 'metascraper-spotify' + return rules } diff --git a/packages/metascraper-telegram/src/index.js b/packages/metascraper-telegram/src/index.js index be14c5130..c55db498f 100644 --- a/packages/metascraper-telegram/src/index.js +++ b/packages/metascraper-telegram/src/index.js @@ -73,6 +73,8 @@ module.exports = ({ gotOpts, keyvOpts } = {}) => { rules.test = ({ url }) => test(url) + rules.pkgName = 'metascraper-telegram' + return rules } diff --git a/packages/metascraper-title/src/index.js b/packages/metascraper-title/src/index.js index 4111a19c6..7a3e6226b 100644 --- a/packages/metascraper-title/src/index.js +++ b/packages/metascraper-title/src/index.js @@ -4,16 +4,22 @@ const { $jsonld, $filter, title, toRule } = require('@metascraper/helpers') const toTitle = toRule(title) -module.exports = () => ({ - title: [ - toTitle($ => $('meta[property="og:title"]').attr('content')), - toTitle($ => $('meta[name="twitter:title"]').attr('content')), - toTitle($ => $('meta[property="twitter:title"]').attr('content')), - toTitle($ => $filter($, $('title'))), - toTitle($jsonld('headline')), - toTitle($ => $filter($, $('.post-title'))), - toTitle($ => $filter($, $('.entry-title'))), - toTitle($ => $filter($, $('h1[class*="title" i] a'))), - toTitle($ => $filter($, $('h1[class*="title" i]'))) - ] -}) +module.exports = () => { + const rules = { + title: [ + toTitle($ => $('meta[property="og:title"]').attr('content')), + toTitle($ => $('meta[name="twitter:title"]').attr('content')), + toTitle($ => $('meta[property="twitter:title"]').attr('content')), + toTitle($ => $filter($, $('title'))), + toTitle($jsonld('headline')), + toTitle($ => $filter($, $('.post-title'))), + toTitle($ => $filter($, $('.entry-title'))), + toTitle($ => $filter($, $('h1[class*="title" i] a'))), + toTitle($ => $filter($, $('h1[class*="title" i]'))) + ] + } + + rules.pkgName = 'metascraper-title' + + return rules +} diff --git a/packages/metascraper-uol/src/index.js b/packages/metascraper-uol/src/index.js index bab0e3ae1..fab870858 100644 --- a/packages/metascraper-uol/src/index.js +++ b/packages/metascraper-uol/src/index.js @@ -31,6 +31,8 @@ module.exports = () => { rules.test = ({ url }) => test(url) + rules.pkgName = 'metascraper-uol' + return rules } diff --git a/packages/metascraper-url/src/index.js b/packages/metascraper-url/src/index.js index 4b37c132f..68295beca 100644 --- a/packages/metascraper-url/src/index.js +++ b/packages/metascraper-url/src/index.js @@ -4,13 +4,19 @@ const { toRule, url: urlFn } = require('@metascraper/helpers') const toUrl = toRule(urlFn) -module.exports = () => ({ - url: [ - toUrl($ => $('meta[property="og:url"]').attr('content')), - toUrl($ => $('meta[name="twitter:url"]').attr('content')), - toUrl($ => $('meta[property="twitter:url"]').attr('content')), - toUrl($ => $('link[rel="canonical"]').attr('href')), - toUrl($ => $('link[rel="alternate"][hreflang="x-default"]').attr('href')), - ({ url }) => url - ] -}) +module.exports = () => { + const rules = { + url: [ + toUrl($ => $('meta[property="og:url"]').attr('content')), + toUrl($ => $('meta[name="twitter:url"]').attr('content')), + toUrl($ => $('meta[property="twitter:url"]').attr('content')), + toUrl($ => $('link[rel="canonical"]').attr('href')), + toUrl($ => $('link[rel="alternate"][hreflang="x-default"]').attr('href')), + ({ url }) => url + ] + } + + rules.pkgName = 'metascraper-url' + + return rules +} diff --git a/packages/metascraper-video/src/index.js b/packages/metascraper-video/src/index.js index 65ca1cb82..04975e255 100644 --- a/packages/metascraper-video/src/index.js +++ b/packages/metascraper-video/src/index.js @@ -109,7 +109,13 @@ const withIframe = (rules, getIframe) => } ) -module.exports = ({ getIframe = _getIframe } = {}) => ({ - image: withIframe(imageRules, getIframe), - video: withIframe(videoRules, getIframe) -}) +module.exports = ({ getIframe = _getIframe } = {}) => { + const rules = { + image: withIframe(imageRules, getIframe), + video: withIframe(videoRules, getIframe) + } + + rules.pkgName = 'metascraper-video' + + return rules +} diff --git a/packages/metascraper-x/src/index.js b/packages/metascraper-x/src/index.js index 6db724ac5..cf26b5bad 100644 --- a/packages/metascraper-x/src/index.js +++ b/packages/metascraper-x/src/index.js @@ -69,6 +69,8 @@ module.exports = ({ resolveUrls = false, resolveUrl = url => url } = {}) => { rules.test = ({ url }) => test(url) + rules.pkgName = 'metascraper-x' + return rules } diff --git a/packages/metascraper-youtube/src/index.js b/packages/metascraper-youtube/src/index.js index bb47cf2db..6936ced0e 100644 --- a/packages/metascraper-youtube/src/index.js +++ b/packages/metascraper-youtube/src/index.js @@ -56,6 +56,8 @@ module.exports = ({ gotOpts } = {}) => { rules.test = ({ url }) => test(url) + rules.pkgName = 'metascraper-youtube' + return rules }