Skip to content

Commit

Permalink
chore: add rules.pkgName
Browse files Browse the repository at this point in the history
  • Loading branch information
Kikobeats committed Jan 10, 2025
1 parent 9370e3c commit 7c047cd
Show file tree
Hide file tree
Showing 25 changed files with 216 additions and 118 deletions.
2 changes: 2 additions & 0 deletions packages/metascraper-amazon/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,5 +60,7 @@ module.exports = () => {

rules.test = ({ url }) => test(url)

rules.pkgName = 'metascraper-amazon'

return rules
}
6 changes: 5 additions & 1 deletion packages/metascraper-audio/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ const _getIframe = (url, $, { src }) =>
loadIframe(url, $.load(`<iframe src="${src}"></iframe>`))

module.exports = ({ getIframe = _getIframe } = {}) => {
return {
const rules = {
audio: audioRules.concat(
async ({ htmlDom: $, url }) => {
const srcs = [
Expand Down Expand Up @@ -110,4 +110,8 @@ module.exports = ({ getIframe = _getIframe } = {}) => {
}
)
}

rules.pkgName = 'metascraper-audio'

return rules
}
52 changes: 29 additions & 23 deletions packages/metascraper-author/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,27 +24,33 @@ const strict = rule => $ => {
return REGEX_STRICT.test(value) && value
}

module.exports = () => ({
author: [
toAuthor($jsonld('author.name')),
toAuthor($jsonld('brand.name')),
toAuthor($ => $('meta[name="author"]').attr('content')),
toAuthor($ => $('meta[property="article:author"]').attr('content')),
toAuthor($ => $filter($, $('[itemprop*="author" i] [itemprop="name"]'))),
toAuthor($ => $filter($, $('[itemprop*="author" i]'))),
toAuthor($ => $filter($, $('[rel="author"]'))),
strict(toAuthor($ => $filter($, $('a[class*="author" i]')))),
strict(toAuthor($ => $filter($, $('[class*="author" i] a')))),
strict(toAuthor($ => $filter($, $('a[href*="/author/" i]')))),
toAuthor($ => $filter($, $('a[class*="screenname" i]'))),
strict(toAuthor($ => $filter($, $('[class*="author" i]')))),
strict(
toAuthor($ =>
$filter($, $('[class*="byline" i]'), el => {
const value = $filter.fn(el)
return !date(value) && value
})
module.exports = () => {
const rules = {
author: [
toAuthor($jsonld('author.name')),
toAuthor($jsonld('brand.name')),
toAuthor($ => $('meta[name="author"]').attr('content')),
toAuthor($ => $('meta[property="article:author"]').attr('content')),
toAuthor($ => $filter($, $('[itemprop*="author" i] [itemprop="name"]'))),
toAuthor($ => $filter($, $('[itemprop*="author" i]'))),
toAuthor($ => $filter($, $('[rel="author"]'))),
strict(toAuthor($ => $filter($, $('a[class*="author" i]')))),
strict(toAuthor($ => $filter($, $('[class*="author" i] a')))),
strict(toAuthor($ => $filter($, $('a[href*="/author/" i]')))),
toAuthor($ => $filter($, $('a[class*="screenname" i]'))),
strict(toAuthor($ => $filter($, $('[class*="author" i]')))),
strict(
toAuthor($ =>
$filter($, $('[class*="byline" i]'), el => {
const value = $filter.fn(el)
return !date(value) && value
})
)
)
)
]
})
]
}

rules.pkgName = 'metascraper-author'

return rules
}
6 changes: 5 additions & 1 deletion packages/metascraper-clearbit/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,12 @@ module.exports = opts => {
const clearbit = createClearbit(opts)
const getClearbit = composeRule(($, url) => clearbit(parseUrl(url).domain))

return {
const rules = {
logo: getClearbit({ from: 'logo' }),
publisher: getClearbit({ from: 'name', to: 'publisher' })
}

rules.pkgName = 'metascraper-clearbit'

return rules
}
10 changes: 6 additions & 4 deletions packages/metascraper-date/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,17 +43,19 @@ module.exports = (
dateModified: false
}
) => {
const result = {
const rules = {
date: dateModifiedRules().concat(datePublishedRules(), dateRules())
}

if (datePublished) {
result.datePublished = datePublishedRules()
rules.datePublished = datePublishedRules()
}

if (dateModified) {
result.dateModified = dateModifiedRules()
rules.dateModified = dateModifiedRules()
}

return result
rules.pkgName = 'metascraper-date'

return rules
}
6 changes: 5 additions & 1 deletion packages/metascraper-description/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ const { $jsonld, toRule, description } = require('@metascraper/helpers')
module.exports = opts => {
const toDescription = toRule(description, opts)

return {
const rules = {
description: [
toDescription($ => $('meta[property="og:description"]').attr('content')),
toDescription($ => $('meta[name="twitter:description"]').attr('content')),
Expand All @@ -18,4 +18,8 @@ module.exports = opts => {
toDescription($jsonld('description'))
]
}

rules.pkgName = 'metascraper-description'

return rules
}
6 changes: 5 additions & 1 deletion packages/metascraper-feed/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,15 @@ const { toRule, url } = require('@metascraper/helpers')
const toUrl = toRule(url)

module.exports = () => {
return {
const rules = {
feed: [
toUrl($ => $('link[type="application/rss+xml"]').attr('href')),
toUrl($ => $('link[type="application/feed+json"]').attr('href')),
toUrl($ => $('link[type="application/atom+xml"]').attr('href'))
]
}

rules.pkgName = 'metascraper-feed'

return rules
}
2 changes: 2 additions & 0 deletions packages/metascraper-iframe/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ module.exports = ({ gotOpts } = {}) => {

rules.test = ({ url, htmlDom }) => test(url, htmlDom)

rules.pkgName = 'metascraper-iframe'

return rules
}

Expand Down
44 changes: 25 additions & 19 deletions packages/metascraper-image/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,28 @@ const toImage = toRule(image)

const getSrc = el => el.attr('src')

module.exports = () => ({
image: [
toImage($ => $('meta[property="og:image:secure_url"]').attr('content')),
toImage($ => $('meta[property="og:image:url"]').attr('content')),
toImage($ => $('meta[property="og:image"]').attr('content')),
toImage($ => $('meta[name="twitter:image:src"]').attr('content')),
toImage($ => $('meta[property="twitter:image:src"]').attr('content')),
toImage($ => $('meta[name="twitter:image"]').attr('content')),
toImage($ => $('meta[property="twitter:image"]').attr('content')),
toImage($ => $('meta[itemprop="image"]').attr('content')),
toImage($jsonld('image.0.url')),
toImage($jsonld('image.url')),
toImage($jsonld('image')),
toImage($ => $filter($, $('article img[src]'), getSrc)),
toImage($ => $filter($, $('#content img[src]'), getSrc)),
toImage($ => $('img[alt*="author" i]').attr('src')),
toImage($ => $('img[src]:not([aria-hidden="true"])').attr('src'))
]
})
module.exports = () => {
const rules = {
image: [
toImage($ => $('meta[property="og:image:secure_url"]').attr('content')),
toImage($ => $('meta[property="og:image:url"]').attr('content')),
toImage($ => $('meta[property="og:image"]').attr('content')),
toImage($ => $('meta[name="twitter:image:src"]').attr('content')),
toImage($ => $('meta[property="twitter:image:src"]').attr('content')),
toImage($ => $('meta[name="twitter:image"]').attr('content')),
toImage($ => $('meta[property="twitter:image"]').attr('content')),
toImage($ => $('meta[itemprop="image"]').attr('content')),
toImage($jsonld('image.0.url')),
toImage($jsonld('image.url')),
toImage($jsonld('image')),
toImage($ => $filter($, $('article img[src]'), getSrc)),
toImage($ => $filter($, $('#content img[src]'), getSrc)),
toImage($ => $('img[alt*="author" i]').attr('src')),
toImage($ => $('img[src]:not([aria-hidden="true"])').attr('src'))
]
}

rules.pkgName = 'metascraper-image'

return rules
}
2 changes: 2 additions & 0 deletions packages/metascraper-instagram/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,5 +48,7 @@ module.exports = () => {

rules.test = ({ url }) => test(url)

rules.pkgName = 'metascraper-instagram'

return rules
}
20 changes: 13 additions & 7 deletions packages/metascraper-lang/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,16 @@ const { lang, toRule } = require('@metascraper/helpers')

const toLang = toRule(lang)

module.exports = () => ({
lang: [
toLang($ => $('meta[property="og:locale"]').attr('content')),
toLang($ => $('meta[itemprop="inLanguage"]').attr('content')),
toLang($ => $('html').attr('lang'))
]
})
module.exports = () => {
const rules = {
lang: [
toLang($ => $('meta[property="og:locale"]').attr('content')),
toLang($ => $('meta[itemprop="inLanguage"]').attr('content')),
toLang($ => $('html').attr('lang'))
]
}

rules.pkgName = 'metascraper-lang'

return rules
}
6 changes: 5 additions & 1 deletion packages/metascraper-logo-favicon/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ module.exports = ({
withGoogle
})
const rootFavicon = createRootFavicon({ getLogo, withRootFavicon })
return {
const rules = {
logo: [
toLogo(async ($, url) => {
const sizes = getSizes($, sizeSelectors, url)
Expand All @@ -251,6 +251,10 @@ module.exports = ({
rootFavicon
].filter(Boolean)
}

rules.pkgName = 'metascraper-logo-favicon'

return rules
}

module.exports.google = google
Expand Down
6 changes: 5 additions & 1 deletion packages/metascraper-logo/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ module.exports = ({ filter } = {}) => {

const toLogo = toRule(mapper)

return {
const rules = {
logo: [
toLogo($ => $('meta[property="og:logo"]').attr('content')),
toLogo($ => $('meta[itemprop="logo"]').attr('content')),
Expand All @@ -35,4 +35,8 @@ module.exports = ({ filter } = {}) => {
toLogo($ => toLogoUrl($, 'logo'))
]
}

rules.pkgName = 'metascraper-logo'

return rules
}
6 changes: 5 additions & 1 deletion packages/metascraper-media-provider/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ const getDescription = ({ description }) => descriptionFn(description)
module.exports = (opts = {}) => {
const getMedia = createGetMedia(opts)

return {
const rules = {
audio: async ({ url }) => getAudio(await getMedia(url)),
author: async ({ url }) => getAuthor(await getMedia(url)),
date: async ({ url }) => getDate(await getMedia(url)),
Expand All @@ -147,6 +147,10 @@ module.exports = (opts = {}) => {
title: async ({ url }) => getTitle(await getMedia(url)),
video: async ({ url }) => getVideo(await getMedia(url))
}

rules.pkgName = 'metascraper-media-provider'

return rules
}

module.exports.getAudio = getAudio
Expand Down
70 changes: 40 additions & 30 deletions packages/metascraper-publisher/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,35 +14,45 @@ const getFromTitle = (text, regex = REGEX_TITLE) => {
return result
}

module.exports = () => ({
publisher: [
toPublisher($jsonld('publisher.name')),
toPublisher($ => $('meta[property="og:site_name"]').attr('content')),
toPublisher($ => $('meta[name*="application-name" i]').attr('content')),
toPublisher($ => $('meta[name*="app-title" i]').attr('content')),
toPublisher($ => $('meta[property*="app_name" i]').attr('content')),
toPublisher($ => $('meta[name="publisher" i]').attr('content')),
toPublisher($ => $('meta[name="twitter:app:name:iphone"]').attr('content')),
toPublisher($ =>
$('meta[property="twitter:app:name:iphone"]').attr('content')
),
toPublisher($ => $('meta[name="twitter:app:name:ipad"]').attr('content')),
toPublisher($ =>
$('meta[property="twitter:app:name:ipad"]').attr('content')
),
toPublisher($ =>
$('meta[name="twitter:app:name:googleplay"]').attr('content')
),
toPublisher($ =>
$('meta[property="twitter:app:name:googleplay"]').attr('content')
),
toPublisher($ => $filter($, $('#logo'))),
toPublisher($ => $filter($, $('.logo'))),
toPublisher($ => $filter($, $('a[class*="brand" i]'))),
toPublisher($ => $('[class*="logo" i] a img[alt]').attr('alt')),
toPublisher($ => $('[class*="logo" i] img[alt]').attr('alt')),
toPublisher($ => $filter($, $('title'), el => getFromTitle($filter.fn(el))))
]
})
module.exports = () => {
const rules = {
publisher: [
toPublisher($jsonld('publisher.name')),
toPublisher($ => $('meta[property="og:site_name"]').attr('content')),
toPublisher($ => $('meta[name*="application-name" i]').attr('content')),
toPublisher($ => $('meta[name*="app-title" i]').attr('content')),
toPublisher($ => $('meta[property*="app_name" i]').attr('content')),
toPublisher($ => $('meta[name="publisher" i]').attr('content')),
toPublisher($ =>
$('meta[name="twitter:app:name:iphone"]').attr('content')
),
toPublisher($ =>
$('meta[property="twitter:app:name:iphone"]').attr('content')
),
toPublisher($ => $('meta[name="twitter:app:name:ipad"]').attr('content')),
toPublisher($ =>
$('meta[property="twitter:app:name:ipad"]').attr('content')
),
toPublisher($ =>
$('meta[name="twitter:app:name:googleplay"]').attr('content')
),
toPublisher($ =>
$('meta[property="twitter:app:name:googleplay"]').attr('content')
),
toPublisher($ => $filter($, $('#logo'))),
toPublisher($ => $filter($, $('.logo'))),
toPublisher($ => $filter($, $('a[class*="brand" i]'))),
toPublisher($ => $('[class*="logo" i] a img[alt]').attr('alt')),
toPublisher($ => $('[class*="logo" i] img[alt]').attr('alt')),
toPublisher($ =>
$filter($, $('title'), el => getFromTitle($filter.fn(el)))
)
]
}

rules.pkgName = 'metascraper-publisher'

return rules
}

module.exports.getFromTitle = getFromTitle
6 changes: 5 additions & 1 deletion packages/metascraper-readability/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,15 @@ const readability = memoizeOne((url, html) => {
const getReadbility = composeRule(($, url) => readability(url, $.html()))

module.exports = () => {
return {
const rules = {
author: getReadbility({ from: 'byline', to: 'author' }),
description: getReadbility({ from: 'excerpt', to: 'description' }),
lang: getReadbility({ from: 'lang' }),
publisher: getReadbility({ from: 'siteName', to: 'publisher' }),
title: getReadbility({ from: 'title' })
}

rules.pkgName = 'metascraper-readability'

return rules
}
Loading

0 comments on commit 7c047cd

Please sign in to comment.