Skip to content

Commit

Permalink
feat: use custom wordcount more faithful to skimming readers
Browse files Browse the repository at this point in the history
- use `innerText`, ignore spaces from html tags
- counts code by identifiers, ignores symbol tokens (e.g. `->`)
- ignore collapsed details and footnotes
  • Loading branch information
TrebledJ committed Nov 27, 2023
1 parent 7e0c4ad commit ca29dcf
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 1 deletion.
1 change: 1 addition & 0 deletions .eslintrc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,4 @@ rules:
no-new: off
no-lonely-if: off
prefer-destructuring: off
prefer-arrow-callback: off
2 changes: 1 addition & 1 deletion _includes/post/metadata.html
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
<i class="far fa-clock post-meta-icon me-1"></i>
</span>
<span class="fs-7">
{% set words = post.content | default(content) | striptags | wordcount %}
{% set words = post.content | default(content) | wordcountFocused %}
{% set minutes = ((words + 240) / 240) | int %}
{{ minutes + " minute read" }}
</span>
Expand Down
25 changes: 25 additions & 0 deletions eleventy/filters.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,12 @@ const { nonEmptyContainerSentinel } = require('./detail/utils');
const selectHomePosts = require('./detail/select-home-posts');
const findKeywords = require('./detail/keywords');

/**
 * Count how many times `needle` matches in `str`.
 *
 * @param {?string} str - Text to search. `null`/`undefined` count as empty text;
 *   any other non-string value is converted with `String()`.
 * @param {RegExp} needle - Pattern to match. Callers pass a global (`/g`) regex so
 *   that every occurrence is reported by `String.prototype.match`.
 * @returns {number} Number of matches, or 0 when there are none.
 */
function count(str, needle) {
  // Missing content (e.g. an undefined template variable) has no words; don't throw.
  if (str == null) {
    return 0;
  }
  // `match` returns null (not an empty array) when nothing matches.
  return String(str).match(needle)?.length ?? 0;
}

module.exports = function (eleventyConfig) {

// Formatting tokens for Luxon: https://moment.github.io/luxon/#/formatting?id=table-of-tokens
eleventyConfig.addFilter('date', (dateObj, format, zone) => (
DateTime.fromJSDate(dateObj, { zone: zone ?? 'utc' }).toFormat(format ?? 'yyyy-LL-dd').replace(/-/g, '&#8209;')
Expand Down Expand Up @@ -78,6 +83,26 @@ module.exports = function (eleventyConfig) {
return appendAfterTruncate(truncated, append);
});

if (process.env.ENVIRONMENT === 'fast') {
  // Fast builds skip HTML parsing: approximate the word count by counting
  // whitespace runs in the raw content (markup included).
  eleventyConfig.addFilter('wordcountFocused', content => count(content ?? '', /\s+/g));
} else {
  // Word count intended to reflect what a skimming reader actually reads.
  eleventyConfig.addFilter('wordcountFocused', function (content) {
    const $ = cheerio.load(content ?? '');

    // Drop blocks a skimming reader does not read *before* counting anything,
    // so code nested inside collapsed details or footnotes is not counted either.
    $('details:not([open])').remove();
    $('img').remove();
    $('.footnotes').remove();

    // Count code by identifier-like tokens only; symbol tokens (e.g. `->`) are ignored.
    const codeWords = $('code').toArray()
      .map(e => count($(e).prop('innerText') ?? '', /[A-Za-z_][A-Za-z0-9_-]*/g))
      .reduce((a, b) => a + b, 0);

    // Code was counted above; remove it so it is not counted again as prose.
    $('code').remove();

    // Prefer the post body; fall back to the first element of the document.
    const article = $('.post-body').prop('innerText') ?? $('*').prop('innerText') ?? '';
    const words = count(article, /\s+/g);
    return words + codeWords;
  });
}

// A filter to murder tags and their children brutally with regex. Please don't take this comment seriously.
eleventyConfig.addFilter('annihilateTags', (html, tags) => {
const dumbHTMLRegex = tag => new RegExp(`<${tag}(\\s+\\w+\\s*=\\s*("[^"]*"|'[^']*'))*/?>.*?(</${tag}>)?`, 'ig');
Expand Down

0 comments on commit ca29dcf

Please sign in to comment.