From cd3f4d6be533bc993f156b8ad5e4e04140ba9f22 Mon Sep 17 00:00:00 2001
From: George <31376482+george-gca@users.noreply.github.com>
Date: Wed, 28 Aug 2024 15:22:20 -0300
Subject: [PATCH] Fixed bug when external posts title is composed of non-ascii chars

Fixed a bug in external-posts.rb when post title is composed of non-ascii chars
---
Review notes (this section is ignored by `git am`):

* create_document: Ruby's /\w/ matches only [a-zA-Z0-9_], so a title made up
  solely of whitespace or non-ASCII characters yields an empty slug. In that
  case the slug falls back to "<sanitized source name>-<last URL segment>".
* fetch_content_from_url: the title is stripped with `&.text&.strip`. `&.`
  guards only the call it is attached to, so `&.text.strip` would raise
  NoMethodError on a page without a <title> instead of falling back to ''.
* Indentation of the hunks below was restored assuming 2-space Ruby nesting;
  TODO confirm it matches _plugins/external-posts.rb before applying.

 _plugins/external-posts.rb | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/_plugins/external-posts.rb b/_plugins/external-posts.rb
index 91ef8d6d7f53..41a6c4360657 100644
--- a/_plugins/external-posts.rb
+++ b/_plugins/external-posts.rb
@@ -42,7 +42,16 @@ def process_entries(site, src, entries)
     end
 
     def create_document(site, source_name, url, content)
-      slug = content[:title].downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
+      # check if title is composed only of whitespace or foreign characters
+      if content[:title].gsub(/[^\w]/, '').strip.empty?
+        # use the source name and last url segment as fallback
+        slug = "#{source_name.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')}-#{url.split('/').last}"
+      else
+        # parse title from the post or use the source name and last url segment as fallback
+        slug = content[:title].downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
+        slug = "#{source_name.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')}-#{url.split('/').last}" if slug.empty?
+      end
+
       path = site.in_source_dir("_posts/#{slug}.md")
       doc = Jekyll::Document.new(
         path, { :site => site, :collection => site.collections['posts'] }
@@ -80,7 +89,7 @@ def fetch_content_from_url(url)
       html = HTTParty.get(url).body
       parsed_html = Nokogiri::HTML(html)
 
-      title = parsed_html.at('head title')&.text || ''
+      title = parsed_html.at('head title')&.text&.strip || ''
       description = parsed_html.at('head meta[name="description"]')&.attr('content') || ''
       body_content = parsed_html.at('body')&.inner_html || ''
 