Fixed bug when an external post's title is composed of non-ASCII chars

Fixed a bug in external-posts.rb when a post's title is composed only of non-ASCII chars
alshedivat · Aug 28, 2024 · cd3f4d6 · cd3f4d6
1 parent 6c6932f
commit cd3f4d6
Showing 1 changed file with 11 additions and 2 deletions.
diff --git a/_plugins/external-posts.rb b/_plugins/external-posts.rb
@@ -42,7 +42,16 @@ def process_entries(site, src, entries)
     end
 
     def create_document(site, source_name, url, content)
-      slug = content[:title].downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
+      # check if title is composed only of whitespace or foreign characters
+      if content[:title].gsub(/[^\w]/, '').strip.empty?
+        # use the source name and last url segment as fallback
+        slug = "#{source_name.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')}-#{url.split('/').last}"
+      else
+        # parse title from the post or use the source name and last url segment as fallback
+        slug = content[:title].downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
+        slug = "#{source_name.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')}-#{url.split('/').last}" if slug.empty?
+      end
+
       path = site.in_source_dir("_posts/#{slug}.md")
       doc = Jekyll::Document.new(
         path, { :site => site, :collection => site.collections['posts'] }
@@ -80,7 +89,7 @@ def fetch_content_from_url(url)
       html = HTTParty.get(url).body
       parsed_html = Nokogiri::HTML(html)
 
-      title = parsed_html.at('head title')&.text || ''
+      title = parsed_html.at('head title')&.text.strip || ''
       description = parsed_html.at('head meta[name="description"]')&.attr('content') || ''
       body_content = parsed_html.at('body')&.inner_html || ''