From e71d8c7fac110c46ef383fa2c0dca27d3cff8385 Mon Sep 17 00:00:00 2001 From: Edward Kerry Date: Wed, 13 Jan 2021 09:45:09 +0000 Subject: [PATCH] Allow optional relaxed elements to be excluded from sanitization This change allows an optional array of `relaxed_sanitization_elements` to be passed to a new Document, and these will be excluded from HTML sanitization. --- lib/govspeak.rb | 3 ++- lib/govspeak/html_sanitizer.rb | 8 ++++++-- test/govspeak_test.rb | 5 +++++ test/html_sanitizer_test.rb | 6 ++++++ 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/lib/govspeak.rb b/lib/govspeak.rb index fff6912a..98f3a127 100644 --- a/lib/govspeak.rb +++ b/lib/govspeak.rb @@ -53,6 +53,7 @@ def initialize(source, options = {}) @source = source ? source.dup : "" @images = options.delete(:images) || [] + @relaxed_sanitization_elements = options.delete(:relaxed_sanitization_elements) || [] @attachments = Array.wrap(options.delete(:attachments)) @links = Array.wrap(options.delete(:links)) @contacts = Array.wrap(options.delete(:contacts)) @@ -66,7 +67,7 @@ def initialize(source, options = {}) def to_html @to_html ||= begin html = if @options[:sanitize] - HtmlSanitizer.new(kramdown_doc.to_html).sanitize + HtmlSanitizer.new(kramdown_doc.to_html).sanitize(relaxed_elements: @relaxed_sanitization_elements) else kramdown_doc.to_html end diff --git a/lib/govspeak/html_sanitizer.rb b/lib/govspeak/html_sanitizer.rb index 4adef8e1..bdd9e166 100644 --- a/lib/govspeak/html_sanitizer.rb +++ b/lib/govspeak/html_sanitizer.rb @@ -40,12 +40,16 @@ def initialize(dirty_html, options = {}) @allowed_image_hosts = options[:allowed_image_hosts] end - def sanitize + def sanitize(relaxed_elements: []) transformers = [TableCellTextAlignWhitelister.new] if @allowed_image_hosts && @allowed_image_hosts.any? transformers << ImageSourceWhitelister.new(@allowed_image_hosts) end - Sanitize.clean(@dirty_html, Sanitize::Config.merge(sanitize_config, transformers: transformers)) + + config = sanitize_config + relaxed_elements.each { |el| config[:elements].add(el) } + + Sanitize.clean(@dirty_html, Sanitize::Config.merge(config, transformers: transformers)) end def sanitize_config diff --git a/test/govspeak_test.rb b/test/govspeak_test.rb index 41c3c767..476e47f0 100644 --- a/test/govspeak_test.rb +++ b/test/govspeak_test.rb @@ -666,6 +666,11 @@ class GovspeakTest < Minitest::Test assert_equal "", document.to_html.strip end + test "it can exclude stipulated elements from sanitization" do + document = Govspeak::Document.new("some content", relaxed_sanitization_elements: %w[uncommon-element]) + assert_equal "some content", document.to_html.strip + end + test "identifies a Govspeak document containing malicious HTML as invalid" do document = Govspeak::Document.new("") refute document.valid? diff --git a/test/html_sanitizer_test.rb b/test/html_sanitizer_test.rb index e881b3a6..14c9e08a 100644 --- a/test/html_sanitizer_test.rb +++ b/test/html_sanitizer_test.rb @@ -96,4 +96,10 @@ class HtmlSanitizerTest < Minitest::Test assert_equal "
thing
thing
", Govspeak::HtmlSanitizer.new(html).sanitize end end + + test "excludes specified elements from sanitization" do + html = "

text

" + assert_equal "

text

", Govspeak::HtmlSanitizer.new(html).sanitize + assert_equal html, Govspeak::HtmlSanitizer.new(html).sanitize(relaxed_elements: %w[custom-relaxed-element]) + end end