From 5fb40a79dbc6ab8f11739ecc4eb78fab260d1d6f Mon Sep 17 00:00:00 2001 From: Stan Hu Date: Sun, 22 Jul 2018 15:18:42 -0700 Subject: [PATCH] Optimize Sanitize#transform_node! Since transform_node! may be called in a tight loop to process thousands of items, we can optimize both memory and CPU performance by: 1. Reusing the same config hash for each transformer 2. Directly assigning values to hash instead of using merge!. Not only does merge! create a new hash, it is also 2.6x slower: https://github.com/JuanitoFatas/fast-ruby#hashmerge-vs-hash-code --- lib/sanitize.rb | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/lib/sanitize.rb b/lib/sanitize.rb index c7dd4c8..3f6f90c 100644 --- a/lib/sanitize.rb +++ b/lib/sanitize.rb @@ -96,6 +96,8 @@ def initialize(config = {}) @transformers << Transformers::CleanDoctype @transformers << Transformers::CleanCDATA + + @transformer_config = { config: @config } end # Returns a sanitized copy of the given _html_ document. @@ -217,13 +219,20 @@ def to_html(node) def transform_node!(node, node_whitelist) @transformers.each do |transformer| - result = transformer.call( - :config => @config, - :is_whitelisted => node_whitelist.include?(node), - :node => node, - :node_name => node.name.downcase, - :node_whitelist => node_whitelist - ) + # Since transform_node! may be called in a tight loop to process thousands + # of items, we can optimize both memory and CPU performance by: + # + # 1. Reusing the same config hash for each transformer + # 2. Directly assigning values to hash instead of using merge!. Not only + # does merge! 
create a new hash, it is also 2.6x slower: + # https://github.com/JuanitoFatas/fast-ruby#hashmerge-vs-hash-code + config = @transformer_config + config[:is_whitelisted] = node_whitelist.include?(node) + config[:node] = node + config[:node_name] = node.name.downcase + config[:node_whitelist] = node_whitelist + + result = transformer.call(config) if result.is_a?(Hash) && result[:node_whitelist].respond_to?(:each) node_whitelist.merge(result[:node_whitelist])