From 6e411807dcaa6b13a4607e7c4d787ed13dcedffd Mon Sep 17 00:00:00 2001 From: Colin Surprenant Date: Mon, 8 Feb 2016 00:05:02 -0500 Subject: [PATCH 1/2] support for Event#from_json run specs for both Event#from_json or legacy context if possible --- lib/logstash/codecs/json.rb | 77 ++++++----- spec/codecs/json_spec.rb | 255 ++++++++++++++++++++---------------- 2 files changed, 185 insertions(+), 147 deletions(-) diff --git a/lib/logstash/codecs/json.rb b/lib/logstash/codecs/json.rb index fcc46a9..0367924 100644 --- a/lib/logstash/codecs/json.rb +++ b/lib/logstash/codecs/json.rb @@ -17,7 +17,6 @@ class LogStash::Codecs::JSON < LogStash::Codecs::Base config_name "json" - # The character encoding used in this codec. Examples include "UTF-8" and # "CP1252". # @@ -29,43 +28,57 @@ class LogStash::Codecs::JSON < LogStash::Codecs::Base # For nxlog users, you may to set this to "CP1252". config :charset, :validate => ::Encoding.name_list, :default => "UTF-8" - public def register @converter = LogStash::Util::Charset.new(@charset) @converter.logger = @logger end - public - def decode(data) - data = @converter.convert(data) - begin - decoded = LogStash::Json.load(data) - if decoded.is_a?(Array) - decoded.each {|item| yield(LogStash::Event.new(item)) } - elsif decoded.is_a?(Hash) - yield LogStash::Event.new(decoded) - else - @logger.info? && @logger.info("JSON codec received a scalar instead of an Arary or Object!", :data => data) - yield LogStash::Event.new("message" => data, "tags" => ["_jsonparsefailure"]) - end - - rescue LogStash::Json::ParserError => e - @logger.info("JSON parse failure. Falling back to plain-text", :error => e, :data => data) - yield LogStash::Event.new("message" => data, "tags" => ["_jsonparsefailure"]) - rescue StandardError => e - # This should NEVER happen. But hubris has been the cause of many pipeline breaking things - # If something bad should happen we just don't want to crash logstash here. - @logger.warn("An unexpected error occurred parsing input to JSON", - :input => data, - :message => e.message, - :class => e.class.name, - :backtrace => e.backtrace) - end - end # def decode + def decode(data, &block) + parse(@converter.convert(data), &block) + end - public def encode(event) @on_event.call(event, event.to_json) - end # def encode + end + + private + + def from_json_parse(json, &block) + LogStash::Event.from_json(json).each { |event| yield event } + rescue LogStash::Json::ParserError => e + @logger.error("JSON parse error, original data now in message field", :error => e, :data => json) + yield LogStash::Event.new("message" => json, "tags" => ["_jsonparsefailure"]) + end + + def legacy_parse(json, &block) + decoded = LogStash::Json.load(json) + + case decoded + when Array + decoded.each {|item| yield(LogStash::Event.new(item)) } + when Hash + yield LogStash::Event.new(decoded) + else + @logger.error("JSON codec is expecting array or object/map", :data => json) + yield LogStash::Event.new("message" => json, "tags" => ["_jsonparsefailure"]) + end + rescue LogStash::Json::ParserError => e + @logger.error("JSON parse failure. Falling back to plain-text", :error => e, :data => json) + yield LogStash::Event.new("message" => json, "tags" => ["_jsonparsefailure"]) + rescue StandardError => e + # This should NEVER happen. But hubris has been the cause of many pipeline breaking things + # If something bad should happen we just don't want to crash logstash here. 
+ @logger.warn( + "An unexpected error occurred parsing JSON data", + :data => json, + :message => e.message, + :class => e.class.name, + :backtrace => e.backtrace + ) + end + + # keep compatibility with all v2.x distributions. only in 2.3 will the Event#from_json method be introduced + # and we need to keep compatibility for all v2 releases. + alias_method :parse, LogStash::Event.respond_to?(:from_json) ? :from_json_parse : :legacy_parse -end # class LogStash::Codecs::JSON +end diff --git a/spec/codecs/json_spec.rb b/spec/codecs/json_spec.rb index 25c5ca1..c7f5b54 100644 --- a/spec/codecs/json_spec.rb +++ b/spec/codecs/json_spec.rb @@ -6,161 +6,186 @@ describe LogStash::Codecs::JSON do subject do - next LogStash::Codecs::JSON.new + LogStash::Codecs::JSON.new end - context "#decode" do - it "should return an event from json data" do - data = {"foo" => "bar", "baz" => {"bah" => ["a","b","c"]}} - subject.decode(LogStash::Json.dump(data)) do |event| - insist { event.is_a? LogStash::Event } - insist { event["foo"] } == data["foo"] - insist { event["baz"] } == data["baz"] - insist { event["bah"] } == data["bah"] + shared_examples :codec do + + context "#decode" do + it "should return an event from json data" do + data = {"foo" => "bar", "baz" => {"bah" => ["a","b","c"]}} + subject.decode(LogStash::Json.dump(data)) do |event| + insist { event.is_a? LogStash::Event } + insist { event["foo"] } == data["foo"] + insist { event["baz"] } == data["baz"] + insist { event["bah"] } == data["bah"] + end end - end - it "should be fast", :performance => true do - json = '{"message":"Hello world!","@timestamp":"2013-12-21T07:01:25.616Z","@version":"1","host":"Macintosh.local","sequence":1572456}' - iterations = 500000 - count = 0 + it "should be fast", :performance => true do + json = '{"message":"Hello world!","@timestamp":"2013-12-21T07:01:25.616Z","@version":"1","host":"Macintosh.local","sequence":1572456}' + iterations = 500000 + count = 0 - # Warmup - 10000.times { subject.decode(json) { } } + # Warmup + 10000.times { subject.decode(json) { } } - start = Time.now - iterations.times do - subject.decode(json) do |event| - count += 1 + start = Time.now + iterations.times do + subject.decode(json) do |event| + count += 1 + end end + duration = Time.now - start + insist { count } == iterations + puts "codecs/json rate: #{"%02.0f/sec" % (iterations / duration)}, elapsed: #{duration}s" end - duration = Time.now - start - insist { count } == iterations - puts "codecs/json rate: #{"%02.0f/sec" % (iterations / duration)}, elapsed: #{duration}s" - end - context "processing plain text" do - it "falls back to plain text" do - decoded = false - subject.decode("something that isn't json") do |event| - decoded = true - insist { event.is_a?(LogStash::Event) } - insist { event["message"] } == "something that isn't json" - insist { event["tags"] }.include?("_jsonparsefailure") + context "processing plain text" do + it "falls back to plain text" do + decoded = false + subject.decode("something that isn't json") do |event| + decoded = true + insist { event.is_a?(LogStash::Event) } + insist { event["message"] } == "something that isn't json" + insist { event["tags"] }.include?("_jsonparsefailure") + end + insist { decoded } == true end - insist { decoded } == true end - end - describe "scalar values" do - shared_examples "given a value" do |value_arg| - context "where value is #{value_arg}" do - let(:value) { value_arg } - let(:event) { LogStash::Event.new(value) } - let(:value_json) { LogStash::Json.dump(value)} - 
let(:event) do - e = nil - subject.decode(value_json) do |decoded| - e = decoded + describe "scalar values" do + shared_examples "given a value" do |value_arg| + context "where value is #{value_arg}" do + let(:value) { value_arg } + let(:event) { LogStash::Event.new(value) } + let(:value_json) { LogStash::Json.dump(value)} + let(:event) do + e = nil + subject.decode(value_json) do |decoded| + e = decoded + end + e end - e - end - it "should store the value in 'message'" do - expect(event["message"]).to eql(value_json) - end + it "should store the value in 'message'" do + expect(event["message"]).to eql(value_json) + end - it "should have the json parse failure tag" do - expect(event["tags"]).to include("_jsonparsefailure") + it "should have the json parse failure tag" do + expect(event["tags"]).to include("_jsonparsefailure") + end end end - end - include_examples "given a value", 123 - include_examples "given a value", "hello" - include_examples "given a value", "-1" - include_examples "given a value", " " - end + include_examples "given a value", 123 + include_examples "given a value", "hello" + include_examples "given a value", "-1" + include_examples "given a value", " " + end - context "processing JSON with an array root" do - let(:data) { - [ - {"foo" => "bar"}, - {"foo" => "baz"} - ] - } - let(:data_json) { - LogStash::Json.dump(data) - } - - it "should yield multiple events" do - count = 0 - subject.decode(data_json) do |event| - count += 1 + context "processing JSON with an array root" do + let(:data) { + [ + {"foo" => "bar"}, + {"foo" => "baz"} + ] + } + let(:data_json) { + LogStash::Json.dump(data) + } + + it "should yield multiple events" do + count = 0 + subject.decode(data_json) do |event| + count += 1 + end + expect(count).to eql(data.count) end - expect(count).to eql(data.count) - end - it "should yield the correct objects" do - index = 0 - subject.decode(data_json) do |event| - expect(event.to_hash).to include(data[index]) - index += 1 + it "should yield the correct objects" do + index = 0 + subject.decode(data_json) do |event| + expect(event.to_hash).to include(data[index]) + index += 1 + end end end - end - context "processing weird binary blobs" do - it "falls back to plain text and doesn't crash (LOGSTASH-1595)" do - decoded = false - blob = (128..255).to_a.pack("C*").force_encoding("ASCII-8BIT") - subject.decode(blob) do |event| - decoded = true - insist { event.is_a?(LogStash::Event) } - insist { event["message"].encoding.to_s } == "UTF-8" + context "processing weird binary blobs" do + it "falls back to plain text and doesn't crash (LOGSTASH-1595)" do + decoded = false + blob = (128..255).to_a.pack("C*").force_encoding("ASCII-8BIT") + subject.decode(blob) do |event| + decoded = true + insist { event.is_a?(LogStash::Event) } + insist { event["message"].encoding.to_s } == "UTF-8" + end + insist { decoded } == true end - insist { decoded } == true end - end - context "when json could not be parsed" do + context "when json could not be parsed" do - let(:message) { "random_message" } + let(:message) { "random_message" } - it "add the failure tag" do - subject.decode(message) do |event| - expect(event).to include "tags" + it "add the failure tag" do + subject.decode(message) do |event| + expect(event).to include "tags" + end end - end - it "uses an array to store the tags" do - subject.decode(message) do |event| - expect(event['tags']).to be_a Array + it "uses an array to store the tags" do + subject.decode(message) do |event| + expect(event['tags']).to be_a Array + end + 
end + + it "add a json parser failure tag" do + subject.decode(message) do |event| + expect(event['tags']).to include "_jsonparsefailure" + end end end + end - it "add a json parser failure tag" do - subject.decode(message) do |event| - expect(event['tags']).to include "_jsonparsefailure" + context "#encode" do + it "should return json data" do + data = {"foo" => "bar", "baz" => {"bah" => ["a","b","c"]}} + event = LogStash::Event.new(data) + got_event = false + subject.on_event do |e, d| + insist { d.chomp } == event.to_json + insist { LogStash::Json.load(d)["foo"] } == data["foo"] + insist { LogStash::Json.load(d)["baz"] } == data["baz"] + insist { LogStash::Json.load(d)["bah"] } == data["bah"] + got_event = true end + subject.encode(event) + insist { got_event } end end end - context "#encode" do - it "should return json data" do - data = {"foo" => "bar", "baz" => {"bah" => ["a","b","c"]}} - event = LogStash::Event.new(data) - got_event = false - subject.on_event do |e, d| - insist { d.chomp } == event.to_json - insist { LogStash::Json.load(d)["foo"] } == data["foo"] - insist { LogStash::Json.load(d)["baz"] } == data["baz"] - insist { LogStash::Json.load(d)["bah"] } == data["bah"] - got_event = true + context "forcing legacy parsing" do + it_behaves_like :codec do + before(:each) do + # stub codec parse method to force use of the legacy parser. + # this is very implementation specific but I am not sure how + # this can be tested otherwise. + allow(subject).to receive(:parse) do |data, &block| + subject.send(:legacy_parse, data, &block) + end end - subject.encode(event) - insist { got_event } end end + + context "default parser choice" do + # here we cannot force the use of the Event#from_json since if this test is run in the + # legacy context (no Java Event) it will fail but if in the new context, it will be picked up. + it_behaves_like :codec do + # do nothing + end + end + end From 52baa8764292f5ddd28f2c650133ded9cb8aca49 Mon Sep 17 00:00:00 2001 From: Colin Surprenant Date: Tue, 9 Feb 2016 13:03:40 -0500 Subject: [PATCH 2/2] bump version to 2.1.0 --- CHANGELOG.md | 3 +++ logstash-codec-json.gemspec | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 32b8d35..cdd23b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 2.1.0 + - Backward compatible support for `Event#from_json` method https://github.com/logstash-plugins/logstash-codec-json/pull/21 + ## 2.0.4 - Reduce the size of the gem by removing the vendor files diff --git a/logstash-codec-json.gemspec b/logstash-codec-json.gemspec index a8c0b24..f632fd5 100644 --- a/logstash-codec-json.gemspec +++ b/logstash-codec-json.gemspec @@ -1,17 +1,17 @@ Gem::Specification.new do |s| s.name = 'logstash-codec-json' - s.version = '2.0.4' + s.version = '2.1.0' s.licenses = ['Apache License (2.0)'] s.summary = "This codec may be used to decode (via inputs) and encode (via outputs) full JSON messages" s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. 
This gem is not a stand-alone program" s.authors = ["Elastic"] s.email = 'info@elastic.co' s.homepage = "http://www.elastic.co/guide/en/logstash/current/index.html" - s.require_paths = ["lib"] + s.require_paths = ["lib"] # Files - s.files = Dir['lib/**/*','spec/**/*','vendor/**/*','*.gemspec','*.md','CONTRIBUTORS','Gemfile','LICENSE','NOTICE.TXT'] + s.files = Dir['lib/**/*','spec/**/*','*.gemspec','*.md','CONTRIBUTORS','Gemfile','LICENSE','NOTICE.TXT'] # Tests s.test_files = s.files.grep(%r{^(test|spec|features)/})
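
Note on the approach (illustrative sketch, not part of either patch): PATCH 1/2 resolves the JSON parsing strategy once, at class load time, by aliasing `parse` to `from_json_parse` when the running Logstash core exposes `LogStash::Event.from_json` (2.3+) and to `legacy_parse` on older 2.x cores. The standalone Ruby sketch below shows the same capability-detection pattern in isolation; the `Host` and `Codec` class names are hypothetical stand-ins and only the Ruby standard library is assumed, so this is not the plugin's actual code.

    require "json"

    # Stand-in for the Logstash core. Remove ::from_json to simulate an
    # older core that lacks the newer API.
    class Host
      def self.from_json(json)
        [JSON.parse(json)]
      end
    end

    class Codec
      # Newer path: delegate parsing to the host's from_json.
      def new_style_parse(json, &block)
        Host.from_json(json).each { |obj| yield obj }
      end

      # Older path: parse locally and normalize arrays vs. single objects.
      def legacy_style_parse(json, &block)
        decoded = JSON.parse(json)
        decoded.is_a?(Array) ? decoded.each { |obj| yield obj } : yield(decoded)
      end

      # Resolved once when the class body is evaluated, so callers always
      # invoke the same entry point and there is no per-call branching.
      alias_method :parse,
                   Host.respond_to?(:from_json) ? :new_style_parse : :legacy_style_parse
    end

    Codec.new.parse('{"foo":"bar"}') { |obj| p obj }  # prints {"foo"=>"bar"}

The spec changes mirror this from the test side: the shared `:codec` examples run twice, once with `parse` stubbed to call `legacy_parse` directly ("forcing legacy parsing") and once with whichever implementation the running core selects ("default parser choice"), so both code paths stay exercised on every 2.x release.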