Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GeoIP database add license files #12756

Merged
merged 6 commits into from
Mar 25, 2021
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions x-pack/lib/filters/geoip/database_manager.rb
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,10 @@ def initialize(geoip, database_path, database_type, vendor_path)
end
end

DEFAULT_DATABASE_FILENAME = ["GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb"].freeze
DEFAULT_DATABASE_FILENAME = %w{
GeoLite2-City.mmdb
GeoLite2-ASN.mmdb
}.map(&:freeze).freeze

public

Expand Down Expand Up @@ -98,7 +101,7 @@ def database_path
# return a valid database path or default database path
def patch_database_path(database_path)
return database_path if file_exist?(database_path)
return database_path if database_path = get_file_path("GeoLite2-#{@database_type}.mmdb") and file_exist?(database_path)
return database_path if database_path = get_file_path(database_name) and file_exist?(database_path)
raise "You must specify 'database => ...' in your geoip filter (I looked for '#{database_path}')"
end

Expand All @@ -120,11 +123,12 @@ def check_age
end
end

# Clean up files .mmdb, .gz which are not mentioned in metadata and not default database
# Clean up files .mmdb, .tgz which are not mentioned in metadata and not default database
def clean_up_database
if @metadata.exist?
protected_filenames = (@metadata.database_filenames + DEFAULT_DATABASE_FILENAME).uniq
existing_filenames = ::Dir.glob(get_file_path('*.{mmdb,gz}')).map { |path| path.split("/").last }
existing_filenames = ::Dir.glob(get_file_path("*.{#{DB_EXT},#{GZ_EXT}}"))
.map { |path| ::File.basename(path) }

(existing_filenames - protected_filenames).each do |filename|
::File.delete(get_file_path(filename))
Expand Down
7 changes: 4 additions & 3 deletions x-pack/lib/filters/geoip/database_metadata.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ def initialize(database_type, vendor_path)
# csv format: database_type, update_at, gz_md5, md5, filename
def save_timestamp(database_path)
metadata = get_metadata(false)
metadata << [@database_type, Time.now.to_i, md5(database_path + '.gz'), md5(database_path), database_path.split("/").last]
metadata << [@database_type, Time.now.to_i, md5(get_gz_name(database_path)), md5(database_path),
::File.basename(database_path)]

::CSV.open @metadata_path, 'w' do |csv|
metadata.each { |row| csv << row }
Expand Down Expand Up @@ -58,9 +59,9 @@ def updated_at
.last || 0).to_i
end

# Return database related filenames in .mmdb .gz
# Return database related filenames in .mmdb .tgz
def database_filenames
get_all.flat_map { |metadata| [metadata[Column::FILENAME], metadata[Column::FILENAME] + '.gz'] }
get_all.flat_map { |metadata| [ metadata[Column::FILENAME], get_gz_name(metadata[Column::FILENAME]) ] }
end

def exist?
Expand Down
30 changes: 19 additions & 11 deletions x-pack/lib/filters/geoip/download_manager.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# or more contributor license agreements. Licensed under the Elastic License;
# you may not use this file except in compliance with the Elastic License.

require_relative '../../../../lib/bootstrap/util/compress'
require "logstash/util/loggable"
require_relative "util"
require_relative "database_metadata"
Expand All @@ -11,6 +12,7 @@
require "zlib"
require "stud/try"
require "down"
require "fileutils"

module LogStash module Filters module Geoip class DownloadManager
include LogStash::Util::Loggable
Expand All @@ -23,7 +25,8 @@ def initialize(database_type, metadata, vendor_path)
end

GEOIP_HOST = "https://geoip.elastic.co".freeze
GEOIP_ENDPOINT = "#{GEOIP_HOST}/v1/database".freeze
GEOIP_PATH = "/v1/database".freeze
GEOIP_ENDPOINT = "#{GEOIP_HOST}#{GEOIP_PATH}".freeze

public
# Check available update and download it. Unzip and validate the file.
Expand All @@ -49,7 +52,7 @@ def check_update
logger.debug("check update", :endpoint => GEOIP_ENDPOINT, :response => res.status)

dbs = JSON.parse(res.body)
target_db = dbs.select { |db| db['name'].include?(@database_type) }.first
target_db = dbs.select { |db| db['name'].eql?("#{database_name_prefix}.#{GZ_EXT}") }.first
has_update = @metadata.gz_md5 != target_db['md5_hash']
logger.info "new database version detected? #{has_update}"

Expand All @@ -58,7 +61,7 @@ def check_update

def download_database(server_db)
Stud.try(3.times) do
new_database_zip_path = get_file_path("GeoLite2-#{@database_type}_#{Time.now.to_i}.mmdb.gz")
new_database_zip_path = get_file_path("#{database_name_prefix}_#{Time.now.to_i}.#{GZ_EXT}")
Down.download(server_db['url'], destination: new_database_zip_path)
raise "the new download has wrong checksum" if md5(new_database_zip_path) != server_db['md5_hash']

Expand All @@ -67,19 +70,24 @@ def download_database(server_db)
end
end

# extract COPYRIGHT.txt, LICENSE.txt and GeoLite2-{ASN,City}.mmdb from .tgz to temp directory
def unzip(zip_path)
database_path = zip_path[0...-3]
Zlib::GzipReader.open(zip_path) do |gz|
::File.open(database_path, "wb") do |f|
f.print gz.read
end
end
database_path
new_database_path = zip_path[0...-(GZ_EXT.length)] + DB_EXT
temp_dir = Stud::Temporary.pathname

LogStash::Util::Tar.extract(zip_path, temp_dir)
logger.debug("extract database to ", :path => temp_dir)


FileUtils.cp(::File.join(temp_dir, database_name), new_database_path)
FileUtils.cp_r(::Dir.glob(::File.join(temp_dir, "{COPYRIGHT,LICENSE}.txt")), @vendor_path)

new_database_path
end

# Make sure the path has usable database
def assert_database!(database_path)
raise "failed to load database #{database_path}" unless org.logstash.filters.GeoIPFilter.database_valid?(database_path)
raise "failed to load database #{database_path}" unless org.logstash.filters.geoip.GeoIPFilter.database_valid?(database_path)
Copy link
Contributor Author

@kaisecheng kaisecheng Mar 22, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

logstash-filter-geoip rename the package to org.logstash.filters.geoip and is waiting to merge that cause the failure of CI. Once the plugin is published, CI should be green

end

def rest_client
Expand Down
40 changes: 29 additions & 11 deletions x-pack/lib/filters/geoip/util.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,36 @@
require "digest"


module LogStash module Filters module Geoip module Util
module LogStash module Filters
module Geoip
GZ_EXT = 'tgz'.freeze
DB_EXT = 'mmdb'.freeze

def get_file_path(filename)
::File.join(@vendor_path, filename)
end
module Util
def get_file_path(filename)
::File.join(@vendor_path, filename)
end

def file_exist?(path)
!path.nil? && ::File.exist?(path) && !::File.empty?(path)
end
def file_exist?(path)
!path.nil? && ::File.exist?(path) && !::File.empty?(path)
end

def md5(file_path)
file_exist?(file_path) ? Digest::MD5.hexdigest(::File.read(file_path)): ""
end
def md5(file_path)
file_exist?(file_path) ? Digest::MD5.hexdigest(::File.read(file_path)): ""
end

end end end end
# replace *.mmdb to *.tgz
def get_gz_name(filename)
filename[0...-(DB_EXT.length)] + GZ_EXT
end

def database_name_prefix
@database_name_prefix ||= "GeoLite2-#{@database_type}"
yaauie marked this conversation as resolved.
Show resolved Hide resolved
end

def database_name
@database_name ||= database_name_prefix + '.' + DB_EXT
end
end
end
end end
6 changes: 3 additions & 3 deletions x-pack/spec/filters/geoip/database_manager_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -130,11 +130,11 @@ module LogStash module Filters module Geoip

context "clean up database" do
let(:asn00) { get_file_path("GeoLite2-ASN_000000000.mmdb") }
let(:asn00gz) { get_file_path("GeoLite2-ASN_000000000.gz") }
let(:asn00gz) { get_file_path("GeoLite2-ASN_000000000.tgz") }
let(:city00) { get_file_path("GeoLite2-City_000000000.mmdb") }
let(:city00gz) { get_file_path("GeoLite2-City_000000000.gz") }
let(:city00gz) { get_file_path("GeoLite2-City_000000000.tgz") }
let(:city44) { get_file_path("GeoLite2-City_4444444444.mmdb") }
let(:city44gz) { get_file_path("GeoLite2-City_4444444444.gz") }
let(:city44gz) { get_file_path("GeoLite2-City_4444444444.tgz") }

before(:each) do
[asn00, asn00gz, city00, city00gz, city44, city44gz].each { |file_path| ::File.delete(file_path) if ::File.exist?(file_path) }
Expand Down
17 changes: 14 additions & 3 deletions x-pack/spec/filters/geoip/database_metadata_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,23 @@ module LogStash module Filters module Geoip
end

context "save timestamp" do
before do
::File.open(DEFAULT_CITY_GZ_PATH, "w") { |f| f.write "make a non empty file" }
end

after do
delete_file(DEFAULT_CITY_GZ_PATH)
end

it "write the current time" do
dbm.save_timestamp(DEFAULT_CITY_DB_PATH)

metadata = dbm.get_metadata.last
expect(metadata[DatabaseMetadata::Column::DATABASE_TYPE]).to eq("City")
past = metadata[DatabaseMetadata::Column::UPDATE_AT]
expect(Time.now.to_i - past.to_i).to be < 100
expect(metadata[DatabaseMetadata::Column::GZ_MD5]).to eq('')
expect(metadata[DatabaseMetadata::Column::GZ_MD5]).not_to be_empty
expect(metadata[DatabaseMetadata::Column::GZ_MD5]).to eq(md5(DEFAULT_CITY_GZ_PATH))
Copy link
Member

@yaauie yaauie Mar 22, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm having a hard time finding where this md5 method is coming from, and am worried that we are polluting the Object space somehow in these specs in a way that is not replicated in released artifacts. EDIT: we are in fact polluting Object, but not in the way I had guessed. See other comment about our spec helpers.

I see that this spec opens up the Geoip filter's definition, then opens up a spec context with describe, effectively monkey-patching the filter definition in the process. This is at best unclear, and at worst will cause us to validate behaviour provided by the specs themselves instead of code available from lib.

module LogStash module Filters module Geoip
  describe DatabaseMetadata, :aggregate_failures do
    # ...
  end
end end end

Should be:

context LogStash::Filters::Geoip do
  describe DatabaseMetadata, :aggregate_failures do
    # ...
  end
end

expect(metadata[DatabaseMetadata::Column::MD5]).to eq(DEFAULT_CITY_DB_MD5)
expect(metadata[DatabaseMetadata::Column::FILENAME]).to eq(DEFAULT_CITY_DB_NAME)
end
Expand Down Expand Up @@ -128,9 +137,10 @@ module LogStash module Filters module Geoip
end

context "database filenames" do
it "should give filename in .mmdb .gz" do
it "should give filename in .mmdb .tgz" do
write_temp_metadata(temp_metadata_path)
expect(dbm.database_filenames).to match_array([DEFAULT_CITY_DB_NAME, DEFAULT_ASN_DB_NAME, 'GeoLite2-City.mmdb.gz', 'GeoLite2-ASN.mmdb.gz'])
expect(dbm.database_filenames).to match_array([DEFAULT_CITY_DB_NAME, DEFAULT_ASN_DB_NAME,
'GeoLite2-City.tgz', 'GeoLite2-ASN.tgz'])
end
end

Expand All @@ -145,5 +155,6 @@ module LogStash module Filters module Geoip
expect(dbm.exist?).to be_truthy
end
end

end
end end end
44 changes: 32 additions & 12 deletions x-pack/spec/filters/geoip/download_manager_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ module LogStash module Filters module Geoip
end
let(:logger) { double("Logger") }

GEOIP_STAGING_HOST = "https://geoip.elastic.dev"
GEOIP_STAGING_ENDPOINT = "#{GEOIP_STAGING_HOST}#{LogStash::Filters::Geoip::DownloadManager::GEOIP_PATH}"

before do
stub_const('LogStash::Filters::Geoip::DownloadManager::GEOIP_ENDPOINT', GEOIP_STAGING_ENDPOINT)
end

context "rest client" do
it "can call endpoint" do
Expand All @@ -32,7 +38,9 @@ module LogStash module Filters module Geoip
context "check update" do
before(:each) do
expect(download_manager).to receive(:get_uuid).and_return(SecureRandom.uuid)
mock_resp = double("geoip_endpoint", :body => ::File.read(::File.expand_path("./fixtures/normal_resp.json", ::File.dirname(__FILE__))), :status => 200)
mock_resp = double("geoip_endpoint",
:body => ::File.read(::File.expand_path("./fixtures/normal_resp.json", ::File.dirname(__FILE__))),
:status => 200)
allow(download_manager).to receive_message_chain("rest_client.get").and_return(mock_resp)
end

Expand All @@ -50,7 +58,7 @@ module LogStash module Filters module Geoip
end

it "should return false when md5 is the same" do
expect(mock_metadata).to receive(:gz_md5).and_return("2449075797a3ecd7cd2d4ea9d01e6e8f")
expect(mock_metadata).to receive(:gz_md5).and_return("89d225ac546310b1e7979502ac9ad11c")

has_update, info = download_manager.send(:check_update)
expect(has_update).to be_falsey
Expand All @@ -75,7 +83,7 @@ module LogStash module Filters module Geoip
}
end
let(:md5_hash) { SecureRandom.hex }
let(:filename) { "GeoLite2-City.mmdb.gz"}
let(:filename) { "GeoLite2-City.tgz"}

it "should raise error if md5 does not match" do
allow(Down).to receive(:download)
Expand All @@ -86,26 +94,38 @@ module LogStash module Filters module Geoip
expect(download_manager).to receive(:md5).and_return(md5_hash)

path = download_manager.send(:download_database, db_info)
expect(path).to match /GeoLite2-City_\d+\.mmdb\.gz/
expect(path).to match /GeoLite2-City_\d+\.tgz/
expect(::File.exist?(path)).to be_truthy
::File.delete(path) if ::File.exist?(path)

delete_file(path)
end
end

context "unzip" do
before(:each) do
let(:copyright_path) { get_file_path('COPYRIGHT.txt') }
let(:license_path) { get_file_path('LICENSE.txt') }
let(:readme_path) { get_file_path('README.txt') }

before do
file_path = ::File.expand_path("./fixtures/sample", ::File.dirname(__FILE__))
::File.delete(file_path) if ::File.exist?(file_path)
delete_file(file_path, copyright_path, license_path, readme_path)
end

it "gz file" do
path = ::File.expand_path("./fixtures/sample.gz", ::File.dirname(__FILE__))
unzip_path = download_manager.send(:unzip, path)
expect(::File.exist?(unzip_path)).to be_truthy
it "should extract database and license related files" do
path = ::File.expand_path("./fixtures/sample.tgz", ::File.dirname(__FILE__))
unzip_db_path = download_manager.send(:unzip, path)

expect(unzip_db_path).to match /\.mmdb/
expect(::File.exist?(unzip_db_path)).to be_truthy
expect(::File.exist?(copyright_path)).to be_truthy
expect(::File.exist?(license_path)).to be_truthy
expect(::File.exist?(readme_path)).to be_falsey

delete_file(unzip_db_path, copyright_path, license_path)
end
end

context "assert database" do
xcontext "assert database" do
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why are we skipping this group of assertions?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because we move theGeoIPFilter.java from org.logstash.filters to org.logstash.filters.geoip in ECS support PR.
Once the plugin release a new version, this test case will reopen again

it "should raise error if file is invalid" do
expect{ download_manager.send(:assert_database!, "Gemfile") }.to raise_error /failed to load database/
end
Expand Down
Loading