From a1b2ebbd3430175dd86bb8a4c43f082299e5b525 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Wed, 19 Feb 2020 16:11:55 -0700 Subject: [PATCH 1/3] Check expand_event_list_from_field before checking content-type --- x-pack/filebeat/input/s3/input.go | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/x-pack/filebeat/input/s3/input.go b/x-pack/filebeat/input/s3/input.go index c038797a807..9ceba051bf9 100644 --- a/x-pack/filebeat/input/s3/input.go +++ b/x-pack/filebeat/input/s3/input.go @@ -427,17 +427,6 @@ func (p *s3Input) createEventsFromS3Info(svc s3iface.ClientAPI, info s3Info, s3C defer resp.Body.Close() reader := bufio.NewReader(resp.Body) - // Check content-type - if (resp.ContentType != nil && *resp.ContentType == "application/x-gzip") || strings.HasSuffix(info.key, ".gz") { - gzipReader, err := gzip.NewReader(resp.Body) - if err != nil { - err = errors.Wrap(err, "gzip.NewReader failed") - p.logger.Error(err) - return err - } - reader = bufio.NewReader(gzipReader) - gzipReader.Close() - } // Decode JSON documents when expand_event_list_from_field is given in config if p.config.ExpandEventListFromField != "" { @@ -451,6 +440,18 @@ func (p *s3Input) createEventsFromS3Info(svc s3iface.ClientAPI, info s3Info, s3C return nil } + // Check content-type + if (resp.ContentType != nil && *resp.ContentType == "application/x-gzip") || strings.HasSuffix(info.key, ".gz") { + gzipReader, err := gzip.NewReader(resp.Body) + if err != nil { + err = errors.Wrap(err, "gzip.NewReader failed") + p.logger.Error(err) + return err + } + reader = bufio.NewReader(gzipReader) + gzipReader.Close() + } + // handle s3 objects that are not json content-type offset := 0 for { From ef7eb1c2540737c5fdc61e33ffb9996806163372 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Wed, 19 Feb 2020 16:25:04 -0700 Subject: [PATCH 2/3] update changelog --- CHANGELOG.next.asciidoc | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.next.asciidoc 
b/CHANGELOG.next.asciidoc index 2cc088b8a7a..35bc6b42246 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -72,6 +72,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Prevent Elasticsearch from spewing log warnings about redundant wildcards when setting up ingest pipelines for the `elasticsearch` module. {issue}15840[15840] {pull}15900[15900] - Fix mapping error for cloudtrail additionalEventData field {pull}16088[16088] - Fix a connection error in httpjson input. {pull}16123[16123] +- Fix s3 input with cloudtrail fileset reading json file. {issue}16374[16374] {pull}16441[16441] *Heartbeat* From 0c80f5e5774387c4c75cc9179bda2c561c6680e6 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Wed, 26 Feb 2020 07:37:50 -0700 Subject: [PATCH 3/3] improve documentation --- x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc b/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc index 6715b854747..fb84c486a42 100644 --- a/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc +++ b/x-pack/filebeat/docs/inputs/input-aws-s3.asciidoc @@ -59,7 +59,11 @@ If the fileset using this input expects to receive multiple messages bundled under a specific field then the config option expand_event_list_from_field value can be assigned the name of the field. This setting will be able to split the messages under the group value into separate events. For example, CloudTrail logs -are in JSON format and events are found under the JSON object "Records": +are in JSON format and events are found under the JSON object "Records". + +Note: When the `expand_event_list_from_field` parameter is given in the config, +the s3 input will assume the logs are in JSON format and decode them as JSON. The +content type will not be checked. [float] ==== `api_timeout`