import-beats: copy ingest pipelines #270

Merged · 4 commits · Mar 18, 2020
22 changes: 17 additions & 5 deletions dev/import-beats/datasets.go
@@ -14,11 +14,16 @@ import (
 )
 
 type datasetContent struct {
-	fields fieldsContent
+	fields        fieldsContent
+	elasticsearch elasticsearchContent
 }
 
-type fieldsContent struct {
-	files map[string][]byte
+type datasetManifestMultiplePipelines struct {
+	IngestPipeline []string `yaml:"ingest_pipeline"`
 }
 
+type datasetManifestSinglePipeline struct {
+	IngestPipeline string `yaml:"ingest_pipeline"`
+}
+
 func createDatasets(modulePath, moduleName string) (map[string]datasetContent, error) {
@@ -43,7 +48,8 @@ func createDatasets(modulePath, moduleName string) (map[string]datasetContent, e
 			continue
 		}
 
-		_, err := os.Stat(path.Join(modulePath, datasetName, "_meta"))
+		datasetPath := path.Join(modulePath, datasetName)
+		_, err := os.Stat(path.Join(datasetPath, "_meta"))
 		if os.IsNotExist(err) {
 			log.Printf("\t%s: not a valid dataset, skipped", datasetName)
 			continue
@@ -57,13 +63,19 @@ func createDatasets(modulePath, moduleName string) (map[string]datasetContent, e
 			return nil, errors.Wrapf(err, "loading dataset fields failed (modulePath: %s, datasetName: %s)",
 				modulePath, datasetName)
 		}
 		content.fields = fieldsContent{
 			files: map[string][]byte{
 				"package-fields.yml": moduleFieldsFiles,
 				"fields.yml":         fieldsFiles,
 			},
 		}
+
+		elasticsearch, err := loadElasticsearchContent(datasetPath)
+		if err != nil {
+			return nil, errors.Wrapf(err, "loading elasticsearch content failed (datasetPath: %s)", datasetPath)
+		}
+		content.elasticsearch = elasticsearch
 
 		contents[datasetName] = content
 	}
 	return contents, nil
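Note: the two new structs cover the two shapes a Beats dataset manifest.yml uses for the ingest_pipeline setting, a single path or a list. A minimal sketch of each (file names here are illustrative):

ingest_pipeline: ingest/pipeline.yml

ingest_pipeline:
  - ingest/pipeline-entry.json
  - ingest/pipeline-plaintext.json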
101 changes: 101 additions & 0 deletions dev/import-beats/elasticsearch.go
@@ -0,0 +1,101 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License;
// you may not use this file except in compliance with the Elastic License.

package main

import (
	"fmt"
	"io/ioutil"
	"log"
	"os"
	"path"
	"strings"

	"github.com/pkg/errors"
	"gopkg.in/yaml.v2"
)

type elasticsearchContent struct {
	ingestPipelines []ingestPipelineContent
}

type ingestPipelineContent struct {
	source         string
	targetFileName string
}

func loadElasticsearchContent(datasetPath string) (elasticsearchContent, error) {
	var esc elasticsearchContent

	datasetManifestPath := path.Join(datasetPath, "manifest.yml")
	datasetManifestFile, err := ioutil.ReadFile(datasetManifestPath)
	if os.IsNotExist(err) {
		return elasticsearchContent{}, nil // no manifest.yml file found, nothing to load
	}
	if err != nil {
		return elasticsearchContent{}, errors.Wrapf(err, "reading dataset manifest file failed (path: %s)", datasetManifestPath)
	}

	var ingestPipelines []string
	var dmsp datasetManifestSinglePipeline
	err = yaml.Unmarshal(datasetManifestFile, &dmsp)
	if err == nil {
		if len(dmsp.IngestPipeline) > 0 {
			ingestPipelines = append(ingestPipelines, dmsp.IngestPipeline)
		}
	} else {
		var dmmp datasetManifestMultiplePipelines
		err = yaml.Unmarshal(datasetManifestFile, &dmmp)
		if err != nil {
			return elasticsearchContent{}, errors.Wrapf(err, "unmarshalling dataset manifest file failed (path: %s)", datasetManifestPath)
		}

		if len(dmmp.IngestPipeline) > 0 {
			ingestPipelines = append(ingestPipelines, dmmp.IngestPipeline...)
		}
	}

	for _, ingestPipeline := range ingestPipelines {
		ingestPipeline = ensurePipelineFormat(ingestPipeline)

		log.Printf("\tingest-pipeline found: %s", ingestPipeline)

		targetFileName, err := determineIngestPipelineTargetName(ingestPipeline)
		if err != nil {
			return elasticsearchContent{}, errors.Wrapf(err, "can't determine ingest pipeline target name (path: %s)", ingestPipeline)
		}
		esc.ingestPipelines = append(esc.ingestPipelines, ingestPipelineContent{
			source:         path.Join(datasetPath, ingestPipeline),
			targetFileName: targetFileName,
		})
	}

	return esc, nil
}

func ensurePipelineFormat(ingestPipeline string) string {
	if strings.Contains(ingestPipeline, "{{.format}}") {
		ingestPipeline = strings.ReplaceAll(ingestPipeline, "{{.format}}", "json")
	}
	return ingestPipeline
}

func determineIngestPipelineTargetName(path string) (string, error) {
	fileName := path
	if strings.Contains(path, "/") {
		fileName = path[strings.LastIndex(path, "/")+1:]
	}

	lastDot := strings.LastIndex(fileName, ".")
	if lastDot == -1 {
		return "", fmt.Errorf("ingest pipeline file must have an extension")
	}
	fileNameWithoutExt := fileName[:lastDot]
	fileExt := fileName[lastDot+1:]

	if fileNameWithoutExt == "pipeline" || fileNameWithoutExt == "pipeline-entry" {
		return "default." + fileExt, nil
	}
	return fileName, nil
}
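To make the renaming rule concrete, a hypothetical testable example (not part of this change, assumed to sit in the same package) showing how typical pipeline paths map through ensurePipelineFormat and determineIngestPipelineTargetName:

func ExampleTargetNames() {
	for _, p := range []string{
		"ingest/pipeline.{{.format}}",    // format placeholder resolved to json first
		"ingest/pipeline-entry.yml",      // entry pipeline, renamed to default.<ext>
		"ingest/pipeline-plaintext.json", // auxiliary pipeline, name kept as-is
	} {
		p = ensurePipelineFormat(p)
		name, err := determineIngestPipelineTargetName(p)
		if err != nil {
			log.Fatal(err)
		}
		fmt.Println(name)
	}
	// Output:
	// default.json
	// default.yml
	// pipeline-plaintext.json
}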
4 changes: 4 additions & 0 deletions dev/import-beats/fields.go
@@ -16,6 +16,10 @@ import (
 	"github.com/pkg/errors"
 )
 
+type fieldsContent struct {
+	files map[string][]byte
+}
+
 func loadModuleFields(modulePath string) ([]byte, error) {
 	moduleFieldsPath := filepath.Join(modulePath, "_meta", "fields.yml")
 	moduleFieldsFile, err := os.Open(moduleFieldsPath)
28 changes: 24 additions & 4 deletions dev/import-beats/packages.go
@@ -230,6 +230,22 @@ func (r *packageRepository) save(outputDir string) error {
 					}
 				}
 			}
+
+			if len(dataset.elasticsearch.ingestPipelines) > 0 {
+				ingestPipelinePath := filepath.Join(datasetPath, "elasticsearch", "ingest-pipeline")
+				err := os.MkdirAll(ingestPipelinePath, 0755)
+				if err != nil {
+					return errors.Wrapf(err, "cannot make directory for dataset ingest pipelines: '%s'", ingestPipelinePath)
+				}
+
+				for _, ingestPipeline := range dataset.elasticsearch.ingestPipelines {
+					log.Printf("\tcopy ingest pipeline file '%s' to '%s'", ingestPipeline.source, ingestPipelinePath)
+					err := copyFileToTarget(ingestPipeline.source, ingestPipelinePath, ingestPipeline.targetFileName)
+					if err != nil {
+						return errors.Wrapf(err, "copying file failed")
+					}
+				}
+			}
 		}
 
 		// img
@@ -289,16 +305,20 @@ func (r *packageRepository) save(outputDir string) error {
 }
 
 func copyFile(src, dstDir string) error {
+	i := strings.LastIndex(src, "/")
+	sourceFileName := src[i:]
+
+	return copyFileToTarget(src, dstDir, sourceFileName)
+}
+
+func copyFileToTarget(src, dstDir, targetFileName string) error {
 	sourceFile, err := os.Open(src)
 	if err != nil {
 		return errors.Wrapf(err, "opening file failed (src: %s)", src)
 	}
 	defer sourceFile.Close()
 
-	i := strings.LastIndex(sourceFile.Name(), "/")
-	sourceFileName := sourceFile.Name()[i:]
-
-	dst := path.Join(dstDir, sourceFileName)
+	dst := path.Join(dstDir, targetFileName)
 	err = os.MkdirAll(dstDir, 0755)
 	if err != nil {
 		return errors.Wrapf(err, "cannot make directory: '%s'", dst)
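Taken together with the loader, a dataset that declares a single entry pipeline would be saved with a layout along these lines (dataset name and extension are illustrative):

dataset/<datasetName>/
	elasticsearch/
		ingest-pipeline/
			default.json   (renamed from ingest/pipeline.json by copyFileToTarget)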
@@ -0,0 +1,32 @@
---
description: Pipeline for parsing ActiveMQ audit logs.
processors:
  - grok:
      field: message
      pattern_definitions:
        NOPIPEGREEDYDATA: "((?! \\|).)*"
        THREAD_NAME: "((?! \n).)*"
      patterns:
        - "%{LOGLEVEL:log.level}%{SPACE}\\|%{SPACE}%{WORD:activemq.user}%{SPACE}%{NOPIPEGREEDYDATA:message}%{SPACE}\\|%{SPACE}%{THREAD_NAME:activemq.thread}"
      ignore_missing: true
  - set:
      field: event.kind
      value: event
  - set:
      if: "ctx?.activemq?.user != null"
      field: user.name
      value: "{{activemq.user}}"
  - script:
      if: "ctx?.log?.level != null"
      lang: painless
      source: >-
        def err_levels = ["FATAL", "ERROR", "WARN"];
        if (err_levels.contains(ctx.log.level)) {
          ctx.event.type = "error";
        } else {
          ctx.event.type = "info";
        }
on_failure:
  - set:
      field: error.message
      value: "{{ _ingest.on_failure_message }}"
@@ -0,0 +1,43 @@
---
description: Pipeline for parsing ActiveMQ logs.
processors:
  - grok:
      field: message
      pattern_definitions:
        GREEDYMULTILINE: "(.|\\n|\\t)*"
        NOPIPEGREEDYDATA: "((?! \\|).)*"
        THREAD_NAME: "((?! \n).)*"
      patterns:
        - "%{TIMESTAMP_ISO8601:timestamp}%{SPACE}\\|%{SPACE}%{LOGLEVEL:log.level}%{SPACE}\\|%{SPACE}%{NOPIPEGREEDYDATA:message}%{SPACE}\\|%{SPACE}%{NOPIPEGREEDYDATA:activemq.caller}%{SPACE}\\|%{SPACE}%{THREAD_NAME:activemq.thread}%{SPACE}?%{GREEDYMULTILINE:activemq.log.stack_trace}"
      ignore_missing: true
  - date:
      if: "ctx.event.timezone == null"
      field: timestamp
      target_field: "@timestamp"
      formats: ["yyyy-MM-dd HH:mm:ss,SSS"]
  - date:
      if: "ctx.event.timezone != null"
      field: "timestamp"
      target_field: "@timestamp"
      timezone: "{{ event.timezone }}"
      formats: ["yyyy-MM-dd HH:mm:ss,SSS"]
  - remove:
      field:
        - timestamp
  - set:
      field: event.kind
      value: event
  - script:
      if: "ctx?.log?.level != null"
      lang: painless
      source: >-
        def err_levels = ["FATAL", "ERROR", "WARN"];
        if (err_levels.contains(ctx.log.level)) {
          ctx.event.type = "error";
        } else {
          ctx.event.type = "info";
        }
on_failure:
  - set:
      field: error.message
      value: "{{ _ingest.on_failure_message }}"
@@ -0,0 +1,103 @@
description: "Pipeline for parsing Apache HTTP Server access logs. Requires the geoip and user_agent plugins."

processors:
- grok:
field: message
patterns:
- '%{IPORHOST:destination.domain} %{IPORHOST:source.ip} - %{DATA:user.name} \[%{HTTPDATE:apache.access.time}\]
"(?:%{WORD:http.request.method} %{DATA:url.original} HTTP/%{NUMBER:http.version}|-)?"
%{NUMBER:http.response.status_code:long} (?:%{NUMBER:http.response.body.bytes:long}|-)(
"%{DATA:http.request.referrer}")?( "%{DATA:user_agent.original}")?'
- '%{IPORHOST:source.address} - %{DATA:user.name} \[%{HTTPDATE:apache.access.time}\]
"(?:%{WORD:http.request.method} %{DATA:url.original} HTTP/%{NUMBER:http.version}|-)?"
%{NUMBER:http.response.status_code:long} (?:%{NUMBER:http.response.body.bytes:long}|-)(
"%{DATA:http.request.referrer}")?( "%{DATA:user_agent.original}")?'
- '%{IPORHOST:source.address} - %{DATA:user.name} \[%{HTTPDATE:apache.access.time}\]
"-" %{NUMBER:http.response.status_code:long} -'
- \[%{HTTPDATE:apache.access.time}\] %{IPORHOST:source.address} %{DATA:apache.access.ssl.protocol}
%{DATA:apache.access.ssl.cipher} "%{WORD:http.request.method} %{DATA:url.original}
HTTP/%{NUMBER:http.version}" (-|%{NUMBER:http.response.body.bytes:long})
ignore_missing: true
- remove:
field: message
- set:
field: event.kind
value: event
- set:
field: event.category
value: web
- set:
field: event.outcome
value: success
if: "ctx?.http?.response?.status_code != null && ctx.http.response.status_code < 400"
- set:
field: event.outcome
value: failure
if: "ctx?.http?.response?.status_code != null && ctx.http.response.status_code > 399"
- lowercase:
field: http.request.method
ignore_missing: true
- grok:
field: source.address
ignore_missing: true
patterns:
- ^(%{IP:source.ip}|%{HOSTNAME:source.domain})$
- rename:
field: '@timestamp'
target_field: event.created
- date:
field: apache.access.time
target_field: '@timestamp'
formats:
- dd/MMM/yyyy:H:m:s Z
ignore_failure: true
- remove:
field: apache.access.time
ignore_failure: true
- user_agent:
field: user_agent.original
ignore_failure: true
- geoip:
field: source.ip
target_field: source.geo
ignore_missing: true
- geoip:
database_file: GeoLite2-ASN.mmdb
field: source.ip
target_field: source.as
properties:
- asn
- organization_name
ignore_missing: true
- rename:
field: source.as.asn
target_field: source.as.number
ignore_missing: true
- rename:
field: source.as.organization_name
target_field: source.as.organization.name
ignore_missing: true
- set:
field: tls.cipher
value: '{{apache.access.ssl.cipher}}'
if: ctx?.apache?.access?.ssl?.cipher != null

- script:
lang: painless
if: ctx?.apache?.access?.ssl?.protocol != null
source: >-
def parts = ctx.apache.access.ssl.protocol.toLowerCase().splitOnToken("v");
if (parts.length != 2) {
return;
}
if (parts[1].contains(".")) {
ctx.tls.version = parts[1];
} else {
ctx.tls.version = parts[1] + ".0";
}
ctx.tls.version_protocol = parts[0];

on_failure:
- set:
field: error.message
value: '{{ _ingest.on_failure_message }}'
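The closing script processor rewrites Apache's SSL protocol string into ECS-style tls fields; two invented inputs and the results the script produces:

apache.access.ssl.protocol: "TLSv1.2" → tls.version_protocol: "tls", tls.version: "1.2"
apache.access.ssl.protocol: "SSLv3"   → tls.version_protocol: "ssl", tls.version: "3.0"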