Skip to content

Commit

Permalink
[pkg/ottl] Add GetXML Converter
Browse files Browse the repository at this point in the history
  • Loading branch information
djaglowski committed Sep 27, 2024
1 parent 507ec47 commit 3d1da30
Show file tree
Hide file tree
Showing 6 changed files with 247 additions and 1 deletion.
6 changes: 6 additions & 0 deletions pkg/ottl/e2e/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,12 @@ func Test_e2e_converters(t *testing.T) {
tCtx.GetLogRecord().Attributes().PutInt("test", 1)
},
},
{
statement: `set(attributes["test"], GetXML("<a><b>1</b><c><b>2</b></c></a>", "/a//b"))`,
want: func(tCtx ottllog.TransformContext) {
tCtx.GetLogRecord().Attributes().PutStr("test", "<b>1</b><b>2</b>")
},
},
{
statement: `set(attributes["test"], Hex(1.0))`,
want: func(tCtx ottllog.TransformContext) {
Expand Down
32 changes: 32 additions & 0 deletions pkg/ottl/ottlfuncs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,7 @@ Available Converters:
- [ExtractGrokPatterns](#extractgrokpatterns)
- [FNV](#fnv)
- [Format](#format)
- [GetXML](#getxml)
- [Hex](#hex)
- [Hour](#hour)
- [Hours](#hours)
Expand Down Expand Up @@ -742,6 +743,37 @@ Examples:
- `Format("%04d-%02d-%02d", [Year(Now()), Month(Now()), Day(Now())])`
- `Format("%s/%s/%04d-%02d-%02d.log", [attributes["hostname"], body["program"], Year(Now()), Month(Now()), Day(Now())])`


### GetXML

`GetXML(target, xpath)`

The `GetXML` Converter returns an XML string with selected elements.

`target` is a Getter that returns a string. This string should be in XML format.
If `target` is nil, is not a string, or is not valid XML, `GetXML` will return an error.

`xpath` is a string that specifies an [XPath](https://www.w3.org/TR/1999/REC-xpath-19991116/) expression that
selects one or more elements. Currently, this converter only supports selecting elements.

Examples:

Get all elements at the root of the document with tag "a"

- `GetXML(body, "/a")`

Get all elements anywhere in the document with tag "a"

- `GetXML(body, "//a")`

Get the first element at the root of the document with tag "a"

- `GetXML(body, "/a[1]")`

Get all elements in the document with tag "a" that have an attribute "b" with value "c"

- `GetXML(body, "//a[@b='c']")`

### Hex

`Hex(value)`
Expand Down
63 changes: 63 additions & 0 deletions pkg/ottl/ottlfuncs/func_get_xml.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs"

import (
"context"
"fmt"

"github.com/antchfx/xmlquery"

"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl"
)

// GetXMLArguments holds the arguments accepted by the GetXML Converter.
type GetXMLArguments[K any] struct {
// Target is the getter that yields the string to be parsed as XML.
Target ottl.StringGetter[K]
// XPath is the XPath expression used to select nodes from the parsed document.
// It is validated once, when the function is created.
XPath string
}

// NewGetXMLFactory builds the ottl.Factory for the "GetXML" Converter, pairing an empty
// GetXMLArguments value with createGetXMLFunction.
func NewGetXMLFactory[K any]() ottl.Factory[K] {
	const converterName = "GetXML"
	emptyArgs := &GetXMLArguments[K]{}
	return ottl.NewFactory(converterName, emptyArgs, createGetXMLFunction[K])
}

// createGetXMLFunction converts the generic OTTL arguments into a GetXML expression function.
// It returns an error if oArgs is not a *GetXMLArguments[K] or if the configured XPath
// expression fails validation, so that a bad XPath is reported at creation time rather
// than on every execution.
func createGetXMLFunction[K any](_ ottl.FunctionContext, oArgs ottl.Arguments) (ottl.ExprFunc[K], error) {
	args, ok := oArgs.(*GetXMLArguments[K])
	if !ok {
		return nil, fmt.Errorf("GetXML args must be of type *GetXMLArguments[K]")
	}

	if err := validateXPath(args.XPath); err != nil {
		return nil, err
	}

	return getXML(args.Target, args.XPath), nil
}

// getXML returns an expression function that parses the target string as XML, evaluates
// xPath against the parsed document, and returns the selected elements serialized back to
// back as a single XML string.
// Only element nodes are included in the result; any other selected node type (for example
// an attribute or text) is skipped, which yields an empty string when nothing else matched.
// The returned function fails if the target cannot be retrieved, the XML cannot be parsed,
// or the query cannot be evaluated.
func getXML[K any](target ottl.StringGetter[K], xPath string) ottl.ExprFunc[K] {
	return func(ctx context.Context, tCtx K) (any, error) {
		targetVal, err := target.Get(ctx, tCtx)
		if err != nil {
			return nil, err
		}

		doc, err := parseNodesXML(targetVal)
		if err != nil {
			return nil, err
		}

		nodes, err := xmlquery.QueryAll(doc, xPath)
		if err != nil {
			return nil, err
		}

		// Gather the matches under a fresh document node so that serializing the
		// container's children emits just the selected elements, without a wrapper.
		// NOTE(review): AddChild attaches the matched node itself rather than a copy;
		// confirm this is safe when one match is nested inside another match.
		result := &xmlquery.Node{Type: xmlquery.DocumentNode}
		for _, n := range nodes {
			if n.Type != xmlquery.ElementNode {
				continue
			}
			xmlquery.AddChild(result, n)
		}
		return result.OutputXML(false), nil
	}
}
144 changes: 144 additions & 0 deletions pkg/ottl/ottlfuncs/func_get_xml_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs"

import (
"context"
"testing"

"github.com/stretchr/testify/assert"

"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl"
)

// Test_GetXML runs the GetXML Converter against a table of documents and XPath
// expressions and checks the serialized selection. The cases cover element selection,
// filtering by attribute, and the node types that are expected to be ignored.
func Test_GetXML(t *testing.T) {
	tests := []struct {
		name     string
		document string
		xPath    string
		want     string
	}{
		{
			name:     "get single element",
			document: `<a><b/></a>`,
			xPath:    "/a/b",
			want:     `<b></b>`,
		},
		{
			name:     "get single complex element",
			document: `<a foo="bar"><b>hello</b></a>`,
			xPath:    "/a",
			want:     `<a foo="bar"><b>hello</b></a>`,
		},
		{
			name:     "get uniform elements from same parent",
			document: `<a><b>hello</b><b>world</b></a>`,
			xPath:    "/a/b",
			want:     `<b>hello</b><b>world</b>`,
		},
		{
			name:     "get nonuniform elements from same parent",
			document: `<a><b>hello</b><b><c>world</c></b><d/></a>`,
			xPath:    "/a/*",
			want:     `<b>hello</b><b><c>world</c></b><d></d>`,
		},
		{
			name:     "get elements from various places",
			document: `<a><x>1</x><b><x>2</x></b><d><e><f><x>3</x></f></e></d></a>`,
			xPath:    "/a//x",
			want:     `<x>1</x><x>2</x><x>3</x>`,
		},
		{
			name:     "get filtered elements from various places",
			document: `<a><x env="prod">1</x><b><x env="dev">2</x></b><d><e><f><x env="prod">3</x></f></e></d></a>`,
			xPath:    "/a//x[@env='prod']",
			want:     `<x env="prod">1</x><x env="prod">3</x>`,
		},
		{
			name:     "ignore empty",
			document: ``,
			xPath:    "/",
			want:     ``,
		},
		{
			name:     "ignore declaration",
			document: `<?xml version="1.0" encoding="UTF-8"?><a></a>`,
			xPath:    "/*",
			want:     `<a></a>`,
		},
		{
			name:     "ignore comments",
			document: `<!-- comment --><a></a><!-- comment -->`,
			xPath:    "/*",
			want:     `<a></a>`,
		},
		{
			name:     "ignore attribute selection",
			document: `<a foo="bar"></a>`,
			xPath:    "/@foo",
			want:     ``,
		},
		{
			name:     "ignore text selection",
			document: `<a>hello</a>`,
			xPath:    "/a/text()",
			want:     ``,
		},
		{
			name:     "ignore chardata selection",
			document: `<a><![CDATA[hello]]></a>`,
			xPath:    "/a/text()",
			want:     ``,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			factory := NewGetXMLFactory[any]()
			exprFunc, err := factory.CreateFunction(
				ottl.FunctionContext{},
				&GetXMLArguments[any]{
					Target: ottl.StandardStringGetter[any]{
						Getter: func(_ context.Context, _ any) (any, error) {
							return tt.document, nil
						},
					},
					XPath: tt.xPath,
				})
			// Stop here on failure: exprFunc would be nil and calling it would panic,
			// masking the real assertion failure.
			if !assert.NoError(t, err) {
				return
			}

			result, err := exprFunc(context.Background(), nil)
			assert.NoError(t, err)
			assert.Equal(t, tt.want, result)
		})
	}
}

// TestCreateGetXMLFunc checks the failure modes of the GetXML factory: a wrong argument
// type and an invalid XPath must fail at creation time, while invalid XML must only
// fail when the created function is executed.
func TestCreateGetXMLFunc(t *testing.T) {
	factory := NewGetXMLFactory[any]()
	fCtx := ottl.FunctionContext{}

	// Invalid arg type
	exprFunc, err := factory.CreateFunction(fCtx, nil)
	assert.Error(t, err)
	assert.Nil(t, exprFunc)

	// Invalid XPath should error on function creation
	exprFunc, err = factory.CreateFunction(
		fCtx, &GetXMLArguments[any]{
			XPath: "!",
		})
	assert.Error(t, err)
	assert.Nil(t, exprFunc)

	// Invalid XML should error on function execution
	exprFunc, err = factory.CreateFunction(
		fCtx, &GetXMLArguments[any]{
			Target: invalidXMLGetter(),
			XPath:  "/",
		})
	assert.NoError(t, err)
	// Do not invoke a nil function: that would panic instead of reporting the failure.
	if !assert.NotNil(t, exprFunc) {
		return
	}
	_, err = exprFunc(context.Background(), nil)
	assert.Error(t, err)
}
2 changes: 1 addition & 1 deletion pkg/ottl/ottlfuncs/func_remove_xml.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ func parseNodesXML(targetVal string) (*xmlquery.Node, error) {
if err != nil {
return nil, fmt.Errorf("parse xml: %w", err)
}
if !preserveDeclearation {
if !preserveDeclearation && top.FirstChild != nil {
xmlquery.RemoveFromTree(top.FirstChild)
}
return top, nil
Expand Down
1 change: 1 addition & 0 deletions pkg/ottl/ottlfuncs/functions.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ func converters[K any]() []ottl.Factory[K] {
NewExtractPatternsFactory[K](),
NewExtractGrokPatternsFactory[K](),
NewFnvFactory[K](),
NewGetXMLFactory[K](),
NewHourFactory[K](),
NewHoursFactory[K](),
NewIntFactory[K](),
Expand Down

0 comments on commit 3d1da30

Please sign in to comment.