diff --git a/docs/operators/file_input.md b/docs/operators/file_input.md index 6062d620..4a9eff27 100644 --- a/docs/operators/file_input.md +++ b/docs/operators/file_input.md @@ -13,7 +13,7 @@ The `file_input` operator reads logs from files. It will place the lines read in | `poll_interval` | 200ms | The duration between filesystem polls | | `multiline` | | A `multiline` configuration block. See below for details | | `write_to` | `$body` | The body [field](/docs/types/field.md) written to when creating a new log entry | -| `encoding` | `nop` | The encoding of the file being read. See the list of supported encodings below for available options | +| `encoding` | `utf-8` | The encoding of the file being read. See the list of supported encodings below for available options | | `include_file_name` | `true` | Whether to add the file name as the attribute `file_name` | | `include_file_path` | `false` | Whether to add the file path as the label `file_path` | | `start_at` | `end` | At startup, where to start reading logs from the file. Options are `beginning` or `end` | diff --git a/docs/operators/tcp_input.md b/docs/operators/tcp_input.md index cde6cc43..05d1d36d 100644 --- a/docs/operators/tcp_input.md +++ b/docs/operators/tcp_input.md @@ -16,7 +16,7 @@ The `tcp_input` operator listens for logs on one or more TCP connections. The op | `resource` | {} | A map of `key: value` pairs to add to the entry's resource | | `add_attributes` | false | Adds `net.*` attributes according to [semantic convention][https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/span-general.md#general-network-connection-attributes] | | `multiline` | | A `multiline` configuration block. See below for details | -| `encoding` | `nop` | The encoding of the file being read. See the list of supported encodings below for available options | +| `encoding` | `utf-8` | The encoding of the file being read. See the list of supported encodings below for available options | #### TLS Configuration diff --git a/docs/operators/udp_input.md b/docs/operators/udp_input.md index 4674beb0..5fc8a5ce 100644 --- a/docs/operators/udp_input.md +++ b/docs/operators/udp_input.md @@ -14,7 +14,7 @@ The `udp_input` operator listens for logs from UDP packets. | `resource` | {} | A map of `key: value` pairs to add to the entry's resource | | `add_attributes` | false | Adds `net.*` attributes according to [semantic convention][https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/span-general.md#general-network-connection-attributes] | | `multiline` | | A `multiline` configuration block. See below for details | -| `encoding` | `nop` | The encoding of the file being read. See the list of supported encodings below for available options | +| `encoding` | `utf-8` | The encoding of the file being read. See the list of supported encodings below for available options | #### `multiline` configuration diff --git a/operator/helper/encoding.go b/operator/helper/encoding.go index 875921ff..5c116b89 100644 --- a/operator/helper/encoding.go +++ b/operator/helper/encoding.go @@ -29,7 +29,7 @@ import ( // NewBasicConfig creates a new Encoding config func NewEncodingConfig() EncodingConfig { return EncodingConfig{ - Encoding: "nop", + Encoding: "utf-8", } } @@ -76,11 +76,12 @@ func (e *Encoding) Decode(msgBuf []byte) (string, error) { var encodingOverrides = map[string]encoding.Encoding{ "utf-16": unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM), "utf16": unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM), + "utf-8": unicode.UTF8, "utf8": unicode.UTF8, "ascii": unicode.UTF8, "us-ascii": unicode.UTF8, "nop": encoding.Nop, - "": encoding.Nop, + "": unicode.UTF8, } func lookupEncoding(enc string) (encoding.Encoding, error) {