From beb78355203fa7dc7d264cf47a4d2bd6207103d5 Mon Sep 17 00:00:00 2001 From: Curtis Robert Date: Tue, 3 Sep 2024 12:12:50 -0700 Subject: [PATCH 1/9] Add apachespark receiver --- CHANGELOG.md | 4 + docker/docker-compose.yml | 7 + docs/components.md | 1 + go.mod | 1 + go.sum | 2 + internal/components/components.go | 2 + internal/components/components_test.go | 1 + .../receivers/apachespark/apachespark_test.go | 43 + .../testdata/all_metrics_config.yaml | 21 + .../testdata/all_metrics_expected.yaml | 1249 +++++++++++++++++ 10 files changed, 1331 insertions(+) create mode 100644 tests/receivers/apachespark/apachespark_test.go create mode 100644 tests/receivers/apachespark/testdata/all_metrics_config.yaml create mode 100644 tests/receivers/apachespark/testdata/all_metrics_expected.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e02f22ab6..3f8244a56f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## Unreleased +### 🚀 New components 🚀 + +- (Splunk) Add `apachespark` receiver ([#]()) + ## v0.108.1 This Splunk OpenTelemetry Collector release includes changes from the [opentelemetry-collector v0.108.1](https://github.com/open-telemetry/opentelemetry-collector/releases/tag/v0.108.1) and the [opentelemetry-collector-contrib v0.108.0](https://github.com/open-telemetry/opentelemetry-collector-contrib/releases/tag/v0.108.0) releases where appropriate. 
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 837a6ba7ea..1f0daadc67 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -15,6 +15,13 @@ services: build: ./apache ports: - "18080:80" + apachespark: + image: quay.io/splunko11ytest/apachespark:latest + profiles: + - integration + build: ./apachespark + ports: + - "4040:4040" # Cassandra image for collectd-cassandra test: cassandra: image: quay.io/splunko11ytest/cassandra:latest diff --git a/docs/components.md b/docs/components.md index 597a43c2b1..24b42ccc96 100644 --- a/docs/components.md +++ b/docs/components.md @@ -17,6 +17,7 @@ The distribution offers support for the following components. | [awscontainerinsights](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/awscontainerinsightreceiver) | [beta] | | [awsecscontainermetrics](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/awsecscontainermetricsreceiver) | [beta] | | [apache](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/apachereceiver) | [alpha] | +| [apachespark](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/apachesparkreceiver) | [alpha] | | [azureblob](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/azureblobreceiver) | [alpha] | | [azureeventhub](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/azureeventhubreceiver) | [alpha] | | [azuremonitor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/azuremonitorreceiver) | [in development] | diff --git a/go.mod b/go.mod index 6c88df9f7f..2f7efb9393 100644 --- a/go.mod +++ b/go.mod @@ -62,6 +62,7 @@ require ( github.com/open-telemetry/opentelemetry-collector-contrib/processor/transformprocessor v0.108.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/activedirectorydsreceiver v0.108.0 
github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachereceiver v0.108.0 + github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachesparkreceiver v0.108.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awscontainerinsightreceiver v0.108.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awsecscontainermetricsreceiver v0.108.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/azureblobreceiver v0.108.0 diff --git a/go.sum b/go.sum index 2e4d5c3aef..fa5a340252 100644 --- a/go.sum +++ b/go.sum @@ -1396,6 +1396,8 @@ github.com/open-telemetry/opentelemetry-collector-contrib/receiver/activedirecto github.com/open-telemetry/opentelemetry-collector-contrib/receiver/activedirectorydsreceiver v0.108.0/go.mod h1:Zn5FJSJ2h6DUw84ZqwPiJFCXZWsEeBo++SV3xNeSuO4= github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachereceiver v0.108.0 h1:RFB7p/HUt46C4HDZjwwDDYYv+ipn/SVTpbuLgXzEbZQ= github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachereceiver v0.108.0/go.mod h1:7o6+Ojnd9enSTTVUfzORsgMT83qy0Ds0rNl6CXJb5cQ= +github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachesparkreceiver v0.108.0 h1:alseMqI1EDNxMBxfBMQde3r0g/KUauEWbm/u6vKzjnM= +github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachesparkreceiver v0.108.0/go.mod h1:GgM4G2KU5/CYVKUpnJkTsud8B6OvOlnNm6gz3KjsuwU= github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awscontainerinsightreceiver v0.108.0 h1:GQPIFe/91bnSymg3/4cfW6kbQD6+VMCX8yPzgaVGoaQ= github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awscontainerinsightreceiver v0.108.0/go.mod h1:BK/JiWZQtGM94UOwa+JWgI6ABUtJExPvM82uTQPXOns= github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awsecscontainermetricsreceiver v0.108.0 h1:4/SyCI9608uBuk0qkl7BWboXxWlArSys/qHBU/Wk4fQ= diff --git a/internal/components/components.go b/internal/components/components.go index 
7789314f15..ff5bebab91 100644 --- a/internal/components/components.go +++ b/internal/components/components.go @@ -57,6 +57,7 @@ import ( "github.com/open-telemetry/opentelemetry-collector-contrib/processor/transformprocessor" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/activedirectorydsreceiver" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachereceiver" + "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachesparkreceiver" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awscontainerinsightreceiver" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awsecscontainermetricsreceiver" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/azureblobreceiver" @@ -166,6 +167,7 @@ func Get() (otelcol.Factories, error) { awscontainerinsightreceiver.NewFactory(), awsecscontainermetricsreceiver.NewFactory(), apachereceiver.NewFactory(), + apachesparkreceiver.NewFactory(), azureblobreceiver.NewFactory(), azureeventhubreceiver.NewFactory(), azuremonitorreceiver.NewFactory(), diff --git a/internal/components/components_test.go b/internal/components/components_test.go index 5368bb094f..4fa1a91f29 100644 --- a/internal/components/components_test.go +++ b/internal/components/components_test.go @@ -51,6 +51,7 @@ func TestDefaultComponents(t *testing.T) { "azureblob", "azuremonitor", "apache", + "apachespark", "carbon", "cloudfoundry", "collectd", diff --git a/tests/receivers/apachespark/apachespark_test.go b/tests/receivers/apachespark/apachespark_test.go new file mode 100644 index 0000000000..59dd78c044 --- /dev/null +++ b/tests/receivers/apachespark/apachespark_test.go @@ -0,0 +1,43 @@ +// Copyright Splunk, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build integration + +package tests + +import ( + "testing" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatatest/pmetrictest" + + "github.com/signalfx/splunk-otel-collector/tests/testutils" +) + +const sparkPort = "4040" + +func TestApacheSparkIntegration(t *testing.T) { + testutils.CheckGoldenFile(t, "all_metrics_config.yaml", "all_metrics_expected.yaml", + pmetrictest.IgnoreMetricValues(), + pmetrictest.IgnoreStartTimestamp(), + pmetrictest.IgnoreTimestamp(), + pmetrictest.IgnoreResourceAttributeValue("spark.application.id"), + pmetrictest.IgnoreResourceAttributeValue("spark.application.name"), + pmetrictest.IgnoreMetricAttributeValue("active", "spark.stage.status"), + pmetrictest.IgnoreMetricAttributeValue("complete", "spark.stage.status"), + pmetrictest.IgnoreMetricAttributeValue("failed", "spark.stage.status"), + pmetrictest.IgnoreMetricAttributeValue("pending", "spark.stage.status"), + pmetrictest.IgnoreMetricDataPointsOrder(), + ) +} + diff --git a/tests/receivers/apachespark/testdata/all_metrics_config.yaml b/tests/receivers/apachespark/testdata/all_metrics_config.yaml new file mode 100644 index 0000000000..c2c6a695ae --- /dev/null +++ b/tests/receivers/apachespark/testdata/all_metrics_config.yaml @@ -0,0 +1,21 @@ +# ./bin/otelcontribcol_darwin_arm64 --config ./receiver/apachesparkreceiver/testdata/all_metrics_config.yaml +receivers: + apachespark: + collection_interval: 15s + +exporters: + otlp: + endpoint: "${OTLP_ENDPOINT}" + tls: + insecure: true + +service: + telemetry: + logs: + level: "info" + 
pipelines: + metrics: + receivers: + - apachespark + exporters: + - otlp diff --git a/tests/receivers/apachespark/testdata/all_metrics_expected.yaml b/tests/receivers/apachespark/testdata/all_metrics_expected.yaml new file mode 100644 index 0000000000..1bb2f22256 --- /dev/null +++ b/tests/receivers/apachespark/testdata/all_metrics_expected.yaml @@ -0,0 +1,1249 @@ +resourceMetrics: + - resource: + attributes: + - key: spark.application.id + value: + stringValue: local-1684786598779 + - key: spark.application.name + value: + stringValue: PythonStatusAPIDemo + scopeMetrics: + - metrics: + - description: Disk space used by the BlockManager. + name: spark.driver.block_manager.disk.usage + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: mb + - description: Memory usage for the driver's BlockManager. + name: spark.driver.block_manager.memory.usage + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: location + value: + stringValue: off_heap + - key: state + value: + stringValue: used + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: location + value: + stringValue: on_heap + - key: state + value: + stringValue: used + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: location + value: + stringValue: off_heap + - key: state + value: + stringValue: free + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "434" + attributes: + - key: location + value: + stringValue: on_heap + - key: state + value: + stringValue: free + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: mb + - description: Average time spent during CodeGenerator source code compilation operations. 
+ gauge: + dataPoints: + - asDouble: 0 + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + name: spark.driver.code_generator.compilation.average_time + unit: ms + - description: Number of source code compilation operations performed by the CodeGenerator. + name: spark.driver.code_generator.compilation.count + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ compilation }' + - description: Average class size of the classes generated by the CodeGenerator. + gauge: + dataPoints: + - asDouble: 0 + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + name: spark.driver.code_generator.generated_class.average_size + unit: bytes + - description: Number of classes generated by the CodeGenerator. + name: spark.driver.code_generator.generated_class.count + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ class }' + - description: Average method size of the classes generated by the CodeGenerator. + gauge: + dataPoints: + - asDouble: 0 + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + name: spark.driver.code_generator.generated_method.average_size + unit: bytes + - description: Number of methods generated by the CodeGenerator. + name: spark.driver.code_generator.generated_method.count + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ method }' + - description: Average size of the source code generated by a CodeGenerator code generation operation. 
+ gauge: + dataPoints: + - asDouble: 0 + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + name: spark.driver.code_generator.source_code.average_size + unit: bytes + - description: Number of source code generation operations performed by the CodeGenerator. + name: spark.driver.code_generator.source_code.operations + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ operation }' + - description: Number of active jobs currently being processed by the DAGScheduler. + name: spark.driver.dag_scheduler.job.active + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "1" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ job }' + - description: Number of jobs that have been submitted to the DAGScheduler. + name: spark.driver.dag_scheduler.job.count + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "1" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ job }' + - description: Number of stages the DAGScheduler is either running or needs to run. + name: spark.driver.dag_scheduler.stage.count + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "1" + attributes: + - key: status + value: + stringValue: running + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: status + value: + stringValue: waiting + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ stage }' + - description: Number of failed stages run by the DAGScheduler. 
+ name: spark.driver.dag_scheduler.stage.failed + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ stage }' + - description: Number of garbage collection operations performed by the driver. + name: spark.driver.executor.gc.operations + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: gc_type + value: + stringValue: minor + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: gc_type + value: + stringValue: major + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ gc_operation }' + - description: Total elapsed time during garbage collection operations performed by the driver. + name: spark.driver.executor.gc.time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: gc_type + value: + stringValue: minor + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: gc_type + value: + stringValue: major + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ms + - description: Amount of execution memory currently used by the driver. + name: spark.driver.executor.memory.execution + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: location + value: + stringValue: off_heap + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: location + value: + stringValue: on_heap + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: bytes + - description: Amount of memory used by the driver's JVM. 
+ name: spark.driver.executor.memory.jvm + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: location + value: + stringValue: off_heap + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: location + value: + stringValue: on_heap + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: bytes + - description: Amount of pool memory currently used by the driver. + name: spark.driver.executor.memory.pool + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: type + value: + stringValue: direct + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: type + value: + stringValue: mapped + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: bytes + - description: Amount of storage memory currently used by the driver. + name: spark.driver.executor.memory.storage + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: location + value: + stringValue: off_heap + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: location + value: + stringValue: on_heap + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: bytes + - description: Number of file cache hits on the HiveExternalCatalog. 
+ name: spark.driver.hive_external_catalog.file_cache_hits + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ hit }' + - description: Number of files discovered while listing the partitions of a table in the Hive metastore + name: spark.driver.hive_external_catalog.files_discovered + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ file }' + - description: Number of calls to the underlying Hive Metastore client made by the Spark application. + name: spark.driver.hive_external_catalog.hive_client_calls + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ call }' + - description: Number of parallel listing jobs initiated by the HiveExternalCatalog when listing partitions of a table. + name: spark.driver.hive_external_catalog.parallel_listing_jobs + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ listing_job }' + - description: Table partitions fetched by the HiveExternalCatalog. + name: spark.driver.hive_external_catalog.partitions_fetched + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ partition }' + - description: Current CPU time taken by the Spark driver. 
+ name: spark.driver.jvm_cpu_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "8330000000" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ns + - description: Number of events that have been dropped by the LiveListenerBus. + name: spark.driver.live_listener_bus.dropped + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ event }' + - description: Number of events that have been posted on the LiveListenerBus. + name: spark.driver.live_listener_bus.posted + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "37" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ event }' + - description: Average time taken for the LiveListenerBus to process an event posted to it. + gauge: + dataPoints: + - asDouble: 1.743476601344461 + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + name: spark.driver.live_listener_bus.processing_time.average + unit: ms + - description: Number of events currently waiting to be processed by the LiveListenerBus. + name: spark.driver.live_listener_bus.queue_size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ event }' + scope: + name: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachesparkreceiver + version: latest + - resource: + attributes: + - key: spark.application.id + value: + stringValue: local-1684786598779 + - key: spark.application.name + value: + stringValue: PythonStatusAPIDemo + - key: spark.stage.id + value: + intValue: "1" + scopeMetrics: + - metrics: + - description: The amount of disk space used for storing portions of overly large data chunks that couldn't fit in memory in this stage. 
+ name: spark.stage.disk.spilled + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: CPU time spent by the executor in this stage. + name: spark.stage.executor.cpu_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ns + - description: Amount of time spent by the executor in this stage. + name: spark.stage.executor.run_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ms + - description: Number of records written and read in this stage. + name: spark.stage.io.records + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: direction + value: + stringValue: in + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: direction + value: + stringValue: out + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ record }' + - description: Amount of data written and read at this stage. + name: spark.stage.io.size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: direction + value: + stringValue: in + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: direction + value: + stringValue: out + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: The amount of time the JVM spent on garbage collection in this stage. 
+ name: spark.stage.jvm_gc_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ms + - description: Peak memory used by internal data structures created during shuffles, aggregations and joins in this stage. + name: spark.stage.memory.peak + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: The amount of memory moved to disk due to size constraints (spilled) in this stage. + name: spark.stage.memory.spilled + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: Number of blocks fetched in shuffle operations in this stage. + name: spark.stage.shuffle.blocks_fetched + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: source + value: + stringValue: remote + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: source + value: + stringValue: local + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ block }' + - description: Time spent in this stage waiting for remote shuffle blocks. + name: spark.stage.shuffle.fetch_wait_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ms + - description: Amount of data read to disk in shuffle operations (sometimes required for large blocks, as opposed to the default behavior of reading into memory). 
+ name: spark.stage.shuffle.io.disk + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: Amount of data read in shuffle operations in this stage. + name: spark.stage.shuffle.io.read.size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: source + value: + stringValue: local + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: source + value: + stringValue: remote + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: Number of records written or read in shuffle operations in this stage. + name: spark.stage.shuffle.io.records + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: direction + value: + stringValue: in + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: direction + value: + stringValue: out + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ record }' + - description: Amount of data written in shuffle operations in this stage. + name: spark.stage.shuffle.io.write.size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: Time spent blocking on writes to disk or buffer cache in this stage. + name: spark.stage.shuffle.write_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ns + - description: A one-hot encoding representing the status of this stage. 
+ name: spark.stage.status + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: active + value: + boolValue: true + - key: complete + value: + boolValue: false + - key: pending + value: + boolValue: false + - key: failed + value: + boolValue: false + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ status }' + - description: Number of active tasks in this stage. + name: spark.stage.task.active + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ task }' + - description: Number of tasks with a specific result in this stage. + name: spark.stage.task.result + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: result + value: + stringValue: completed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: result + value: + stringValue: failed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: result + value: + stringValue: killed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ task }' + - description: The amount of data transmitted back to the driver by all the tasks in this stage. 
+ name: spark.stage.task.result_size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + scope: + name: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachesparkreceiver + version: latest + - resource: + attributes: + - key: spark.application.id + value: + stringValue: local-1684786598779 + - key: spark.application.name + value: + stringValue: PythonStatusAPIDemo + - key: spark.stage.id + value: + intValue: "0" + scopeMetrics: + - metrics: + - description: The amount of disk space used for storing portions of overly large data chunks that couldn't fit in memory in this stage. + name: spark.stage.disk.spilled + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: CPU time spent by the executor in this stage. + name: spark.stage.executor.cpu_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "365309717" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ns + - description: Amount of time spent by the executor in this stage. + name: spark.stage.executor.run_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "23131" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ms + - description: Number of records written and read in this stage. 
+ name: spark.stage.io.records + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: direction + value: + stringValue: in + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: direction + value: + stringValue: out + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ record }' + - description: Amount of data written and read at this stage. + name: spark.stage.io.size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: direction + value: + stringValue: in + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: direction + value: + stringValue: out + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: The amount of time the JVM spent on garbage collection in this stage. + name: spark.stage.jvm_gc_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "58" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ms + - description: Peak memory used by internal data structures created during shuffles, aggregations and joins in this stage. + name: spark.stage.memory.peak + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: The amount of memory moved to disk due to size constraints (spilled) in this stage. + name: spark.stage.memory.spilled + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: Number of blocks fetched in shuffle operations in this stage. 
+ name: spark.stage.shuffle.blocks_fetched + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: source + value: + stringValue: remote + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: source + value: + stringValue: local + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ block }' + - description: Time spent in this stage waiting for remote shuffle blocks. + name: spark.stage.shuffle.fetch_wait_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ms + - description: Amount of data read to disk in shuffle operations (sometimes required for large blocks, as opposed to the default behavior of reading into memory). + name: spark.stage.shuffle.io.disk + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: Amount of data read in shuffle operations in this stage. + name: spark.stage.shuffle.io.read.size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: source + value: + stringValue: local + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: source + value: + stringValue: remote + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: Number of records written or read in shuffle operations in this stage. 
+ name: spark.stage.shuffle.io.records + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: direction + value: + stringValue: in + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "10" + attributes: + - key: direction + value: + stringValue: out + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ record }' + - description: Amount of data written in shuffle operations in this stage. + name: spark.stage.shuffle.io.write.size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "698" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: Time spent blocking on writes to disk or buffer cache in this stage. + name: spark.stage.shuffle.write_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "42947581" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ns + - description: A one-hot encoding representing the status of this stage. + name: spark.stage.status + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: active + value: + boolValue: false + - key: complete + value: + boolValue: true + - key: pending + value: + boolValue: false + - key: failed + value: + boolValue: false + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ status }' + - description: Number of active tasks in this stage. + name: spark.stage.task.active + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ task }' + - description: Number of tasks with a specific result in this stage. 
+ name: spark.stage.task.result + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "10" + attributes: + - key: result + value: + stringValue: completed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: result + value: + stringValue: failed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: result + value: + stringValue: killed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ task }' + - description: The amount of data transmitted back to the driver by all the tasks in this stage. + name: spark.stage.task.result_size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "15675" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + scope: + name: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachesparkreceiver + version: latest + - resource: + attributes: + - key: spark.application.id + value: + stringValue: local-1684786598779 + - key: spark.application.name + value: + stringValue: PythonStatusAPIDemo + - key: spark.executor.id + value: + stringValue: driver + scopeMetrics: + - metrics: + - description: Disk space used by this executor for RDD storage. + name: spark.executor.disk.usage + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: bytes + - description: Elapsed time the JVM spent in garbage collection in this executor. + name: spark.executor.gc_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ms + - description: Amount of data input for this executor. 
+ name: spark.executor.input_size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: Storage memory used by this executor. + name: spark.executor.memory.usage + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "6890" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: bytes + - description: Amount of data written and read during shuffle operations for this executor. + name: spark.executor.shuffle.io.size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: direction + value: + stringValue: in + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "698" + attributes: + - key: direction + value: + stringValue: out + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: The executor's storage memory usage. 
+ name: spark.executor.storage_memory.usage + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: location + value: + stringValue: on_heap + - key: state + value: + stringValue: used + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: location + value: + stringValue: on_heap + - key: state + value: + stringValue: free + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: location + value: + stringValue: off_heap + - key: state + value: + stringValue: used + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: location + value: + stringValue: off_heap + - key: state + value: + stringValue: free + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: bytes + - description: Number of tasks currently running in this executor. + name: spark.executor.task.active + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ task }' + - description: Maximum number of tasks that can run concurrently in this executor. + name: spark.executor.task.limit + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "6" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ task }' + - description: Number of tasks with a specific result in this executor. 
+ name: spark.executor.task.result + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: result + value: + stringValue: failed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "10" + attributes: + - key: result + value: + stringValue: completed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ task }' + - description: Elapsed time the JVM spent executing tasks in this executor. + name: spark.executor.time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "6876" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ms + scope: + name: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachesparkreceiver + version: latest + - resource: + attributes: + - key: spark.application.id + value: + stringValue: local-1684786598779 + - key: spark.application.name + value: + stringValue: PythonStatusAPIDemo + - key: spark.job.id + value: + intValue: "0" + scopeMetrics: + - metrics: + - description: Number of active stages in this job. + name: spark.job.stage.active + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "1" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ stage }' + - description: Number of stages with a specific result in this job. 
+ name: spark.job.stage.result + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "1" + attributes: + - key: result + value: + stringValue: completed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: result + value: + stringValue: skipped + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: result + value: + stringValue: failed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ stage }' + - description: Number of active tasks in this job. + name: spark.job.task.active + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ task }' + - description: Number of tasks with a specific result in this job. + name: spark.job.task.result + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "10" + attributes: + - key: result + value: + stringValue: completed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: result + value: + stringValue: skipped + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: result + value: + stringValue: failed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ task }' + scope: + name: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachesparkreceiver + version: latest From 5bfdf396f577841e90b1226f3e03d5f1f8eb2d69 Mon Sep 17 00:00:00 2001 From: Curtis Robert Date: Tue, 3 Sep 2024 12:14:38 -0700 Subject: [PATCH 2/9] Add more docker contents --- docker/apachespark/Dockerfile | 8 ++ docker/apachespark/long_running.py | 82 +++++++++++++++++++ .../testdata/all_metrics_config.yaml | 1 - 3 files changed, 90 insertions(+), 1 deletion(-) 
create mode 100644 docker/apachespark/Dockerfile create mode 100644 docker/apachespark/long_running.py diff --git a/docker/apachespark/Dockerfile b/docker/apachespark/Dockerfile new file mode 100644 index 0000000000..446ddbb26a --- /dev/null +++ b/docker/apachespark/Dockerfile @@ -0,0 +1,8 @@ +FROM apache/spark:3.4.0-python3 + +COPY ./long_running.py /opt/spark/examples/src/main/python/long_running.py +RUN chmod +x /opt/spark/examples/src/main/python/long_running.py + +CMD [ "/opt/spark/bin/spark-submit", "/opt/spark/examples/src/main/python/long_running.py" ] + +EXPOSE 4040 diff --git a/docker/apachespark/long_running.py b/docker/apachespark/long_running.py new file mode 100644 index 0000000000..0bd909779d --- /dev/null +++ b/docker/apachespark/long_running.py @@ -0,0 +1,82 @@ +# Keep the original spark license. +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Original source: https://github.com/apache/spark/blob/master/examples/src/main/python/status_api_demo.py +# +# NOTICE: +# Apache Spark +# Copyright 2014 and onwards The Apache Software Foundation. +# +# This product includes software developed at +# The Apache Software Foundation (http://www.apache.org/). 
+# + +import time +import threading +import queue as Queue +from typing import Any, Callable, List, Tuple + +from pyspark import SparkConf, SparkContext + + +def delayed(seconds: int) -> Callable[[Any], Any]: + def f(x: int) -> int: + time.sleep(seconds) + return x + return f + + +def call_in_background(f: Callable[..., Any], *args: Any) -> Queue.Queue: + result: Queue.Queue = Queue.Queue(1) + t = threading.Thread(target=lambda: result.put(f(*args))) + t.daemon = True + t.start() + return result + + +def main() -> None: + conf = SparkConf().set("spark.ui.showConsoleProgress", "false") + sc = SparkContext(appName="PythonStatusAPIDemo", conf=conf) + + def run() -> List[Tuple[int, int]]: + rdd = sc.parallelize(range(10), 10).map(delayed(2)) + reduced = rdd.map(lambda x: (x, 1)).reduceByKey(lambda x, y: x + y) + return reduced.map(delayed(2)).collect() + + result = call_in_background(run) + status = sc.statusTracker() + while result.empty(): + ids = status.getJobIdsForGroup() + for id in ids: + job = status.getJobInfo(id) + assert job is not None + + print("Job", id, "status: ", job.status) + for sid in job.stageIds: + info = status.getStageInfo(sid) + if info: + print("Stage %d: %d tasks total (%d active, %d complete)" % + (sid, info.numTasks, info.numActiveTasks, info.numCompletedTasks)) + time.sleep(1) + + print("Job results are:", result.get()) + sc.stop() + + +if __name__ == "__main__": + for i in range(100): + main() diff --git a/tests/receivers/apachespark/testdata/all_metrics_config.yaml b/tests/receivers/apachespark/testdata/all_metrics_config.yaml index c2c6a695ae..0ae01e4663 100644 --- a/tests/receivers/apachespark/testdata/all_metrics_config.yaml +++ b/tests/receivers/apachespark/testdata/all_metrics_config.yaml @@ -1,4 +1,3 @@ -# ./bin/otelcontribcol_darwin_arm64 --config ./receiver/apachesparkreceiver/testdata/all_metrics_config.yaml receivers: apachespark: collection_interval: 15s From 17ea06552424e6dd6b96cd248abe307dd42c8afa Mon Sep 17 00:00:00 2001 
From: Curtis Robert Date: Tue, 3 Sep 2024 12:16:28 -0700 Subject: [PATCH 3/9] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f8244a56f..aa3e88dc7d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ ### 🚀 New components 🚀 -- (Splunk) Add `apachespark` receiver ([#]()) +- (Splunk) Add `apachespark` receiver ([#5318](https://github.com/signalfx/splunk-otel-collector/pull/5318)) ## v0.108.1 From 522ba580f5a5ce84c7bae950dcdd56543b542b6d Mon Sep 17 00:00:00 2001 From: Curtis Robert Date: Tue, 3 Sep 2024 12:27:06 -0700 Subject: [PATCH 4/9] Fix formatting --- .../receivers/apachespark/apachespark_test.go | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/tests/receivers/apachespark/apachespark_test.go b/tests/receivers/apachespark/apachespark_test.go index 59dd78c044..e870b2bc09 100644 --- a/tests/receivers/apachespark/apachespark_test.go +++ b/tests/receivers/apachespark/apachespark_test.go @@ -29,15 +29,14 @@ const sparkPort = "4040" func TestApacheSparkIntegration(t *testing.T) { testutils.CheckGoldenFile(t, "all_metrics_config.yaml", "all_expected_metrics.yaml", pmetrictest.IgnoreMetricValues(), - pmetrictest.IgnoreStartTimestamp(), - pmetrictest.IgnoreTimestamp(), - pmetrictest.IgnoreResourceAttributeValue("spark.application.id"), - pmetrictest.IgnoreResourceAttributeValue("spark.application.name"), - pmetrictest.IgnoreMetricAttributeValue("active", "spark.stage.status"), - pmetrictest.IgnoreMetricAttributeValue("complete", "spark.stage.status"), - pmetrictest.IgnoreMetricAttributeValue("failed", "spark.stage.status"), - pmetrictest.IgnoreMetricAttributeValue("pending", "spark.stage.status"), - pmetrictest.IgnoreMetricDataPointsOrder(), - ), + pmetrictest.IgnoreStartTimestamp(), + pmetrictest.IgnoreTimestamp(), + pmetrictest.IgnoreResourceAttributeValue("spark.application.id"), + 
pmetrictest.IgnoreResourceAttributeValue("spark.application.name"), + pmetrictest.IgnoreMetricAttributeValue("active", "spark.stage.status"), + pmetrictest.IgnoreMetricAttributeValue("complete", "spark.stage.status"), + pmetrictest.IgnoreMetricAttributeValue("failed", "spark.stage.status"), + pmetrictest.IgnoreMetricAttributeValue("pending", "spark.stage.status"), + pmetrictest.IgnoreMetricDataPointsOrder(), ).Run(t) } From 5af0a0815c5377828f392aa2f16846695ec226ef Mon Sep 17 00:00:00 2001 From: Curtis Robert Date: Tue, 3 Sep 2024 14:13:20 -0700 Subject: [PATCH 5/9] Fix golden file test format --- tests/receivers/apachespark/apachespark_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/receivers/apachespark/apachespark_test.go b/tests/receivers/apachespark/apachespark_test.go index e870b2bc09..8113e76eb3 100644 --- a/tests/receivers/apachespark/apachespark_test.go +++ b/tests/receivers/apachespark/apachespark_test.go @@ -38,5 +38,5 @@ func TestApacheSparkIntegration(t *testing.T) { pmetrictest.IgnoreMetricAttributeValue("failed", "spark.stage.status"), pmetrictest.IgnoreMetricAttributeValue("pending", "spark.stage.status"), pmetrictest.IgnoreMetricDataPointsOrder(), - ).Run(t) + ) } From 11ed17f34094b3fef462c9f9f8a140b407eec13e Mon Sep 17 00:00:00 2001 From: Curtis Robert Date: Tue, 3 Sep 2024 14:44:40 -0700 Subject: [PATCH 6/9] rename test file --- .../{all_metrics_expected.yaml => all_expected_metrics.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/receivers/apachespark/testdata/{all_metrics_expected.yaml => all_expected_metrics.yaml} (100%) diff --git a/tests/receivers/apachespark/testdata/all_metrics_expected.yaml b/tests/receivers/apachespark/testdata/all_expected_metrics.yaml similarity index 100% rename from tests/receivers/apachespark/testdata/all_metrics_expected.yaml rename to tests/receivers/apachespark/testdata/all_expected_metrics.yaml From b3dd491dc495031d50cd246f4b968aa5be784ec7 Mon Sep 17 00:00:00 
2001 From: Curtis Robert Date: Tue, 3 Sep 2024 15:39:07 -0700 Subject: [PATCH 7/9] Ignore scope version in testing --- tests/receivers/apachespark/apachespark_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/receivers/apachespark/apachespark_test.go b/tests/receivers/apachespark/apachespark_test.go index 8113e76eb3..e3ebd989e1 100644 --- a/tests/receivers/apachespark/apachespark_test.go +++ b/tests/receivers/apachespark/apachespark_test.go @@ -31,6 +31,7 @@ func TestApacheSparkIntegration(t *testing.T) { pmetrictest.IgnoreMetricValues(), pmetrictest.IgnoreStartTimestamp(), pmetrictest.IgnoreTimestamp(), + pmetrictest.IgnoreScopeVersion(), pmetrictest.IgnoreResourceAttributeValue("spark.application.id"), pmetrictest.IgnoreResourceAttributeValue("spark.application.name"), pmetrictest.IgnoreMetricAttributeValue("active", "spark.stage.status"), From 66b857ffc36d588c842dd098e5eb10ebdfc7de37 Mon Sep 17 00:00:00 2001 From: Curtis Robert Date: Thu, 5 Sep 2024 16:16:06 -0700 Subject: [PATCH 8/9] Update internal/components/components.go --- internal/components/components.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/components/components.go b/internal/components/components.go index de913fc0c6..ee329619b4 100644 --- a/internal/components/components.go +++ b/internal/components/components.go @@ -165,7 +165,7 @@ func Get() (otelcol.Factories, error) { receivers, err := receiver.MakeFactoryMap( activedirectorydsreceiver.NewFactory(), apachereceiver.NewFactory(), - apachesparkreceiver.NewFactory(), + apachesparkreceiver.NewFactory(), awscontainerinsightreceiver.NewFactory(), awsecscontainermetricsreceiver.NewFactory(), azureblobreceiver.NewFactory(), From 27e817c7c2d7c577277df25f3ca84e5583a7a478 Mon Sep 17 00:00:00 2001 From: Curtis Robert Date: Fri, 6 Sep 2024 07:51:14 -0700 Subject: [PATCH 9/9] Fix fmt error --- internal/components/components_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/internal/components/components_test.go b/internal/components/components_test.go index 4eea654f6c..7e1b31cc93 100644 --- a/internal/components/components_test.go +++ b/internal/components/components_test.go @@ -46,7 +46,7 @@ func TestDefaultComponents(t *testing.T) { expectedReceivers := []string{ "active_directory_ds", "apache", - "apachespark", + "apachespark", "awscontainerinsightreceiver", "awsecscontainermetrics", "azureblob",