From beb78355203fa7dc7d264cf47a4d2bd6207103d5 Mon Sep 17 00:00:00 2001 From: Curtis Robert Date: Tue, 3 Sep 2024 12:12:50 -0700 Subject: [PATCH 1/9] Add apachespark receiver --- CHANGELOG.md | 4 + docker/docker-compose.yml | 7 + docs/components.md | 1 + go.mod | 1 + go.sum | 2 + internal/components/components.go | 2 + internal/components/components_test.go | 1 + .../receivers/apachespark/apachespark_test.go | 43 + .../testdata/all_metrics_config.yaml | 21 + .../testdata/all_metrics_expected.yaml | 1249 +++++++++++++++++ 10 files changed, 1331 insertions(+) create mode 100644 tests/receivers/apachespark/apachespark_test.go create mode 100644 tests/receivers/apachespark/testdata/all_metrics_config.yaml create mode 100644 tests/receivers/apachespark/testdata/all_metrics_expected.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e02f22ab6..3f8244a56f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## Unreleased +### 🚀 New components 🚀 + +- (Splunk) Add `apachespark` receiver ([#]()) + ## v0.108.1 This Splunk OpenTelemetry Collector release includes changes from the [opentelemetry-collector v0.108.1](https://github.com/open-telemetry/opentelemetry-collector/releases/tag/v0.108.1) and the [opentelemetry-collector-contrib v0.108.0](https://github.com/open-telemetry/opentelemetry-collector-contrib/releases/tag/v0.108.0) releases where appropriate. 
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 837a6ba7ea..1f0daadc67 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -15,6 +15,13 @@ services: build: ./apache ports: - "18080:80" + apachespark: + image: quay.io/splunko11ytest/apachespark:latest + profiles: + - integration + build: ./apachespark + ports: + - "4040:4040" # Cassandra image for collectd-cassandra test: cassandra: image: quay.io/splunko11ytest/cassandra:latest diff --git a/docs/components.md b/docs/components.md index 597a43c2b1..24b42ccc96 100644 --- a/docs/components.md +++ b/docs/components.md @@ -17,6 +17,7 @@ The distribution offers support for the following components. | [awscontainerinsights](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/awscontainerinsightreceiver) | [beta] | | [awsecscontainermetrics](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/awsecscontainermetricsreceiver) | [beta] | | [apache](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/apachereceiver) | [alpha] | +| [apachespark](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/apachesparkreceiver) | [alpha] | | [azureblob](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/azureblobreceiver) | [alpha] | | [azureeventhub](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/azureeventhubreceiver) | [alpha] | | [azuremonitor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/azuremonitorreceiver) | [in development] | diff --git a/go.mod b/go.mod index 6c88df9f7f..2f7efb9393 100644 --- a/go.mod +++ b/go.mod @@ -62,6 +62,7 @@ require ( github.com/open-telemetry/opentelemetry-collector-contrib/processor/transformprocessor v0.108.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/activedirectorydsreceiver v0.108.0 
github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachereceiver v0.108.0 + github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachesparkreceiver v0.108.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awscontainerinsightreceiver v0.108.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awsecscontainermetricsreceiver v0.108.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/azureblobreceiver v0.108.0 diff --git a/go.sum b/go.sum index 2e4d5c3aef..fa5a340252 100644 --- a/go.sum +++ b/go.sum @@ -1396,6 +1396,8 @@ github.com/open-telemetry/opentelemetry-collector-contrib/receiver/activedirecto github.com/open-telemetry/opentelemetry-collector-contrib/receiver/activedirectorydsreceiver v0.108.0/go.mod h1:Zn5FJSJ2h6DUw84ZqwPiJFCXZWsEeBo++SV3xNeSuO4= github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachereceiver v0.108.0 h1:RFB7p/HUt46C4HDZjwwDDYYv+ipn/SVTpbuLgXzEbZQ= github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachereceiver v0.108.0/go.mod h1:7o6+Ojnd9enSTTVUfzORsgMT83qy0Ds0rNl6CXJb5cQ= +github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachesparkreceiver v0.108.0 h1:alseMqI1EDNxMBxfBMQde3r0g/KUauEWbm/u6vKzjnM= +github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachesparkreceiver v0.108.0/go.mod h1:GgM4G2KU5/CYVKUpnJkTsud8B6OvOlnNm6gz3KjsuwU= github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awscontainerinsightreceiver v0.108.0 h1:GQPIFe/91bnSymg3/4cfW6kbQD6+VMCX8yPzgaVGoaQ= github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awscontainerinsightreceiver v0.108.0/go.mod h1:BK/JiWZQtGM94UOwa+JWgI6ABUtJExPvM82uTQPXOns= github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awsecscontainermetricsreceiver v0.108.0 h1:4/SyCI9608uBuk0qkl7BWboXxWlArSys/qHBU/Wk4fQ= diff --git a/internal/components/components.go b/internal/components/components.go index 
7789314f15..ff5bebab91 100644 --- a/internal/components/components.go +++ b/internal/components/components.go @@ -57,6 +57,7 @@ import ( "github.com/open-telemetry/opentelemetry-collector-contrib/processor/transformprocessor" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/activedirectorydsreceiver" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachereceiver" + "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachesparkreceiver" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awscontainerinsightreceiver" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awsecscontainermetricsreceiver" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/azureblobreceiver" @@ -166,6 +167,7 @@ func Get() (otelcol.Factories, error) { awscontainerinsightreceiver.NewFactory(), awsecscontainermetricsreceiver.NewFactory(), apachereceiver.NewFactory(), + apachesparkreceiver.NewFactory(), azureblobreceiver.NewFactory(), azureeventhubreceiver.NewFactory(), azuremonitorreceiver.NewFactory(), diff --git a/internal/components/components_test.go b/internal/components/components_test.go index 5368bb094f..4fa1a91f29 100644 --- a/internal/components/components_test.go +++ b/internal/components/components_test.go @@ -51,6 +51,7 @@ func TestDefaultComponents(t *testing.T) { "azureblob", "azuremonitor", "apache", + "apachespark", "carbon", "cloudfoundry", "collectd", diff --git a/tests/receivers/apachespark/apachespark_test.go b/tests/receivers/apachespark/apachespark_test.go new file mode 100644 index 0000000000..59dd78c044 --- /dev/null +++ b/tests/receivers/apachespark/apachespark_test.go @@ -0,0 +1,43 @@ +// Copyright Splunk, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build integration + +package tests + +import ( + "testing" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatatest/pmetrictest" + + "github.com/signalfx/splunk-otel-collector/tests/testutils" +) + +const sparkPort = "4040" + +func TestApacheSparkIntegration(t *testing.T) { + testutils.CheckGoldenFile(t, "all_metrics_config.yaml", "all_metrics_expected.yaml", + pmetrictest.IgnoreMetricValues(), + pmetrictest.IgnoreStartTimestamp(), + pmetrictest.IgnoreTimestamp(), + pmetrictest.IgnoreResourceAttributeValue("spark.application.id"), + pmetrictest.IgnoreResourceAttributeValue("spark.application.name"), + pmetrictest.IgnoreMetricAttributeValue("active", "spark.stage.status"), + pmetrictest.IgnoreMetricAttributeValue("complete", "spark.stage.status"), + pmetrictest.IgnoreMetricAttributeValue("failed", "spark.stage.status"), + pmetrictest.IgnoreMetricAttributeValue("pending", "spark.stage.status"), + pmetrictest.IgnoreMetricDataPointsOrder(), + ) +} + diff --git a/tests/receivers/apachespark/testdata/all_metrics_config.yaml b/tests/receivers/apachespark/testdata/all_metrics_config.yaml new file mode 100644 index 0000000000..c2c6a695ae --- /dev/null +++ b/tests/receivers/apachespark/testdata/all_metrics_config.yaml @@ -0,0 +1,21 @@ +# ./bin/otelcontribcol_darwin_arm64 --config ./receiver/apachesparkreceiver/testdata/all_metrics_config.yaml +receivers: + apachespark: + collection_interval: 15s + +exporters: + otlp: + endpoint: "${OTLP_ENDPOINT}" + tls: + insecure: true + +service: + telemetry: + logs: + level: "info" + 
pipelines: + metrics: + receivers: + - apachespark + exporters: + - otlp diff --git a/tests/receivers/apachespark/testdata/all_metrics_expected.yaml b/tests/receivers/apachespark/testdata/all_metrics_expected.yaml new file mode 100644 index 0000000000..1bb2f22256 --- /dev/null +++ b/tests/receivers/apachespark/testdata/all_metrics_expected.yaml @@ -0,0 +1,1249 @@ +resourceMetrics: + - resource: + attributes: + - key: spark.application.id + value: + stringValue: local-1684786598779 + - key: spark.application.name + value: + stringValue: PythonStatusAPIDemo + scopeMetrics: + - metrics: + - description: Disk space used by the BlockManager. + name: spark.driver.block_manager.disk.usage + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: mb + - description: Memory usage for the driver's BlockManager. + name: spark.driver.block_manager.memory.usage + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: location + value: + stringValue: off_heap + - key: state + value: + stringValue: used + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: location + value: + stringValue: on_heap + - key: state + value: + stringValue: used + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: location + value: + stringValue: off_heap + - key: state + value: + stringValue: free + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "434" + attributes: + - key: location + value: + stringValue: on_heap + - key: state + value: + stringValue: free + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: mb + - description: Average time spent during CodeGenerator source code compilation operations. 
+ gauge: + dataPoints: + - asDouble: 0 + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + name: spark.driver.code_generator.compilation.average_time + unit: ms + - description: Number of source code compilation operations performed by the CodeGenerator. + name: spark.driver.code_generator.compilation.count + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ compilation }' + - description: Average class size of the classes generated by the CodeGenerator. + gauge: + dataPoints: + - asDouble: 0 + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + name: spark.driver.code_generator.generated_class.average_size + unit: bytes + - description: Number of classes generated by the CodeGenerator. + name: spark.driver.code_generator.generated_class.count + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ class }' + - description: Average method size of the classes generated by the CodeGenerator. + gauge: + dataPoints: + - asDouble: 0 + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + name: spark.driver.code_generator.generated_method.average_size + unit: bytes + - description: Number of methods generated by the CodeGenerator. + name: spark.driver.code_generator.generated_method.count + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ method }' + - description: Average size of the source code generated by a CodeGenerator code generation operation. 
+ gauge: + dataPoints: + - asDouble: 0 + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + name: spark.driver.code_generator.source_code.average_size + unit: bytes + - description: Number of source code generation operations performed by the CodeGenerator. + name: spark.driver.code_generator.source_code.operations + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ operation }' + - description: Number of active jobs currently being processed by the DAGScheduler. + name: spark.driver.dag_scheduler.job.active + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "1" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ job }' + - description: Number of jobs that have been submitted to the DAGScheduler. + name: spark.driver.dag_scheduler.job.count + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "1" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ job }' + - description: Number of stages the DAGScheduler is either running or needs to run. + name: spark.driver.dag_scheduler.stage.count + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "1" + attributes: + - key: status + value: + stringValue: running + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: status + value: + stringValue: waiting + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ stage }' + - description: Number of failed stages run by the DAGScheduler. 
+ name: spark.driver.dag_scheduler.stage.failed + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ stage }' + - description: Number of garbage collection operations performed by the driver. + name: spark.driver.executor.gc.operations + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: gc_type + value: + stringValue: minor + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: gc_type + value: + stringValue: major + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ gc_operation }' + - description: Total elapsed time during garbage collection operations performed by the driver. + name: spark.driver.executor.gc.time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: gc_type + value: + stringValue: minor + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: gc_type + value: + stringValue: major + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ms + - description: Amount of execution memory currently used by the driver. + name: spark.driver.executor.memory.execution + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: location + value: + stringValue: off_heap + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: location + value: + stringValue: on_heap + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: bytes + - description: Amount of memory used by the driver's JVM. 
+ name: spark.driver.executor.memory.jvm + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: location + value: + stringValue: off_heap + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: location + value: + stringValue: on_heap + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: bytes + - description: Amount of pool memory currently used by the driver. + name: spark.driver.executor.memory.pool + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: type + value: + stringValue: direct + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: type + value: + stringValue: mapped + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: bytes + - description: Amount of storage memory currently used by the driver. + name: spark.driver.executor.memory.storage + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: location + value: + stringValue: off_heap + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: location + value: + stringValue: on_heap + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: bytes + - description: Number of file cache hits on the HiveExternalCatalog. 
+ name: spark.driver.hive_external_catalog.file_cache_hits + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ hit }' + - description: Number of files discovered while listing the partitions of a table in the Hive metastore + name: spark.driver.hive_external_catalog.files_discovered + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ file }' + - description: Number of calls to the underlying Hive Metastore client made by the Spark application. + name: spark.driver.hive_external_catalog.hive_client_calls + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ call }' + - description: Number of parallel listing jobs initiated by the HiveExternalCatalog when listing partitions of a table. + name: spark.driver.hive_external_catalog.parallel_listing_jobs + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ listing_job }' + - description: Table partitions fetched by the HiveExternalCatalog. + name: spark.driver.hive_external_catalog.partitions_fetched + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ partition }' + - description: Current CPU time taken by the Spark driver. 
+ name: spark.driver.jvm_cpu_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "8330000000" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ns + - description: Number of events that have been dropped by the LiveListenerBus. + name: spark.driver.live_listener_bus.dropped + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ event }' + - description: Number of events that have been posted on the LiveListenerBus. + name: spark.driver.live_listener_bus.posted + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "37" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ event }' + - description: Average time taken for the LiveListenerBus to process an event posted to it. + gauge: + dataPoints: + - asDouble: 1.743476601344461 + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + name: spark.driver.live_listener_bus.processing_time.average + unit: ms + - description: Number of events currently waiting to be processed by the LiveListenerBus. + name: spark.driver.live_listener_bus.queue_size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ event }' + scope: + name: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachesparkreceiver + version: latest + - resource: + attributes: + - key: spark.application.id + value: + stringValue: local-1684786598779 + - key: spark.application.name + value: + stringValue: PythonStatusAPIDemo + - key: spark.stage.id + value: + intValue: "1" + scopeMetrics: + - metrics: + - description: The amount of disk space used for storing portions of overly large data chunks that couldn't fit in memory in this stage. 
+ name: spark.stage.disk.spilled + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: CPU time spent by the executor in this stage. + name: spark.stage.executor.cpu_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ns + - description: Amount of time spent by the executor in this stage. + name: spark.stage.executor.run_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ms + - description: Number of records written and read in this stage. + name: spark.stage.io.records + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: direction + value: + stringValue: in + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: direction + value: + stringValue: out + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ record }' + - description: Amount of data written and read at this stage. + name: spark.stage.io.size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: direction + value: + stringValue: in + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: direction + value: + stringValue: out + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: The amount of time the JVM spent on garbage collection in this stage. 
+ name: spark.stage.jvm_gc_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ms + - description: Peak memory used by internal data structures created during shuffles, aggregations and joins in this stage. + name: spark.stage.memory.peak + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: The amount of memory moved to disk due to size constraints (spilled) in this stage. + name: spark.stage.memory.spilled + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: Number of blocks fetched in shuffle operations in this stage. + name: spark.stage.shuffle.blocks_fetched + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: source + value: + stringValue: remote + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: source + value: + stringValue: local + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ block }' + - description: Time spent in this stage waiting for remote shuffle blocks. + name: spark.stage.shuffle.fetch_wait_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ms + - description: Amount of data read to disk in shuffle operations (sometimes required for large blocks, as opposed to the default behavior of reading into memory). 
+ name: spark.stage.shuffle.io.disk + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: Amount of data read in shuffle operations in this stage. + name: spark.stage.shuffle.io.read.size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: source + value: + stringValue: local + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: source + value: + stringValue: remote + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: Number of records written or read in shuffle operations in this stage. + name: spark.stage.shuffle.io.records + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: direction + value: + stringValue: in + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: direction + value: + stringValue: out + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ record }' + - description: Amount of data written in shuffle operations in this stage. + name: spark.stage.shuffle.io.write.size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: Time spent blocking on writes to disk or buffer cache in this stage. + name: spark.stage.shuffle.write_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ns + - description: A one-hot encoding representing the status of this stage. 
+ name: spark.stage.status + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: active + value: + boolValue: true + - key: complete + value: + boolValue: false + - key: pending + value: + boolValue: false + - key: failed + value: + boolValue: false + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ status }' + - description: Number of active tasks in this stage. + name: spark.stage.task.active + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ task }' + - description: Number of tasks with a specific result in this stage. + name: spark.stage.task.result + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: result + value: + stringValue: completed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: result + value: + stringValue: failed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: result + value: + stringValue: killed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ task }' + - description: The amount of data transmitted back to the driver by all the tasks in this stage. 
+ name: spark.stage.task.result_size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + scope: + name: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachesparkreceiver + version: latest + - resource: + attributes: + - key: spark.application.id + value: + stringValue: local-1684786598779 + - key: spark.application.name + value: + stringValue: PythonStatusAPIDemo + - key: spark.stage.id + value: + intValue: "0" + scopeMetrics: + - metrics: + - description: The amount of disk space used for storing portions of overly large data chunks that couldn't fit in memory in this stage. + name: spark.stage.disk.spilled + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: CPU time spent by the executor in this stage. + name: spark.stage.executor.cpu_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "365309717" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ns + - description: Amount of time spent by the executor in this stage. + name: spark.stage.executor.run_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "23131" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ms + - description: Number of records written and read in this stage. 
+ name: spark.stage.io.records + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: direction + value: + stringValue: in + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: direction + value: + stringValue: out + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ record }' + - description: Amount of data written and read at this stage. + name: spark.stage.io.size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: direction + value: + stringValue: in + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: direction + value: + stringValue: out + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: The amount of time the JVM spent on garbage collection in this stage. + name: spark.stage.jvm_gc_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "58" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ms + - description: Peak memory used by internal data structures created during shuffles, aggregations and joins in this stage. + name: spark.stage.memory.peak + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: The amount of memory moved to disk due to size constraints (spilled) in this stage. + name: spark.stage.memory.spilled + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: Number of blocks fetched in shuffle operations in this stage. 
+ name: spark.stage.shuffle.blocks_fetched + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: source + value: + stringValue: remote + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: source + value: + stringValue: local + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ block }' + - description: Time spent in this stage waiting for remote shuffle blocks. + name: spark.stage.shuffle.fetch_wait_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ms + - description: Amount of data read to disk in shuffle operations (sometimes required for large blocks, as opposed to the default behavior of reading into memory). + name: spark.stage.shuffle.io.disk + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: Amount of data read in shuffle operations in this stage. + name: spark.stage.shuffle.io.read.size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: source + value: + stringValue: local + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: source + value: + stringValue: remote + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: Number of records written or read in shuffle operations in this stage. 
+ name: spark.stage.shuffle.io.records + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: direction + value: + stringValue: in + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "10" + attributes: + - key: direction + value: + stringValue: out + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ record }' + - description: Amount of data written in shuffle operations in this stage. + name: spark.stage.shuffle.io.write.size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "698" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: Time spent blocking on writes to disk or buffer cache in this stage. + name: spark.stage.shuffle.write_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "42947581" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ns + - description: A one-hot encoding representing the status of this stage. + name: spark.stage.status + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: active + value: + boolValue: false + - key: complete + value: + boolValue: true + - key: pending + value: + boolValue: false + - key: failed + value: + boolValue: false + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ status }' + - description: Number of active tasks in this stage. + name: spark.stage.task.active + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ task }' + - description: Number of tasks with a specific result in this stage. 
+ name: spark.stage.task.result + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "10" + attributes: + - key: result + value: + stringValue: completed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: result + value: + stringValue: failed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: result + value: + stringValue: killed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ task }' + - description: The amount of data transmitted back to the driver by all the tasks in this stage. + name: spark.stage.task.result_size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "15675" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + scope: + name: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachesparkreceiver + version: latest + - resource: + attributes: + - key: spark.application.id + value: + stringValue: local-1684786598779 + - key: spark.application.name + value: + stringValue: PythonStatusAPIDemo + - key: spark.executor.id + value: + stringValue: driver + scopeMetrics: + - metrics: + - description: Disk space used by this executor for RDD storage. + name: spark.executor.disk.usage + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: bytes + - description: Elapsed time the JVM spent in garbage collection in this executor. + name: spark.executor.gc_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ms + - description: Amount of data input for this executor. 
+ name: spark.executor.input_size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: Storage memory used by this executor. + name: spark.executor.memory.usage + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "6890" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: bytes + - description: Amount of data written and read during shuffle operations for this executor. + name: spark.executor.shuffle.io.size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: direction + value: + stringValue: in + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "698" + attributes: + - key: direction + value: + stringValue: out + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: The executor's storage memory usage. 
+ name: spark.executor.storage_memory.usage + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: location + value: + stringValue: on_heap + - key: state + value: + stringValue: used + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: location + value: + stringValue: on_heap + - key: state + value: + stringValue: free + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: location + value: + stringValue: off_heap + - key: state + value: + stringValue: used + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: location + value: + stringValue: off_heap + - key: state + value: + stringValue: free + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: bytes + - description: Number of tasks currently running in this executor. + name: spark.executor.task.active + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ task }' + - description: Maximum number of tasks that can run concurrently in this executor. + name: spark.executor.task.limit + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "6" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ task }' + - description: Number of tasks with a specific result in this executor. 
+ name: spark.executor.task.result + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: result + value: + stringValue: failed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "10" + attributes: + - key: result + value: + stringValue: completed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ task }' + - description: Elapsed time the JVM spent executing tasks in this executor. + name: spark.executor.time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "6876" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ms + scope: + name: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachesparkreceiver + version: latest + - resource: + attributes: + - key: spark.application.id + value: + stringValue: local-1684786598779 + - key: spark.application.name + value: + stringValue: PythonStatusAPIDemo + - key: spark.job.id + value: + intValue: "0" + scopeMetrics: + - metrics: + - description: Number of active stages in this job. + name: spark.job.stage.active + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "1" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ stage }' + - description: Number of stages with a specific result in this job. 
+ name: spark.job.stage.result + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "1" + attributes: + - key: result + value: + stringValue: completed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: result + value: + stringValue: skipped + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: result + value: + stringValue: failed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ stage }' + - description: Number of active tasks in this job. + name: spark.job.task.active + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ task }' + - description: Number of tasks with a specific result in this job. + name: spark.job.task.result + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "10" + attributes: + - key: result + value: + stringValue: completed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: result + value: + stringValue: skipped + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: result + value: + stringValue: failed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ task }' + scope: + name: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachesparkreceiver + version: latest From 5bfdf396f577841e90b1226f3e03d5f1f8eb2d69 Mon Sep 17 00:00:00 2001 From: Curtis Robert Date: Tue, 3 Sep 2024 12:14:38 -0700 Subject: [PATCH 2/9] Add more docker contents --- docker/apachespark/Dockerfile | 8 ++ docker/apachespark/long_running.py | 82 +++++++++++++++++++ .../testdata/all_metrics_config.yaml | 1 - 3 files changed, 90 insertions(+), 1 deletion(-) 
create mode 100644 docker/apachespark/Dockerfile create mode 100644 docker/apachespark/long_running.py diff --git a/docker/apachespark/Dockerfile b/docker/apachespark/Dockerfile new file mode 100644 index 0000000000..446ddbb26a --- /dev/null +++ b/docker/apachespark/Dockerfile @@ -0,0 +1,8 @@ +FROM apache/spark:3.4.0-python3 + +COPY ./long_running.py /opt/spark/examples/src/main/python/long_running.py +RUN chmod +x /opt/spark/examples/src/main/python/long_running.py + +CMD [ "/opt/spark/bin/spark-submit", "/opt/spark/examples/src/main/python/long_running.py" ] + +EXPOSE 4040 diff --git a/docker/apachespark/long_running.py b/docker/apachespark/long_running.py new file mode 100644 index 0000000000..0bd909779d --- /dev/null +++ b/docker/apachespark/long_running.py @@ -0,0 +1,82 @@ +# Keep the original spark license. +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Original source: https://github.com/apache/spark/blob/master/examples/src/main/python/status_api_demo.py +# +# NOTICE: +# Apache Spark +# Copyright 2014 and onwards The Apache Software Foundation. +# +# This product includes software developed at +# The Apache Software Foundation (http://www.apache.org/). 
+# + +import time +import threading +import queue as Queue +from typing import Any, Callable, List, Tuple + +from pyspark import SparkConf, SparkContext + + +def delayed(seconds: int) -> Callable[[Any], Any]: + def f(x: int) -> int: + time.sleep(seconds) + return x + return f + + +def call_in_background(f: Callable[..., Any], *args: Any) -> Queue.Queue: + result: Queue.Queue = Queue.Queue(1) + t = threading.Thread(target=lambda: result.put(f(*args))) + t.daemon = True + t.start() + return result + + +def main() -> None: + conf = SparkConf().set("spark.ui.showConsoleProgress", "false") + sc = SparkContext(appName="PythonStatusAPIDemo", conf=conf) + + def run() -> List[Tuple[int, int]]: + rdd = sc.parallelize(range(10), 10).map(delayed(2)) + reduced = rdd.map(lambda x: (x, 1)).reduceByKey(lambda x, y: x + y) + return reduced.map(delayed(2)).collect() + + result = call_in_background(run) + status = sc.statusTracker() + while result.empty(): + ids = status.getJobIdsForGroup() + for id in ids: + job = status.getJobInfo(id) + assert job is not None + + print("Job", id, "status: ", job.status) + for sid in job.stageIds: + info = status.getStageInfo(sid) + if info: + print("Stage %d: %d tasks total (%d active, %d complete)" % + (sid, info.numTasks, info.numActiveTasks, info.numCompletedTasks)) + time.sleep(1) + + print("Job results are:", result.get()) + sc.stop() + + +if __name__ == "__main__": + for i in range(100): + main() diff --git a/tests/receivers/apachespark/testdata/all_metrics_config.yaml b/tests/receivers/apachespark/testdata/all_metrics_config.yaml index c2c6a695ae..0ae01e4663 100644 --- a/tests/receivers/apachespark/testdata/all_metrics_config.yaml +++ b/tests/receivers/apachespark/testdata/all_metrics_config.yaml @@ -1,4 +1,3 @@ -# ./bin/otelcontribcol_darwin_arm64 --config ./receiver/apachesparkreceiver/testdata/all_metrics_config.yaml receivers: apachespark: collection_interval: 15s From 17ea06552424e6dd6b96cd248abe307dd42c8afa Mon Sep 17 00:00:00 2001 
From: Curtis Robert Date: Tue, 3 Sep 2024 12:16:28 -0700 Subject: [PATCH 3/9] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f8244a56f..aa3e88dc7d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ ### 🚀 New components 🚀 -- (Splunk) Add `apachespark` receiver ([#]()) +- (Splunk) Add `apachespark` receiver ([#5318](https://github.com/signalfx/splunk-otel-collector/pull/5318)) ## v0.108.1 From 522ba580f5a5ce84c7bae950dcdd56543b542b6d Mon Sep 17 00:00:00 2001 From: Curtis Robert Date: Tue, 3 Sep 2024 12:27:06 -0700 Subject: [PATCH 4/9] Fix formatting --- .../receivers/apachespark/apachespark_test.go | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/tests/receivers/apachespark/apachespark_test.go b/tests/receivers/apachespark/apachespark_test.go index 59dd78c044..e870b2bc09 100644 --- a/tests/receivers/apachespark/apachespark_test.go +++ b/tests/receivers/apachespark/apachespark_test.go @@ -29,15 +29,14 @@ const sparkPort = "4040" func TestApacheSparkIntegration(t *testing.T) { testutils.CheckGoldenFile(t, "all_metrics_config.yaml", "all_expected_metrics.yaml", pmetrictest.IgnoreMetricValues(), - pmetrictest.IgnoreStartTimestamp(), - pmetrictest.IgnoreTimestamp(), - pmetrictest.IgnoreResourceAttributeValue("spark.application.id"), - pmetrictest.IgnoreResourceAttributeValue("spark.application.name"), - pmetrictest.IgnoreMetricAttributeValue("active", "spark.stage.status"), - pmetrictest.IgnoreMetricAttributeValue("complete", "spark.stage.status"), - pmetrictest.IgnoreMetricAttributeValue("failed", "spark.stage.status"), - pmetrictest.IgnoreMetricAttributeValue("pending", "spark.stage.status"), - pmetrictest.IgnoreMetricDataPointsOrder(), - ), + pmetrictest.IgnoreStartTimestamp(), + pmetrictest.IgnoreTimestamp(), + pmetrictest.IgnoreResourceAttributeValue("spark.application.id"), + 
pmetrictest.IgnoreResourceAttributeValue("spark.application.name"), + pmetrictest.IgnoreMetricAttributeValue("active", "spark.stage.status"), + pmetrictest.IgnoreMetricAttributeValue("complete", "spark.stage.status"), + pmetrictest.IgnoreMetricAttributeValue("failed", "spark.stage.status"), + pmetrictest.IgnoreMetricAttributeValue("pending", "spark.stage.status"), + pmetrictest.IgnoreMetricDataPointsOrder(), ).Run(t) } From 5af0a0815c5377828f392aa2f16846695ec226ef Mon Sep 17 00:00:00 2001 From: Curtis Robert Date: Tue, 3 Sep 2024 14:13:20 -0700 Subject: [PATCH 5/9] Fix golden file test format --- tests/receivers/apachespark/apachespark_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/receivers/apachespark/apachespark_test.go b/tests/receivers/apachespark/apachespark_test.go index e870b2bc09..8113e76eb3 100644 --- a/tests/receivers/apachespark/apachespark_test.go +++ b/tests/receivers/apachespark/apachespark_test.go @@ -38,5 +38,5 @@ func TestApacheSparkIntegration(t *testing.T) { pmetrictest.IgnoreMetricAttributeValue("failed", "spark.stage.status"), pmetrictest.IgnoreMetricAttributeValue("pending", "spark.stage.status"), pmetrictest.IgnoreMetricDataPointsOrder(), - ).Run(t) + ) } From 11ed17f34094b3fef462c9f9f8a140b407eec13e Mon Sep 17 00:00:00 2001 From: Curtis Robert Date: Tue, 3 Sep 2024 14:44:40 -0700 Subject: [PATCH 6/9] rename test file --- .../{all_metrics_expected.yaml => all_expected_metrics.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/receivers/apachespark/testdata/{all_metrics_expected.yaml => all_expected_metrics.yaml} (100%) diff --git a/tests/receivers/apachespark/testdata/all_metrics_expected.yaml b/tests/receivers/apachespark/testdata/all_expected_metrics.yaml similarity index 100% rename from tests/receivers/apachespark/testdata/all_metrics_expected.yaml rename to tests/receivers/apachespark/testdata/all_expected_metrics.yaml From b3dd491dc495031d50cd246f4b968aa5be784ec7 Mon Sep 17 00:00:00 
2001 From: Curtis Robert Date: Tue, 3 Sep 2024 15:39:07 -0700 Subject: [PATCH 7/9] Ignore scope version in testing --- tests/receivers/apachespark/apachespark_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/receivers/apachespark/apachespark_test.go b/tests/receivers/apachespark/apachespark_test.go index 8113e76eb3..e3ebd989e1 100644 --- a/tests/receivers/apachespark/apachespark_test.go +++ b/tests/receivers/apachespark/apachespark_test.go @@ -31,6 +31,7 @@ func TestApacheSparkIntegration(t *testing.T) { pmetrictest.IgnoreMetricValues(), pmetrictest.IgnoreStartTimestamp(), pmetrictest.IgnoreTimestamp(), + pmetrictest.IgnoreScopeVersion(), pmetrictest.IgnoreResourceAttributeValue("spark.application.id"), pmetrictest.IgnoreResourceAttributeValue("spark.application.name"), pmetrictest.IgnoreMetricAttributeValue("active", "spark.stage.status"), From 66b857ffc36d588c842dd098e5eb10ebdfc7de37 Mon Sep 17 00:00:00 2001 From: Curtis Robert Date: Thu, 5 Sep 2024 16:16:06 -0700 Subject: [PATCH 8/9] Update internal/components/components.go --- internal/components/components.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/components/components.go b/internal/components/components.go index de913fc0c6..ee329619b4 100644 --- a/internal/components/components.go +++ b/internal/components/components.go @@ -165,7 +165,7 @@ func Get() (otelcol.Factories, error) { receivers, err := receiver.MakeFactoryMap( activedirectorydsreceiver.NewFactory(), apachereceiver.NewFactory(), - apachesparkreceiver.NewFactory(), + apachesparkreceiver.NewFactory(), awscontainerinsightreceiver.NewFactory(), awsecscontainermetricsreceiver.NewFactory(), azureblobreceiver.NewFactory(), From 27e817c7c2d7c577277df25f3ca84e5583a7a478 Mon Sep 17 00:00:00 2001 From: Curtis Robert Date: Fri, 6 Sep 2024 07:51:14 -0700 Subject: [PATCH 9/9] Fix fmt error --- internal/components/components_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/internal/components/components_test.go b/internal/components/components_test.go index 4eea654f6c..7e1b31cc93 100644 --- a/internal/components/components_test.go +++ b/internal/components/components_test.go @@ -46,7 +46,7 @@ func TestDefaultComponents(t *testing.T) { expectedReceivers := []string{ "active_directory_ds", "apache", - "apachespark", + "apachespark", "awscontainerinsightreceiver", "awsecscontainermetrics", "azureblob",