diff --git a/CHANGELOG.md b/CHANGELOG.md
index 48b5a7ebc6..6dc55b5a02 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,10 @@
 - (Splunk) Update Python to 3.12.5 in the Smart Agent bundle for Linux and Windows. Check [What’s New In Python 3.12](https://docs.python.org/3/whatsnew/3.12.html) for details. ([#5298](https://github.com/signalfx/splunk-otel-collector/pull/5298))
+### 🚀 New components 🚀
+
+- (Splunk) Add `apachespark` receiver ([#5318](https://github.com/signalfx/splunk-otel-collector/pull/5318))
+
 ## v0.108.1
 This Splunk OpenTelemetry Collector release includes changes from the [opentelemetry-collector v0.108.1](https://github.com/open-telemetry/opentelemetry-collector/releases/tag/v0.108.1) and the [opentelemetry-collector-contrib v0.108.0](https://github.com/open-telemetry/opentelemetry-collector-contrib/releases/tag/v0.108.0) releases where appropriate.
diff --git a/docker/apachespark/Dockerfile b/docker/apachespark/Dockerfile
new file mode 100644
index 0000000000..446ddbb26a
--- /dev/null
+++ b/docker/apachespark/Dockerfile
@@ -0,0 +1,8 @@
+FROM apache/spark:3.4.0-python3
+
+COPY ./long_running.py /opt/spark/examples/src/main/python/long_running.py
+RUN chmod +x /opt/spark/examples/src/main/python/long_running.py
+
+CMD [ "/opt/spark/bin/spark-submit", "/opt/spark/examples/src/main/python/long_running.py" ]
+
+EXPOSE 4040
diff --git a/docker/apachespark/long_running.py b/docker/apachespark/long_running.py
new file mode 100644
index 0000000000..0bd909779d
--- /dev/null
+++ b/docker/apachespark/long_running.py
@@ -0,0 +1,82 @@
+# Keep the original spark license.
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Original source: https://github.com/apache/spark/blob/master/examples/src/main/python/status_api_demo.py
+#
+# NOTICE:
+# Apache Spark
+# Copyright 2014 and onwards The Apache Software Foundation.
+#
+# This product includes software developed at
+# The Apache Software Foundation (http://www.apache.org/).
+# + +import time +import threading +import queue as Queue +from typing import Any, Callable, List, Tuple + +from pyspark import SparkConf, SparkContext + + +def delayed(seconds: int) -> Callable[[Any], Any]: + def f(x: int) -> int: + time.sleep(seconds) + return x + return f + + +def call_in_background(f: Callable[..., Any], *args: Any) -> Queue.Queue: + result: Queue.Queue = Queue.Queue(1) + t = threading.Thread(target=lambda: result.put(f(*args))) + t.daemon = True + t.start() + return result + + +def main() -> None: + conf = SparkConf().set("spark.ui.showConsoleProgress", "false") + sc = SparkContext(appName="PythonStatusAPIDemo", conf=conf) + + def run() -> List[Tuple[int, int]]: + rdd = sc.parallelize(range(10), 10).map(delayed(2)) + reduced = rdd.map(lambda x: (x, 1)).reduceByKey(lambda x, y: x + y) + return reduced.map(delayed(2)).collect() + + result = call_in_background(run) + status = sc.statusTracker() + while result.empty(): + ids = status.getJobIdsForGroup() + for id in ids: + job = status.getJobInfo(id) + assert job is not None + + print("Job", id, "status: ", job.status) + for sid in job.stageIds: + info = status.getStageInfo(sid) + if info: + print("Stage %d: %d tasks total (%d active, %d complete)" % + (sid, info.numTasks, info.numActiveTasks, info.numCompletedTasks)) + time.sleep(1) + + print("Job results are:", result.get()) + sc.stop() + + +if __name__ == "__main__": + for i in range(100): + main() diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 837a6ba7ea..1f0daadc67 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -15,6 +15,13 @@ services: build: ./apache ports: - "18080:80" + apachespark: + image: quay.io/splunko11ytest/apachespark:latest + profiles: + - integration + build: ./apachespark + ports: + - "4040:4040" # Cassandra image for collectd-cassandra test: cassandra: image: quay.io/splunko11ytest/cassandra:latest diff --git a/docs/components.md b/docs/components.md index e0d9f58212..9a50f89847 100644 --- a/docs/components.md +++ b/docs/components.md @@ -15,6 +15,7 @@ The distribution offers support for the following components. 
|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------| | [active_directory_ds](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/activedirectorydsreceiver) | [beta] | | [apache](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/apachereceiver) | [alpha] | +| [apachespark](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/apachesparkreceiver) | [alpha] | | [awscontainerinsights](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/awscontainerinsightreceiver) | [beta] | | [awsecscontainermetrics](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/awsecscontainermetricsreceiver) | [beta] | | [azureblob](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/azureblobreceiver) | [alpha] | diff --git a/go.mod b/go.mod index f5df9ab515..52831181f5 100644 --- a/go.mod +++ b/go.mod @@ -62,6 +62,7 @@ require ( github.com/open-telemetry/opentelemetry-collector-contrib/processor/transformprocessor v0.108.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/activedirectorydsreceiver v0.108.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachereceiver v0.108.0 + github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachesparkreceiver v0.108.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awscontainerinsightreceiver v0.108.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awsecscontainermetricsreceiver v0.108.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/azureblobreceiver v0.108.0 diff --git a/go.sum b/go.sum index b0fcb77483..4688d8b454 100644 --- a/go.sum +++ b/go.sum @@ -1394,6 +1394,8 @@ github.com/open-telemetry/opentelemetry-collector-contrib/receiver/activedirecto github.com/open-telemetry/opentelemetry-collector-contrib/receiver/activedirectorydsreceiver v0.108.0/go.mod h1:Zn5FJSJ2h6DUw84ZqwPiJFCXZWsEeBo++SV3xNeSuO4= github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachereceiver v0.108.0 h1:RFB7p/HUt46C4HDZjwwDDYYv+ipn/SVTpbuLgXzEbZQ= github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachereceiver v0.108.0/go.mod h1:7o6+Ojnd9enSTTVUfzORsgMT83qy0Ds0rNl6CXJb5cQ= +github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachesparkreceiver v0.108.0 h1:alseMqI1EDNxMBxfBMQde3r0g/KUauEWbm/u6vKzjnM= +github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachesparkreceiver v0.108.0/go.mod h1:GgM4G2KU5/CYVKUpnJkTsud8B6OvOlnNm6gz3KjsuwU= github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awscontainerinsightreceiver v0.108.0 h1:GQPIFe/91bnSymg3/4cfW6kbQD6+VMCX8yPzgaVGoaQ= github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awscontainerinsightreceiver v0.108.0/go.mod h1:BK/JiWZQtGM94UOwa+JWgI6ABUtJExPvM82uTQPXOns= github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awsecscontainermetricsreceiver v0.108.0 h1:4/SyCI9608uBuk0qkl7BWboXxWlArSys/qHBU/Wk4fQ= diff --git a/internal/components/components.go b/internal/components/components.go index 0207cf2cbb..ee329619b4 100644 --- a/internal/components/components.go +++ b/internal/components/components.go @@ -57,6 +57,7 @@ import ( "github.com/open-telemetry/opentelemetry-collector-contrib/processor/transformprocessor" 
"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/activedirectorydsreceiver" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachereceiver" + "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachesparkreceiver" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awscontainerinsightreceiver" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awsecscontainermetricsreceiver" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/azureblobreceiver" @@ -164,6 +165,7 @@ func Get() (otelcol.Factories, error) { receivers, err := receiver.MakeFactoryMap( activedirectorydsreceiver.NewFactory(), apachereceiver.NewFactory(), + apachesparkreceiver.NewFactory(), awscontainerinsightreceiver.NewFactory(), awsecscontainermetricsreceiver.NewFactory(), azureblobreceiver.NewFactory(), diff --git a/internal/components/components_test.go b/internal/components/components_test.go index 1deb781d50..7e1b31cc93 100644 --- a/internal/components/components_test.go +++ b/internal/components/components_test.go @@ -46,6 +46,7 @@ func TestDefaultComponents(t *testing.T) { expectedReceivers := []string{ "active_directory_ds", "apache", + "apachespark", "awscontainerinsightreceiver", "awsecscontainermetrics", "azureblob", diff --git a/tests/receivers/apachespark/apachespark_test.go b/tests/receivers/apachespark/apachespark_test.go new file mode 100644 index 0000000000..e3ebd989e1 --- /dev/null +++ b/tests/receivers/apachespark/apachespark_test.go @@ -0,0 +1,43 @@ +// Copyright Splunk, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//go:build integration + +package tests + +import ( + "testing" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatatest/pmetrictest" + + "github.com/signalfx/splunk-otel-collector/tests/testutils" +) + +const sparkPort = "4040" + +func TestApacheSparkIntegration(t *testing.T) { + testutils.CheckGoldenFile(t, "all_metrics_config.yaml", "all_expected_metrics.yaml", + pmetrictest.IgnoreMetricValues(), + pmetrictest.IgnoreStartTimestamp(), + pmetrictest.IgnoreTimestamp(), + pmetrictest.IgnoreScopeVersion(), + pmetrictest.IgnoreResourceAttributeValue("spark.application.id"), + pmetrictest.IgnoreResourceAttributeValue("spark.application.name"), + pmetrictest.IgnoreMetricAttributeValue("active", "spark.stage.status"), + pmetrictest.IgnoreMetricAttributeValue("complete", "spark.stage.status"), + pmetrictest.IgnoreMetricAttributeValue("failed", "spark.stage.status"), + pmetrictest.IgnoreMetricAttributeValue("pending", "spark.stage.status"), + pmetrictest.IgnoreMetricDataPointsOrder(), + ) +} diff --git a/tests/receivers/apachespark/testdata/all_expected_metrics.yaml b/tests/receivers/apachespark/testdata/all_expected_metrics.yaml new file mode 100644 index 0000000000..1bb2f22256 --- /dev/null +++ b/tests/receivers/apachespark/testdata/all_expected_metrics.yaml @@ -0,0 +1,1249 @@ +resourceMetrics: + - resource: + attributes: + - key: spark.application.id + value: + stringValue: local-1684786598779 + - key: spark.application.name + value: + stringValue: PythonStatusAPIDemo + scopeMetrics: + - metrics: + - description: Disk space used by the BlockManager. + name: spark.driver.block_manager.disk.usage + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: mb + - description: Memory usage for the driver's BlockManager. + name: spark.driver.block_manager.memory.usage + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: location + value: + stringValue: off_heap + - key: state + value: + stringValue: used + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: location + value: + stringValue: on_heap + - key: state + value: + stringValue: used + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: location + value: + stringValue: off_heap + - key: state + value: + stringValue: free + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "434" + attributes: + - key: location + value: + stringValue: on_heap + - key: state + value: + stringValue: free + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: mb + - description: Average time spent during CodeGenerator source code compilation operations. + gauge: + dataPoints: + - asDouble: 0 + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + name: spark.driver.code_generator.compilation.average_time + unit: ms + - description: Number of source code compilation operations performed by the CodeGenerator. + name: spark.driver.code_generator.compilation.count + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ compilation }' + - description: Average class size of the classes generated by the CodeGenerator. 
+ gauge: + dataPoints: + - asDouble: 0 + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + name: spark.driver.code_generator.generated_class.average_size + unit: bytes + - description: Number of classes generated by the CodeGenerator. + name: spark.driver.code_generator.generated_class.count + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ class }' + - description: Average method size of the classes generated by the CodeGenerator. + gauge: + dataPoints: + - asDouble: 0 + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + name: spark.driver.code_generator.generated_method.average_size + unit: bytes + - description: Number of methods generated by the CodeGenerator. + name: spark.driver.code_generator.generated_method.count + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ method }' + - description: Average size of the source code generated by a CodeGenerator code generation operation. + gauge: + dataPoints: + - asDouble: 0 + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + name: spark.driver.code_generator.source_code.average_size + unit: bytes + - description: Number of source code generation operations performed by the CodeGenerator. + name: spark.driver.code_generator.source_code.operations + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ operation }' + - description: Number of active jobs currently being processed by the DAGScheduler. + name: spark.driver.dag_scheduler.job.active + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "1" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ job }' + - description: Number of jobs that have been submitted to the DAGScheduler. + name: spark.driver.dag_scheduler.job.count + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "1" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ job }' + - description: Number of stages the DAGScheduler is either running or needs to run. + name: spark.driver.dag_scheduler.stage.count + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "1" + attributes: + - key: status + value: + stringValue: running + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: status + value: + stringValue: waiting + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ stage }' + - description: Number of failed stages run by the DAGScheduler. + name: spark.driver.dag_scheduler.stage.failed + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ stage }' + - description: Number of garbage collection operations performed by the driver. 
+ name: spark.driver.executor.gc.operations + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: gc_type + value: + stringValue: minor + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: gc_type + value: + stringValue: major + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ gc_operation }' + - description: Total elapsed time during garbage collection operations performed by the driver. + name: spark.driver.executor.gc.time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: gc_type + value: + stringValue: minor + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: gc_type + value: + stringValue: major + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ms + - description: Amount of execution memory currently used by the driver. + name: spark.driver.executor.memory.execution + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: location + value: + stringValue: off_heap + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: location + value: + stringValue: on_heap + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: bytes + - description: Amount of memory used by the driver's JVM. + name: spark.driver.executor.memory.jvm + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: location + value: + stringValue: off_heap + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: location + value: + stringValue: on_heap + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: bytes + - description: Amount of pool memory currently used by the driver. + name: spark.driver.executor.memory.pool + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: type + value: + stringValue: direct + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: type + value: + stringValue: mapped + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: bytes + - description: Amount of storage memory currently used by the driver. + name: spark.driver.executor.memory.storage + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: location + value: + stringValue: off_heap + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: location + value: + stringValue: on_heap + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: bytes + - description: Number of file cache hits on the HiveExternalCatalog. 
+ name: spark.driver.hive_external_catalog.file_cache_hits + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ hit }' + - description: Number of files discovered while listing the partitions of a table in the Hive metastore + name: spark.driver.hive_external_catalog.files_discovered + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ file }' + - description: Number of calls to the underlying Hive Metastore client made by the Spark application. + name: spark.driver.hive_external_catalog.hive_client_calls + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ call }' + - description: Number of parallel listing jobs initiated by the HiveExternalCatalog when listing partitions of a table. + name: spark.driver.hive_external_catalog.parallel_listing_jobs + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ listing_job }' + - description: Table partitions fetched by the HiveExternalCatalog. + name: spark.driver.hive_external_catalog.partitions_fetched + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ partition }' + - description: Current CPU time taken by the Spark driver. + name: spark.driver.jvm_cpu_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "8330000000" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ns + - description: Number of events that have been dropped by the LiveListenerBus. + name: spark.driver.live_listener_bus.dropped + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ event }' + - description: Number of events that have been posted on the LiveListenerBus. + name: spark.driver.live_listener_bus.posted + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "37" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ event }' + - description: Average time taken for the LiveListenerBus to process an event posted to it. + gauge: + dataPoints: + - asDouble: 1.743476601344461 + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + name: spark.driver.live_listener_bus.processing_time.average + unit: ms + - description: Number of events currently waiting to be processed by the LiveListenerBus. 
+ name: spark.driver.live_listener_bus.queue_size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ event }' + scope: + name: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachesparkreceiver + version: latest + - resource: + attributes: + - key: spark.application.id + value: + stringValue: local-1684786598779 + - key: spark.application.name + value: + stringValue: PythonStatusAPIDemo + - key: spark.stage.id + value: + intValue: "1" + scopeMetrics: + - metrics: + - description: The amount of disk space used for storing portions of overly large data chunks that couldn't fit in memory in this stage. + name: spark.stage.disk.spilled + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: CPU time spent by the executor in this stage. + name: spark.stage.executor.cpu_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ns + - description: Amount of time spent by the executor in this stage. + name: spark.stage.executor.run_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ms + - description: Number of records written and read in this stage. + name: spark.stage.io.records + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: direction + value: + stringValue: in + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: direction + value: + stringValue: out + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ record }' + - description: Amount of data written and read at this stage. + name: spark.stage.io.size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: direction + value: + stringValue: in + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: direction + value: + stringValue: out + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: The amount of time the JVM spent on garbage collection in this stage. + name: spark.stage.jvm_gc_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ms + - description: Peak memory used by internal data structures created during shuffles, aggregations and joins in this stage. + name: spark.stage.memory.peak + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: The amount of memory moved to disk due to size constraints (spilled) in this stage. + name: spark.stage.memory.spilled + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: Number of blocks fetched in shuffle operations in this stage. 
+ name: spark.stage.shuffle.blocks_fetched + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: source + value: + stringValue: remote + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: source + value: + stringValue: local + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ block }' + - description: Time spent in this stage waiting for remote shuffle blocks. + name: spark.stage.shuffle.fetch_wait_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ms + - description: Amount of data read to disk in shuffle operations (sometimes required for large blocks, as opposed to the default behavior of reading into memory). + name: spark.stage.shuffle.io.disk + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: Amount of data read in shuffle operations in this stage. + name: spark.stage.shuffle.io.read.size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: source + value: + stringValue: local + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: source + value: + stringValue: remote + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: Number of records written or read in shuffle operations in this stage. + name: spark.stage.shuffle.io.records + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: direction + value: + stringValue: in + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: direction + value: + stringValue: out + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ record }' + - description: Amount of data written in shuffle operations in this stage. + name: spark.stage.shuffle.io.write.size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: Time spent blocking on writes to disk or buffer cache in this stage. + name: spark.stage.shuffle.write_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ns + - description: A one-hot encoding representing the status of this stage. + name: spark.stage.status + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: active + value: + boolValue: true + - key: complete + value: + boolValue: false + - key: pending + value: + boolValue: false + - key: failed + value: + boolValue: false + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ status }' + - description: Number of active tasks in this stage. + name: spark.stage.task.active + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ task }' + - description: Number of tasks with a specific result in this stage. 
+ name: spark.stage.task.result + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: result + value: + stringValue: completed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: result + value: + stringValue: failed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: result + value: + stringValue: killed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ task }' + - description: The amount of data transmitted back to the driver by all the tasks in this stage. + name: spark.stage.task.result_size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + scope: + name: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachesparkreceiver + version: latest + - resource: + attributes: + - key: spark.application.id + value: + stringValue: local-1684786598779 + - key: spark.application.name + value: + stringValue: PythonStatusAPIDemo + - key: spark.stage.id + value: + intValue: "0" + scopeMetrics: + - metrics: + - description: The amount of disk space used for storing portions of overly large data chunks that couldn't fit in memory in this stage. + name: spark.stage.disk.spilled + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: CPU time spent by the executor in this stage. + name: spark.stage.executor.cpu_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "365309717" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ns + - description: Amount of time spent by the executor in this stage. + name: spark.stage.executor.run_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "23131" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ms + - description: Number of records written and read in this stage. + name: spark.stage.io.records + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: direction + value: + stringValue: in + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: direction + value: + stringValue: out + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ record }' + - description: Amount of data written and read at this stage. + name: spark.stage.io.size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: direction + value: + stringValue: in + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: direction + value: + stringValue: out + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: The amount of time the JVM spent on garbage collection in this stage. 
+ name: spark.stage.jvm_gc_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "58" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ms + - description: Peak memory used by internal data structures created during shuffles, aggregations and joins in this stage. + name: spark.stage.memory.peak + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: The amount of memory moved to disk due to size constraints (spilled) in this stage. + name: spark.stage.memory.spilled + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: Number of blocks fetched in shuffle operations in this stage. + name: spark.stage.shuffle.blocks_fetched + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: source + value: + stringValue: remote + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: source + value: + stringValue: local + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ block }' + - description: Time spent in this stage waiting for remote shuffle blocks. + name: spark.stage.shuffle.fetch_wait_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ms + - description: Amount of data read to disk in shuffle operations (sometimes required for large blocks, as opposed to the default behavior of reading into memory). + name: spark.stage.shuffle.io.disk + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: Amount of data read in shuffle operations in this stage. + name: spark.stage.shuffle.io.read.size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: source + value: + stringValue: local + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: source + value: + stringValue: remote + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: Number of records written or read in shuffle operations in this stage. + name: spark.stage.shuffle.io.records + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: direction + value: + stringValue: in + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "10" + attributes: + - key: direction + value: + stringValue: out + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ record }' + - description: Amount of data written in shuffle operations in this stage. + name: spark.stage.shuffle.io.write.size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "698" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: Time spent blocking on writes to disk or buffer cache in this stage. 
+ name: spark.stage.shuffle.write_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "42947581" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ns + - description: A one-hot encoding representing the status of this stage. + name: spark.stage.status + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: active + value: + boolValue: false + - key: complete + value: + boolValue: true + - key: pending + value: + boolValue: false + - key: failed + value: + boolValue: false + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ status }' + - description: Number of active tasks in this stage. + name: spark.stage.task.active + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ task }' + - description: Number of tasks with a specific result in this stage. + name: spark.stage.task.result + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "10" + attributes: + - key: result + value: + stringValue: completed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: result + value: + stringValue: failed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: result + value: + stringValue: killed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ task }' + - description: The amount of data transmitted back to the driver by all the tasks in this stage. + name: spark.stage.task.result_size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "15675" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + scope: + name: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachesparkreceiver + version: latest + - resource: + attributes: + - key: spark.application.id + value: + stringValue: local-1684786598779 + - key: spark.application.name + value: + stringValue: PythonStatusAPIDemo + - key: spark.executor.id + value: + stringValue: driver + scopeMetrics: + - metrics: + - description: Disk space used by this executor for RDD storage. + name: spark.executor.disk.usage + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: bytes + - description: Elapsed time the JVM spent in garbage collection in this executor. + name: spark.executor.gc_time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ms + - description: Amount of data input for this executor. + name: spark.executor.input_size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: Storage memory used by this executor. + name: spark.executor.memory.usage + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "6890" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: bytes + - description: Amount of data written and read during shuffle operations for this executor. 
+ name: spark.executor.shuffle.io.size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: direction + value: + stringValue: in + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "698" + attributes: + - key: direction + value: + stringValue: out + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: bytes + - description: The executor's storage memory usage. + name: spark.executor.storage_memory.usage + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: location + value: + stringValue: on_heap + - key: state + value: + stringValue: used + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: location + value: + stringValue: on_heap + - key: state + value: + stringValue: free + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: location + value: + stringValue: off_heap + - key: state + value: + stringValue: used + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: location + value: + stringValue: off_heap + - key: state + value: + stringValue: free + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: bytes + - description: Number of tasks currently running in this executor. + name: spark.executor.task.active + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ task }' + - description: Maximum number of tasks that can run concurrently in this executor. + name: spark.executor.task.limit + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "6" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ task }' + - description: Number of tasks with a specific result in this executor. + name: spark.executor.task.result + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + attributes: + - key: result + value: + stringValue: failed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "10" + attributes: + - key: result + value: + stringValue: completed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ task }' + - description: Elapsed time the JVM spent executing tasks in this executor. + name: spark.executor.time + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "6876" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: ms + scope: + name: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachesparkreceiver + version: latest + - resource: + attributes: + - key: spark.application.id + value: + stringValue: local-1684786598779 + - key: spark.application.name + value: + stringValue: PythonStatusAPIDemo + - key: spark.job.id + value: + intValue: "0" + scopeMetrics: + - metrics: + - description: Number of active stages in this job. + name: spark.job.stage.active + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "1" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ stage }' + - description: Number of stages with a specific result in this job. 
+ name: spark.job.stage.result + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "1" + attributes: + - key: result + value: + stringValue: completed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: result + value: + stringValue: skipped + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: result + value: + stringValue: failed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ stage }' + - description: Number of active tasks in this job. + name: spark.job.task.active + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + unit: '{ task }' + - description: Number of tasks with a specific result in this job. + name: spark.job.task.result + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "10" + attributes: + - key: result + value: + stringValue: completed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: result + value: + stringValue: skipped + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + - asInt: "0" + attributes: + - key: result + value: + stringValue: failed + startTimeUnixNano: "1684786599036141000" + timeUnixNano: "1684786605037452000" + isMonotonic: true + unit: '{ task }' + scope: + name: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/apachesparkreceiver + version: latest diff --git a/tests/receivers/apachespark/testdata/all_metrics_config.yaml b/tests/receivers/apachespark/testdata/all_metrics_config.yaml new file mode 100644 index 0000000000..0ae01e4663 --- /dev/null +++ b/tests/receivers/apachespark/testdata/all_metrics_config.yaml @@ -0,0 +1,20 @@ +receivers: + apachespark: + collection_interval: 15s + +exporters: + otlp: + endpoint: "${OTLP_ENDPOINT}" + tls: + insecure: true + +service: + telemetry: + logs: + level: "info" + pipelines: + metrics: + receivers: + - apachespark + exporters: + - otlp
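Note for reviewers: the test config above only sets a collection interval and otherwise relies on the receiver defaults. Below is a minimal standalone sketch of a fuller configuration; the `endpoint` and `application_names` settings are taken from the upstream apachesparkreceiver options as I understand them and from the 4040 port published by docker/apachespark, so verify them against the receiver README for v0.108.0 before copying.

receivers:
  apachespark:
    # Spark monitoring REST API; 4040 matches the port exposed by the apachespark test container.
    endpoint: http://localhost:4040
    collection_interval: 15s
    # Optional filter (assumed option name): scrape only the demo app submitted by long_running.py.
    application_names:
      - PythonStatusAPIDemo

exporters:
  otlp:
    endpoint: "${OTLP_ENDPOINT}"
    tls:
      insecure: true

service:
  pipelines:
    metrics:
      receivers: [apachespark]
      exporters: [otlp]

Since the golden-file test ignores timestamps, metric values, scope version, and the Spark application id/name, re-recording all_expected_metrics.yaml should stay stable across runs as long as the workload produces the same set of jobs, stages, and executors.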