Skip to content

Commit

Permalink
feat(prometheus): add prometheus support (#134)
Browse files Browse the repository at this point in the history
### Description

This PR adds support for monitoring with Prometheus, by:
- adding a new top-level `observability` parameter section
- deploying a Prometheus server
- conditionally exposing metrics endpoints on all EL/CL clients, as well
as OP services
- registering a metrics scrape job for each of them with Prometheus

The `prometheus` module from the `ethereum-package` package was used as
inspiration, but modified to add helper methods and improve the
job-registration workflow.

This PR has been tested and successfully deploys a Prometheus server
with functioning metrics scrape jobs:
<img width="1786" alt="image"
src="https://github.com/user-attachments/assets/397d5c5c-2465-40cd-a18e-9c581fb30ace"
/>
  • Loading branch information
edobry authored Jan 15, 2025
1 parent d2aadba commit e22047a
Show file tree
Hide file tree
Showing 21 changed files with 784 additions and 315 deletions.
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,24 @@ The full YAML schema that can be passed in is as follows with the defaults provi

```yaml
optimism_package:
# Observability configuration
observability:
# Whether or not to configure observability (e.g. prometheus)
enabled: true
# Default prometheus configuration
prometheus_params:
storage_tsdb_retention_time: "1d"
storage_tsdb_retention_size: "512MB"
# Resource management for prometheus container
# CPU is in millicores
# RAM is in MB
min_cpu: 10
max_cpu: 1000
min_mem: 128
max_mem: 2048
# Prometheus docker image to use
# Defaults to the latest image
image: "prom/prometheus:latest"
# Interop configuration
interop:
# Whether or not to enable interop mode
Expand Down
17 changes: 17 additions & 0 deletions main.star
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ l2_launcher = import_module("./src/l2.star")
op_supervisor_launcher = import_module(
"./src/interop/op-supervisor/op_supervisor_launcher.star"
)

observability = import_module("./src/observability/observability.star")
prometheus = import_module("./src/observability/prometheus/prometheus_launcher.star")

wait_for_sync = import_module("./src/wait/wait_for_sync.star")
input_parser = import_module("./src/package_io/input_parser.star")
ethereum_package_static_files = import_module(
Expand Down Expand Up @@ -40,8 +44,11 @@ def run(plan, args):
global_log_level = optimism_args_with_right_defaults.global_log_level
persistent = optimism_args_with_right_defaults.persistent

observability_params = optimism_args_with_right_defaults.observability
interop_params = optimism_args_with_right_defaults.interop

observability_helper = observability.make_helper(observability_params)

# Deploy the L1
l1_network = ""
if external_l1_args:
Expand Down Expand Up @@ -109,6 +116,7 @@ def run(plan, args):
global_node_selectors,
global_tolerations,
persistent,
observability_helper,
interop_params,
)

Expand All @@ -120,6 +128,15 @@ def run(plan, args):
all_participants,
jwt_file,
interop_params.supervisor_params,
observability_helper,
)

if observability_helper.enabled:
plan.print("Launching prometheus...")
prometheus_private_url = prometheus.launch_prometheus(
plan,
observability_helper,
global_node_selectors,
)


Expand Down
4 changes: 0 additions & 4 deletions network_params.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ optimism_package:
chains:
- participants:
- el_type: op-geth
el_image: ""
el_log_level: ""
el_extra_env_vars: {}
el_extra_labels: {}
Expand All @@ -14,7 +13,6 @@ optimism_package:
el_min_mem: 0
el_max_mem: 0
cl_type: op-node
cl_image: ""
cl_log_level: ""
cl_extra_env_vars: {}
cl_extra_labels: {}
Expand All @@ -37,10 +35,8 @@ optimism_package:
granite_time_offset: 0
fund_dev_accounts: true
batcher_params:
image: ""
extra_params: []
mev_params:
rollup_boost_image: ""
builder_host: ""
builder_port: ""
additional_services: []
Expand Down
16 changes: 15 additions & 1 deletion src/batcher/op-batcher/op_batcher_launcher.star
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ ethereum_package_constants = import_module(
"github.com/ethpandaops/ethereum-package/src/package_io/constants.star"
)

observability = import_module("../../observability/observability.star")
prometheus = import_module("../../observability/prometheus/prometheus_launcher.star")

#
# ---------------------------------- Batcher client -------------------------------------
# The Docker container runs as the "op-batcher" user so we can't write to root
Expand Down Expand Up @@ -41,6 +44,7 @@ def launch(
l1_config_env_vars,
gs_batcher_private_key,
batcher_params,
observability_helper,
):
batcher_service_name = "{0}".format(service_name)

Expand All @@ -53,6 +57,7 @@ def launch(
l1_config_env_vars,
gs_batcher_private_key,
batcher_params,
observability_helper,
)

batcher_service = plan.add_service(service_name, config)
Expand All @@ -62,6 +67,8 @@ def launch(
batcher_service.ip_address, batcher_http_port.number
)

observability.register_op_service_metrics_job(observability_helper, batcher_service)

return "op_batcher"


Expand All @@ -74,7 +81,10 @@ def get_batcher_config(
l1_config_env_vars,
gs_batcher_private_key,
batcher_params,
observability_helper,
):
ports = dict(get_used_ports())

cmd = [
"op-batcher",
"--l2-eth-rpc=" + el_context.rpc_http_url,
Expand All @@ -93,9 +103,13 @@ def get_batcher_config(
"--data-availability-type=blobs",
]

# apply customizations

if observability_helper.enabled:
observability.configure_op_service_metrics(cmd, ports)

cmd += batcher_params.extra_params

ports = get_used_ports()
return ServiceConfig(
image=image,
ports=ports,
Expand Down
33 changes: 28 additions & 5 deletions src/challenger/op-challenger/op_challenger_launcher.star
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ ethereum_package_constants = import_module(
"github.com/ethpandaops/ethereum-package/src/package_io/constants.star"
)

observability = import_module("../../observability/observability.star")
prometheus = import_module("../../observability/prometheus/prometheus_launcher.star")

#
# ---------------------------------- Challenger client -------------------------------------
CHALLENGER_DATA_DIRPATH_ON_SERVICE_CONTAINER = "/data/op-challenger/op-challenger-data"
Expand All @@ -29,6 +32,7 @@ def launch(
deployment_output,
network_params,
challenger_params,
observability_helper,
):
challenger_service_name = "{0}".format(service_name)

Expand All @@ -44,10 +48,15 @@ def launch(
deployment_output,
network_params,
challenger_params,
observability_helper,
)

challenger_service = plan.add_service(service_name, config)

observability.register_op_service_metrics_job(
observability_helper, challenger_service
)

return "op_challenger"


Expand All @@ -63,15 +72,22 @@ def get_challenger_config(
deployment_output,
network_params,
challenger_params,
observability_helper,
):
ports = dict(get_used_ports())

cmd = [
"op-challenger",
"--cannon-l2-genesis="
+ ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS
+ "/genesis-{0}.json".format(network_params.network_id),
+ "{0}/genesis-{1}.json".format(
ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS,
network_params.network_id,
),
"--cannon-rollup-config="
+ ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS
+ "/rollup-{0}.json".format(network_params.network_id),
+ "{0}/rollup-{1}.json".format(
ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS,
network_params.network_id,
),
"--game-factory-address=" + game_factory_address,
"--datadir=" + CHALLENGER_DATA_DIRPATH_ON_SERVICE_CONTAINER,
"--l1-beacon=" + l1_config_env_vars["CL_RPC_URL"],
Expand All @@ -81,10 +97,18 @@ def get_challenger_config(
"--rollup-rpc=" + cl_context.beacon_http_url,
"--trace-type=" + "cannon,permissioned",
]

# configure files

files = {
ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: deployment_output,
}

# apply customizations

if observability_helper.enabled:
observability.configure_op_service_metrics(cmd, ports)

if (
challenger_params.cannon_prestate_path
and challenger_params.cannon_prestates_url
Expand All @@ -107,7 +131,6 @@ def get_challenger_config(
CHALLENGER_DATA_DIRPATH_ON_SERVICE_CONTAINER, " ".join(cmd)
)

ports = get_used_ports()
return ServiceConfig(
image=image,
ports=ports,
Expand Down
Loading

0 comments on commit e22047a

Please sign in to comment.