Skip to content

Commit

Permalink
Collect scheduler memray profiles when benchmarking geo
Browse files Browse the repository at this point in the history
  • Loading branch information
hendrikmakait committed Oct 23, 2024
1 parent bf231bb commit e35661b
Show file tree
Hide file tree
Showing 9 changed files with 96 additions and 2 deletions.
1 change: 1 addition & 0 deletions .github/workflows/ab_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ jobs:
DB_NAME: ${{ matrix.runtime-version }}-${{ matrix.repeat }}.db
CLUSTER_KWARGS: AB_environments/${{ matrix.runtime-version }}.cluster.yaml
H2O_DATASETS: ${{ matrix.h2o_datasets }}
MEMRAY_PROFILE: "scheduler"
run: pytest --benchmark ${{ matrix.pytest_args }}

- name: Dump coiled.Cluster kwargs
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ jobs:
COILED_RUNTIME_VERSION: ${{ matrix.runtime-version }}
DB_NAME: ${{ matrix.name_prefix }}-${{ matrix.os }}-py${{ matrix.python_version }}.db
DASK_DATAFRAME__QUERY_PLANNING: True
MEMRAY_PROFILE: "scheduler"
run: |
pytest --benchmark -n 4 --dist loadscope ${{ env.PYTEST_MARKERS }} ${{ matrix.pytest_args }}
Expand Down
1 change: 1 addition & 0 deletions AB_environments/AB_baseline.conda.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ dependencies:
- pystac-client ==0.8.3
- odc-stac ==0.3.10
- adlfs ==2024.7.0
- memray ==1.13.4
# End copy-paste

- pip:
Expand Down
1 change: 1 addition & 0 deletions AB_environments/AB_sample.conda.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ dependencies:
- pystac-client ==0.8.3
- odc-stac ==0.3.10
- adlfs ==2024.7.0
- memray ==1.13.4
# End copy-paste

- pip:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""Add column for Memray profiles url
Revision ID: 1095dfdfc4ae
Revises: 2d2405ad763b
Create Date: 2024-10-23 11:11:15.238042
"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = '1095dfdfc4ae'
down_revision = '2d2405ad763b'
branch_labels = None
depends_on = None


def upgrade() -> None:
op.add_column('test_run', sa.Column('memray_profiles_url', sa.String(), nullable=True))


def downgrade() -> None:
op.drop_column("test_run", "memray_profiles_url")
1 change: 1 addition & 0 deletions benchmark_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class TestRun(Base):
# Artifacts
performance_report_url = Column(String, nullable=True) # Not yet collected
cluster_dump_url = Column(String, nullable=True)
memray_profiles_url = Column(String, nullable=True)


class TPCHRun(Base):
Expand Down
1 change: 1 addition & 0 deletions ci/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ dependencies:
- pystac-client ==0.8.3
- odc-stac ==0.3.10
- adlfs ==2024.7.0
- memray ==1.13.4

########################################################
# PLEASE READ:
Expand Down
64 changes: 64 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import pickle
import subprocess
import sys
import tarfile
import threading
import time
import uuid
Expand All @@ -28,6 +29,7 @@
import yaml
from coiled import Cluster
from dask.distributed import Client, WorkerPlugin
from dask.distributed.diagnostics import memray
from dask.distributed.diagnostics.memory_sampler import MemorySampler
from dask.distributed.scheduler import logger as scheduler_logger
from dotenv import load_dotenv
Expand Down Expand Up @@ -68,6 +70,14 @@ def pytest_addoption(parser):
"--benchmark", action="store_true", help="Collect benchmarking data for tests"
)

parser.addoption(
"--memray",
action="store",
default="scheduler",
help="Memray profiles to collect: scheduler or none",
choices=("scheduler", "none"),
)


def pytest_sessionfinish(session, exitstatus):
# https://github.com/pytest-dev/pytest/issues/2393
Expand Down Expand Up @@ -652,6 +662,16 @@ def _(**exta_options):
return _


@pytest.fixture(scope="session")
def s3_memray_profiles(s3):
profiles_url = f"{S3_BUCKET}/memray-profiles"
# Ensure that the memray-profiles directory exists,
# but do NOT remove it as multiple test runs could be
# accessing it at the same time
s3.mkdirs(profiles_url, exist_ok=True)
return profiles_url


@pytest.fixture(scope="session")
def s3_scratch(s3):
# Ensure that the test-scratch directory exists,
Expand All @@ -672,6 +692,13 @@ def s3_url(s3, s3_scratch, test_name_uuid):
s3.rm(url, recursive=True)


@pytest.fixture(scope="function")
def s3_memray_profiles_url(s3, s3_memray_profiles, test_name_uuid):
url = f"{s3_memray_profiles}/{test_name_uuid}"
s3.mkdirs(url, exist_ok=False)
return url


GCS_REGION = "us-central1"
GCS_BUCKET = "gs://coiled-oss-scratch/benchmarks-bot"

Expand Down Expand Up @@ -834,3 +861,40 @@ def _(*args, **kwargs):
@pytest.fixture(params=[0.1, 1])
def memory_multiplier(request):
return request.param


@pytest.fixture
def memray_profile(
pytestconfig,
s3,
s3_memray_profiles_url,
s3_storage_options,
test_run_benchmark,
tmp_path,
):
if not test_run_benchmark:
yield
else:
memray_option = pytestconfig.getoption("--memray")

if memray_option == "none":
yield
elif memray_option != "scheduler":
raise ValueError(f"Unhandled value for --memray: {memray_option}")

@contextlib.contextmanager
def _memray_profile(client):
profiles_path = tmp_path / "profiles"
profiles_path.mkdir()
try:
with memray.memray_scheduler(directory=profiles_path):
yield
finally:
archive = tmp_path / "memray.tar.gz"
with tarfile.open(archive, mode="w:gz") as tar:
for item in profiles_path.iterdir():
tar.add(item, arcname=item.name)
test_run_benchmark.memray_profiles_url = s3_memray_profiles_url
s3.put(archive, s3_memray_profiles_url)

yield _memray_profile
4 changes: 2 additions & 2 deletions tests/geospatial/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def cluster_name(request, scale):


@pytest.fixture()
def client_factory(cluster_name, github_cluster_tags, benchmark_all):
def client_factory(cluster_name, github_cluster_tags, benchmark_all, memray_profile):
import contextlib

@contextlib.contextmanager
Expand All @@ -41,7 +41,7 @@ def _(n_workers, env=None, **cluster_kwargs):
if env:
cluster.send_private_envs(env=env)
with cluster.get_client() as client:
with benchmark_all(client):
with memray_profile(client), benchmark_all(client):
yield client

return _

0 comments on commit e35661b

Please sign in to comment.