Skip to content

Commit

Permalink
checkpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
ankona committed Apr 17, 2024
1 parent 5ef4af5 commit 472715c
Show file tree
Hide file tree
Showing 2 changed files with 189 additions and 12 deletions.
70 changes: 58 additions & 12 deletions smartsim/_core/entrypoints/dragon.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import dataclasses
import json
import os
import signal
Expand All @@ -37,6 +38,7 @@
import zmq
import zmq.auth.thread

from smartsim._core.config import get_config
from smartsim._core.launcher.dragon import dragonSockets
from smartsim._core.launcher.dragon.dragonBackend import DragonBackend
from smartsim._core.schemas import DragonBootstrapRequest, DragonBootstrapResponse
Expand All @@ -51,6 +53,12 @@
SHUTDOWN_INITIATED = False


@dataclasses.dataclass
class DragonEntrypointArgs:
    """Parsed command line arguments for the Dragon entrypoint."""

    # value of the `+launching_address` command line argument
    launching_address: str
    # value of the `+interface` command line argument (network interface
    # name); the argument is not required, so this is `None` when omitted
    interface: t.Optional[str] = None


def handle_signal(signo: int, _frame: t.Optional[FrameType]) -> None:
if not signo:
logger.info("Received signal with no signo")
Expand All @@ -64,10 +72,16 @@ def handle_signal(signo: int, _frame: t.Optional[FrameType]) -> None:
"""


def get_log_path() -> str:
    """Return the Dragon log file name taken from the SmartSim config
    (`dragon_log_filename`)."""
    return get_config().dragon_log_filename


def print_summary(network_interface: str, ip_address: str) -> None:
zmq_config = {"interface": network_interface, "address": ip_address}

with open("dragon_config.log", "w", encoding="utf-8") as dragon_config_log:
log_path = get_log_path()
with open(log_path, "w", encoding="utf-8") as dragon_config_log:
dragon_config_log.write(
textwrap.dedent(f"""\
-------- Dragon Configuration --------
Expand Down Expand Up @@ -128,7 +142,7 @@ def run(
break


def main(args: argparse.Namespace) -> int:
def execute_entrypoint(args: DragonEntrypointArgs) -> int:
if_config = get_best_interface_and_address()
interface = if_config.interface
address = if_config.address
Expand Down Expand Up @@ -186,16 +200,31 @@ def main(args: argparse.Namespace) -> int:
return 0


def remove_config_log() -> None:
    """Remove the Dragon `config_log` file from the file system. Used to
    clean up after a dragon environment is shutdown to eliminate an
    unnecessary attempt to connect to a stopped ZMQ server.

    A missing file is not an error: there is simply nothing to clean up.
    """
    try:
        # attempt the removal directly instead of checking for existence
        # first; the file could disappear between the check and the removal
        os.remove(get_log_path())
    except FileNotFoundError:
        # the log was never written or has already been removed
        pass


def cleanup() -> None:
    """Remove the Dragon config log and record that shutdown has started.

    The module-level `SHUTDOWN_INITIATED` flag is set even if removing the
    config log raises; the exception still propagates to the caller.
    """
    global SHUTDOWN_INITIATED  # pylint: disable=global-statement
    logger.debug("Cleaning up")
    try:
        remove_config_log()
    finally:
        # a failed file removal must not prevent the shutdown flag from
        # being raised
        SHUTDOWN_INITIATED = True


if __name__ == "__main__":
os.environ["PYTHONUNBUFFERED"] = "1"
logger.info("Dragon server started")
def register_signal_handlers():
    """Install `handle_signal` as the handler for every signal in `SIGNALS`.

    Handlers must be registered before the process is started so that our
    signaller is able to stop the database process.
    """
    for signum in SIGNALS:
        signal.signal(signum, handle_signal)


def parse_arguments(args: t.List[str]) -> DragonEntrypointArgs:
parser = argparse.ArgumentParser(
prefix_chars="+", description="SmartSim Dragon Head Process"
)
Expand All @@ -208,12 +237,29 @@ def cleanup() -> None:
parser.add_argument(
"+interface", type=str, help="Network Interface name", required=False
)
args_ = parser.parse_args()
args_ = parser.parse_args(args)

return DragonEntrypointArgs(args_.launching_address, args_.interface)

# make sure to register the cleanup before the start
# the process so our signaller will be able to stop
# the database process.
for sig in SIGNALS:
signal.signal(sig, handle_signal)

sys.exit(main(args_))
def main(args_: t.List[str]) -> int:
    """Execute the dragon entrypoint as a module

    :param args_: command line arguments, in the form accepted by
        `argparse.ArgumentParser.parse_args` (i.e. without the program name)
    :returns: the return code of the entrypoint, or -1 if it raised an
        unexpected exception
    """
    os.environ["PYTHONUNBUFFERED"] = "1"
    logger.info("Dragon server started")

    args = parse_arguments(args_)
    register_signal_handlers()

    try:
        return execute_entrypoint(args)
    except Exception:  # pylint: disable=broad-exception-caught
        # top-level boundary: log any failure and convert it to an error
        # code; `SystemExit` and `KeyboardInterrupt` are deliberately not
        # caught so an exit request or interrupt is not masked
        logger.error(
            "An unexpected error occurred in the Dragon entrypoint.", exc_info=True
        )
        return -1
    finally:
        # always runs, whether the entrypoint returned or raised
        cleanup()


if __name__ == "__main__":
    # drop the program name: `main` forwards its arguments to
    # `ArgumentParser.parse_args`, which would otherwise reject `sys.argv[0]`
    # as an unrecognized positional argument
    sys.exit(main(sys.argv[1:]))
131 changes: 131 additions & 0 deletions tests/on_wlm/test_dragon_entrypoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# BSD 2-Clause License
#
# Copyright (c) 2021-2023, Hewlett Packard Enterprise
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os
import pathlib
import pytest
import typing as t


from smartsim._core.entrypoints.dragon import (
cleanup,
main,
parse_arguments,
register_signal_handlers,
remove_config_log,
)


@pytest.fixture
def mock_argv() -> t.List[str]:
    """Provide a valid argument list for the dragon entrypoint"""
    launching_address = ["+launching_address", "mock-addr"]
    interface = ["+interface", "mock-interface"]
    return launching_address + interface


def test_file_removal(test_dir: str, monkeypatch: pytest.MonkeyPatch):
    """Check that `remove_config_log` deletes an existing config log file"""
    log_file = pathlib.Path(test_dir, "mocked_file_name.txt")
    log_file.touch()

    def fake_log_path() -> str:
        return str(log_file)

    with monkeypatch.context() as patcher:
        # redirect the entrypoint's log path lookup into the test directory
        patcher.setattr(
            "smartsim._core.entrypoints.dragon.get_log_path", fake_log_path
        )

        remove_config_log()
        assert not log_file.exists(), "Dragon config file was not removed"


def test_file_removal_on_bad_path(test_dir: str, monkeypatch: pytest.MonkeyPatch):
    """Verify that file removal doesn't blow up if the log file wasn't created"""
    mock_file_name = "mocked_file_name.txt"
    expected_path = pathlib.Path(test_dir) / mock_file_name

    with monkeypatch.context() as ctx:
        # ensure we get outputs in the test directory
        ctx.setattr(
            "smartsim._core.entrypoints.dragon.get_log_path", lambda: str(expected_path)
        )

        # confirm the file doesn't exist...
        assert not expected_path.exists(), "Dragon config file unexpectedly exists"

        # ensure we don't blow up; any exception raised here fails the test
        # and reports the real traceback instead of a bare `assert False`
        remove_config_log()

        # removal must not create the file as a side effect
        assert not expected_path.exists()


def test_dragon_failure(
    mock_argv: t.List[str], test_dir: str, monkeypatch: pytest.MonkeyPatch
):
    """Verify that the expected cleanup actions are taken when the dragon
    entrypoint exits due to an unexpected exception"""
    mock_file_name = "mocked_file_name.txt"
    expected_path = pathlib.Path(test_dir) / mock_file_name
    expected_path.touch()

    with monkeypatch.context() as ctx:
        # ensure we get outputs in the test directory
        ctx.setattr(
            "smartsim._core.entrypoints.dragon.get_log_path", lambda: str(expected_path)
        )

        def raiser(args_) -> int:
            raise RuntimeError("Something bad...")

        # we don't need to execute the entrypoint...
        ctx.setattr("smartsim._core.entrypoints.dragon.execute_entrypoint", raiser)

        return_code = main(mock_argv)

        # ensure our exception error code is returned
        assert return_code == -1
        # the cleanup should still remove our config file on failure
        assert not expected_path.exists(), "Dragon config file was not removed!"


def test_dragon_main(
    mock_argv: t.List[str], test_dir: str, monkeypatch: pytest.MonkeyPatch
):
    """Verify that the expected startup & cleanup actions are taken when the dragon
    entrypoint exits"""
    mock_file_name = "mocked_file_name.txt"
    expected_path = pathlib.Path(test_dir) / mock_file_name
    expected_path.touch()

    with monkeypatch.context() as ctx:
        # ensure we get outputs in the test directory
        ctx.setattr(
            "smartsim._core.entrypoints.dragon.get_log_path", lambda: str(expected_path)
        )
        # we don't need to execute the actual entrypoint...
        ctx.setattr(
            "smartsim._core.entrypoints.dragon.execute_entrypoint", lambda args_: 0
        )
        # start from a known, different value so the assertion below proves
        # that `main` set the variable, and so monkeypatch restores the
        # original environment when the context exits
        ctx.setenv("PYTHONUNBUFFERED", "0")

        return_code = main(mock_argv)

        # execute_entrypoint should return 0 from our mock
        assert return_code == 0
        # the cleanup should remove our config file
        assert not expected_path.exists(), "Dragon config file was not removed!"
        # the environment should be set as expected
        assert os.environ.get("PYTHONUNBUFFERED", None) == "1"

0 comments on commit 472715c

Please sign in to comment.