Skip to content

Commit

Permalink
Add log file cleanup to dragon entrypoint (#554)
Browse files Browse the repository at this point in the history
Update the dragon entrypoint to ensure that the log file is removed when
the environment is shut down.

Additional updates:
- minor refactor to enable testing entrypoint features
- add tests for entrypoint functions
- update incorrect license clause

[ committed by @ankona ]
[ reviewed by @al-rigazzi ]
  • Loading branch information
ankona authored Apr 23, 2024
1 parent 5ef4af5 commit 9fd7fe6
Show file tree
Hide file tree
Showing 3 changed files with 365 additions and 16 deletions.
84 changes: 69 additions & 15 deletions smartsim/_core/entrypoints/dragon.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import dataclasses
import json
import os
import signal
Expand All @@ -37,6 +38,7 @@
import zmq
import zmq.auth.thread

from smartsim._core.config import get_config
from smartsim._core.launcher.dragon import dragonSockets
from smartsim._core.launcher.dragon.dragonBackend import DragonBackend
from smartsim._core.schemas import DragonBootstrapRequest, DragonBootstrapResponse
Expand All @@ -51,7 +53,13 @@
SHUTDOWN_INITIATED = False


def handle_signal(signo: int, _frame: t.Optional[FrameType]) -> None:
@dataclasses.dataclass
class DragonEntrypointArgs:
    """Parsed command-line arguments for the Dragon entrypoint."""

    # Value of the `+launching_address` command-line option.
    launching_address: str
    # Value of the `+interface` command-line option. The option is not
    # required, so this is None when it is omitted.
    interface: t.Optional[str] = None


def handle_signal(signo: int, _frame: t.Optional[FrameType] = None) -> None:
if not signo:
logger.info("Received signal with no signo")
else:
Expand All @@ -64,10 +72,16 @@ def handle_signal(signo: int, _frame: t.Optional[FrameType]) -> None:
"""


def get_log_path() -> str:
    """Return the Dragon log file path from the SmartSim configuration.

    :returns: the ``dragon_log_filename`` attribute of the object returned
        by ``get_config()``
    """
    return get_config().dragon_log_filename


def print_summary(network_interface: str, ip_address: str) -> None:
zmq_config = {"interface": network_interface, "address": ip_address}

with open("dragon_config.log", "w", encoding="utf-8") as dragon_config_log:
log_path = get_log_path()
with open(log_path, "w", encoding="utf-8") as dragon_config_log:
dragon_config_log.write(
textwrap.dedent(f"""\
-------- Dragon Configuration --------
Expand Down Expand Up @@ -128,7 +142,7 @@ def run(
break


def main(args: argparse.Namespace) -> int:
def execute_entrypoint(args: DragonEntrypointArgs) -> int:
if_config = get_best_interface_and_address()
interface = if_config.interface
address = if_config.address
Expand Down Expand Up @@ -186,34 +200,74 @@ def main(args: argparse.Namespace) -> int:
return 0


def remove_config_log() -> None:
    """Remove the Dragon `config_log` file from the file system. Used to
    clean up after a dragon environment is shut down to eliminate an
    unnecessary attempt to connect to a stopped ZMQ server.

    A missing file is not an error; any other ``OSError`` raised by
    ``os.remove`` propagates to the caller."""
    log_path = get_log_path()
    try:
        # Attempt the removal directly rather than checking for existence
        # first: the file may disappear between the check and the removal.
        os.remove(log_path)
    except FileNotFoundError:
        pass


def cleanup() -> None:
    """Remove the Dragon config log and flag that shutdown has begun.

    Sets the module-level ``SHUTDOWN_INITIATED`` flag to ``True``. The flag
    is set even if removing the log file raises; the error still propagates
    to the caller.
    """
    global SHUTDOWN_INITIATED  # pylint: disable=global-statement
    logger.debug("Cleaning up")
    try:
        remove_config_log()
    finally:
        # A failed file removal must not leave the shutdown flag unset.
        SHUTDOWN_INITIATED = True


if __name__ == "__main__":
os.environ["PYTHONUNBUFFERED"] = "1"
logger.info("Dragon server started")
def register_signal_handlers() -> None:
    """Install `handle_signal` as the handler for every signal in `SIGNALS`.

    The handlers have to be registered before the process is started so
    that our signaller is able to stop the database process.
    """
    for signum in SIGNALS:
        signal.signal(signum, handle_signal)


def parse_arguments(args: t.List[str]) -> DragonEntrypointArgs:
    """Parse the Dragon entrypoint command-line arguments.

    Options use ``+`` as their prefix character. ``+launching_address`` is
    required; argparse reports an error and exits if it is missing.
    ``+interface`` is optional.

    :param args: argument strings to parse (without the program name)
    :returns: the parsed launching address and network interface
    :raises ValueError: if the supplied launching address is empty
    """
    parser = argparse.ArgumentParser(
        prefix_chars="+", description="SmartSim Dragon Head Process"
    )
    parser.add_argument(
        "+launching_address",
        type=str,
        help="Address of launching process if a ZMQ connection can be established",
        required=True,
    )
    parser.add_argument(
        "+interface",
        type=str,
        help="Network Interface name",
        required=False,
    )
    args_ = parser.parse_args(args)

    # `required=True` only guarantees the option is present; an empty
    # value still has to be rejected explicitly.
    if not args_.launching_address:
        raise ValueError("Empty launching address supplied.")

    return DragonEntrypointArgs(args_.launching_address, args_.interface)

sys.exit(main(args_))

def main(args_: t.List[str]) -> int:
    """Execute the dragon entrypoint as a module

    :param args_: command-line argument strings handed to `parse_arguments`
    :returns: the return code of `execute_entrypoint`, or -1 if it raised
    """
    os.environ["PYTHONUNBUFFERED"] = "1"
    logger.info("Dragon server started")

    entrypoint_args = parse_arguments(args_)
    register_signal_handlers()

    try:
        return execute_entrypoint(entrypoint_args)
    except Exception:
        logger.error(
            "An unexpected error occurred in the Dragon entrypoint.", exc_info=True
        )
        return -1
    finally:
        # Runs on both the success and the failure path.
        cleanup()


if __name__ == "__main__":
    # Drop the program name (sys.argv[0]): the parser defines no positional
    # arguments, so passing it through makes argparse reject the command line.
    sys.exit(main(sys.argv[1:]))
2 changes: 1 addition & 1 deletion tests/on_wlm/test_dragon.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# BSD 2-Clause License
#
# Copyright (c) 2021-2023, Hewlett Packard Enterprise
# Copyright (c) 2021-2024, Hewlett Packard Enterprise
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
Expand Down
Loading

0 comments on commit 9fd7fe6

Please sign in to comment.