Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add log file cleanup to dragon entrypoint #554

Merged
merged 4 commits into from
Apr 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 69 additions & 15 deletions smartsim/_core/entrypoints/dragon.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import argparse
import dataclasses
import json
import os
import signal
Expand All @@ -37,6 +38,7 @@
import zmq
import zmq.auth.thread

from smartsim._core.config import get_config
from smartsim._core.launcher.dragon import dragonSockets
from smartsim._core.launcher.dragon.dragonBackend import DragonBackend
from smartsim._core.schemas import DragonBootstrapRequest, DragonBootstrapResponse
Expand All @@ -51,7 +53,13 @@
SHUTDOWN_INITIATED = False


def handle_signal(signo: int, _frame: t.Optional[FrameType]) -> None:
@dataclasses.dataclass
class DragonEntrypointArgs:
    """Parsed command-line arguments of the Dragon entrypoint."""

    # value supplied through the required `+launching_address` option
    launching_address: str
    # value supplied through the optional `+interface` option; argparse
    # leaves it as None when the option is omitted
    interface: t.Optional[str]


def handle_signal(signo: int, _frame: t.Optional[FrameType] = None) -> None:
if not signo:
logger.info("Received signal with no signo")
else:
Expand All @@ -64,10 +72,16 @@ def handle_signal(signo: int, _frame: t.Optional[FrameType]) -> None:
"""


def get_log_path() -> str:
    """Return the `dragon_log_filename` setting of the active configuration,
    as provided by `get_config()`."""
    return get_config().dragon_log_filename


def print_summary(network_interface: str, ip_address: str) -> None:
zmq_config = {"interface": network_interface, "address": ip_address}

with open("dragon_config.log", "w", encoding="utf-8") as dragon_config_log:
log_path = get_log_path()
with open(log_path, "w", encoding="utf-8") as dragon_config_log:
dragon_config_log.write(
textwrap.dedent(f"""\
-------- Dragon Configuration --------
Expand Down Expand Up @@ -128,7 +142,7 @@ def run(
break


def main(args: argparse.Namespace) -> int:
def execute_entrypoint(args: DragonEntrypointArgs) -> int:
if_config = get_best_interface_and_address()
interface = if_config.interface
address = if_config.address
Expand Down Expand Up @@ -186,34 +200,74 @@ def main(args: argparse.Namespace) -> int:
return 0


def remove_config_log() -> None:
    """Remove the Dragon `config_log` file from the file system. Used to
    clean up after a dragon environment is shut down to eliminate an
    unnecessary attempt to connect to a stopped ZMQ server.

    A missing file is not an error: the log may never have been written or
    may already have been removed."""
    try:
        # attempt the removal directly instead of checking for existence
        # first, so a concurrent removal cannot make cleanup fail
        os.remove(get_log_path())
    except FileNotFoundError:
        pass


def cleanup() -> None:
    """Remove the Dragon config log and flag that shutdown has started.

    `SHUTDOWN_INITIATED` is set even when removing the log file raises, so
    a failed file removal cannot leave the shutdown flag unset. Any error
    raised by the removal still propagates to the caller."""
    global SHUTDOWN_INITIATED  # pylint: disable=global-statement
    logger.debug("Cleaning up")
    try:
        remove_config_log()
    finally:
        SHUTDOWN_INITIATED = True


if __name__ == "__main__":
os.environ["PYTHONUNBUFFERED"] = "1"
logger.info("Dragon server started")
def register_signal_handlers() -> None:
    """Install `handle_signal` as the handler for every signal in `SIGNALS`.

    The handlers must be registered before the process is started so that
    the signaller is able to stop the database process."""
    for signum in SIGNALS:
        signal.signal(signum, handle_signal)


def parse_arguments(args: t.List[str]) -> DragonEntrypointArgs:
    """Parse the command-line arguments of the Dragon head process.

    Options use `+` as their prefix character.

    :param args: the argument strings to parse; must not include the
        program name
    :returns: the launching address and (possibly `None`) interface name
    :raises ValueError: if the supplied launching address is empty
    :raises SystemExit: raised by argparse when `+launching_address` is
        missing or an unrecognized argument is supplied
    """
    parser = argparse.ArgumentParser(
        prefix_chars="+", description="SmartSim Dragon Head Process"
    )
    parser.add_argument(
        "+launching_address",
        type=str,
        help="Address of launching process if a ZMQ connection can be established",
        required=True,
    )
    parser.add_argument(
        "+interface",
        type=str,
        help="Network Interface name",
        required=False,
    )
    # parse the supplied list rather than implicitly reading sys.argv
    args_ = parser.parse_args(args)

    # `required=True` only guarantees presence; reject an empty value too
    if not args_.launching_address:
        raise ValueError("Empty launching address supplied.")

    return DragonEntrypointArgs(args_.launching_address, args_.interface)

sys.exit(main(args_))

def main(args_: t.List[str]) -> int:
    """Execute the dragon entrypoint as a module

    Parses `args_`, registers the signal handlers and runs the entrypoint.
    `cleanup` is always invoked once the entrypoint stops. Returns the
    entrypoint's return code, or -1 if it raised an exception."""
    os.environ["PYTHONUNBUFFERED"] = "1"
    logger.info("Dragon server started")

    entrypoint_args = parse_arguments(args_)
    register_signal_handlers()

    # assume failure until the entrypoint reports its own return code
    exit_code = -1
    try:
        exit_code = execute_entrypoint(entrypoint_args)
    except Exception:
        logger.error(
            "An unexpected error occurred in the Dragon entrypoint.", exc_info=True
        )
    finally:
        cleanup()

    return exit_code


if __name__ == "__main__":
    # argv[0] is the program name; argparse must only be given the
    # arguments that follow it, otherwise it is rejected as unrecognized
    sys.exit(main(sys.argv[1:]))
2 changes: 1 addition & 1 deletion tests/on_wlm/test_dragon.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# BSD 2-Clause License
#
# Copyright (c) 2021-2023, Hewlett Packard Enterprise
# Copyright (c) 2021-2024, Hewlett Packard Enterprise
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm so old school.

# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
Expand Down
Loading
Loading