-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstart_worker.py
167 lines (142 loc) · 5.98 KB
/
start_worker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#!/usr/bin/env python3
"""
This is the bridge, which connects the horde with the ML processing.
This version checks if the aphrodite engine (an OpenAI-compatible model server)
is running on port 2242. If it's not running, it automatically launches the engine
inside a Docker container using the model specified in bridgeData.yaml.
After the engine starts, it pauses so you can review the logs before proceeding.
"""
# isort: off
import threading
import time
import socket
import subprocess
import sys
import shutil
import yaml
import os
from worker.argparser.scribe import args
from worker.utils.set_envs import set_worker_env_vars_from_config
set_worker_env_vars_from_config() # Get necessary environment variables
from worker.bridge_data.scribe import KoboldAIBridgeData # noqa: E402
from worker.logger import logger, quiesce_logger, set_logger_verbosity # noqa: E402
from worker.workers.scribe import ScribeWorker # noqa: E402
# isort: on
def is_engine_running(host='localhost', port=2242, timeout=5):
    """
    Return True if a TCP server is accepting connections on (host, port).

    Used to detect whether the aphrodite engine is already listening.

    Args:
        host (str): Hostname to probe.
        port (int): TCP port to probe.
        timeout (int | float): Connection timeout in seconds.

    Returns:
        bool: True if the connection succeeded, False otherwise.
    """
    try:
        # create_connection + `with` guarantees the socket is closed on every
        # path; the original leaked the socket object when connect() raised.
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except OSError:
        return False
def tail_docker_logs(container_id):
    """Stream a docker container's log output to stdout until it ends.

    Best-effort: any failure (docker missing, container gone, broken pipe)
    is reported to stdout and swallowed rather than raised.
    """
    try:
        proc = subprocess.Popen(
            ["docker", "logs", "-f", container_id],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
        )
    except Exception as exc:
        print("Error tailing docker logs:", exc)
        return
    try:
        # Relay each log line with a prefix so it is distinguishable from
        # the worker's own output.
        for raw_line in proc.stdout:
            print("[DOCKER LOG]", raw_line.rstrip())
    except Exception as exc:
        print("Error tailing docker logs:", exc)
def start_aphrodite_engine(model_name, gpu_count=1, old_nvidia_compute=False,
                           download_dir="./models", max_retries=10, retry_delay=2):
    """
    Launch the aphrodite engine as a Docker container with the specified model.

    Blocks until the engine answers on port 2242 or the retry budget is
    exhausted, in which case the container is killed and the process exits.

    Args:
        model_name (str): The model to load.
        gpu_count (int): Number of GPUs to allocate to the container; a value
            of 0 or less maps to docker's "all".
        old_nvidia_compute (bool): Whether to use old NVIDIA compute settings
            (forces half precision).
        download_dir (str): Directory to download models to and mount into Docker.
        max_retries (int): How many times to poll for the engine port.
        retry_delay (int | float): Seconds to sleep between polls.

    Returns:
        str: The ID of the launched container.
    """
    if shutil.which("docker") is None:
        print("Error: The 'docker' command is not found in your PATH. Please install Docker.")
        sys.exit(1)
    # Ensure the download directory exists (exist_ok avoids the
    # check-then-create race of the previous exists()/makedirs pair).
    os.makedirs(download_dir, exist_ok=True)
    # --gpus accepts either an explicit count or "all".
    gpus_param = f"count={gpu_count}" if gpu_count > 0 else "all"
    # Build the Docker command with dynamic tensor-parallel-size.
    command = [
        "docker", "run", "--rm", "-d",
        "--gpus", gpus_param,
        "-p", "2242:2242",
        "--ipc=host",
        "-v", f"{download_dir}:/app/models",
        "alpindale/aphrodite-openai:latest",
        "--model", model_name,
        "--tensor-parallel-size", str(gpu_count),
        "--download-dir", "/app/models",
    ]
    if old_nvidia_compute:
        # Older NVIDIA compute capabilities need half precision.
        command.extend(["--dtype", "half"])
    command.append("--launch-kobold-api")
    print("Starting aphrodite engine with command:", " ".join(command))
    result = subprocess.run(command, capture_output=True, text=True)
    if result.returncode != 0:
        print("Failed to launch docker container for aphrodite engine:")
        print(result.stderr)
        sys.exit(1)
    container_id = result.stdout.strip()
    print("Started aphrodite docker container with ID:", container_id)
    # Stream container logs in the background so startup problems are visible.
    log_thread = threading.Thread(target=tail_docker_logs, args=(container_id,), daemon=True)
    log_thread.start()
    # Poll until the engine is up and listening on port 2242.
    for _ in range(max_retries):
        if is_engine_running():
            print("Aphrodite engine is up and running.")
            return container_id
        time.sleep(retry_delay)
    print("Failed to start aphrodite engine after container launch. Exiting.")
    subprocess.run(["docker", "kill", container_id])
    sys.exit(1)
def main():
    """Entry point: ensure the aphrodite engine is running, then start the worker.

    Reads bridgeData.yaml, launches the engine via Docker if port 2242 is not
    answering, pauses for operator review, and finally runs the ScribeWorker.
    """
    set_logger_verbosity(args.verbosity)
    quiesce_logger(args.quiet)
    with open('bridgeData.yaml', 'rt', encoding='utf-8') as configfile:
        config = yaml.safe_load(configfile)
    bridge_data = KoboldAIBridgeData()
    bridge_data.worker_name = config.get('worker_name', 'DefaultWorker')
    bridge_data.api_key = config.get('api_key', '')
    bridge_data.model_name = config.get('model_name')
    if not bridge_data.model_name:
        print("No model specified in bridgeData.yaml. Exiting.")
        sys.exit(1)
    # Set the URLs from the YAML, keeping the class defaults as fallbacks.
    bridge_data.horde_url = config.get('horde_url', bridge_data.horde_url)
    # BUG FIX: the fallback used to be bridge_data.horde_url, which would
    # point the local inference endpoint at the horde server whenever
    # kai_url was absent from the config; fall back to the existing
    # kai_url default instead.
    bridge_data.kai_url = config.get('kai_url', bridge_data.kai_url)
    old_nvidia_compute = config.get('old_nvidia_compute', False)
    download_dir = config.get('download_dir', './models')
    gpu_count = config.get('gpu_count', 1)  # Default to 1 if not specified
    if not is_engine_running():
        print("Aphrodite engine is not running. Launching via Docker...")
        # Return value (container id) is not needed here; the container is
        # managed via its published port and the background log thread.
        start_aphrodite_engine(
            bridge_data.model_name,
            gpu_count=gpu_count,
            old_nvidia_compute=old_nvidia_compute,
            download_dir=download_dir
        )
        print("\nAphrodite engine has been launched! Check the logs above for details.")
        # Pause so the operator can inspect the engine logs before the
        # worker starts taking jobs.
        input("Press Enter to continue and start the text worker...\n")
    else:
        print("Aphrodite engine is already running.")
    try:
        worker = ScribeWorker(bridge_data)
        worker.start()
    except KeyboardInterrupt:
        logger.info("Keyboard Interrupt Received. Ending Process")
        logger.init(f"{bridge_data.worker_name} Instance", status="Stopped")
# Script entry point: only run the worker when executed directly, not on import.
if __name__ == "__main__":
    main()