-
Notifications
You must be signed in to change notification settings - Fork 128
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Cherry pick fixes and features (#164)
* Support ms swift * fix * Upgrade cuda version * fix branch mapping error * fix resource display issue * use graceful deletion * bug fix --------- Co-authored-by: James <xzgan@opencsg.com>
- Loading branch information
Showing
26 changed files
with
1,316 additions
and
35 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
446 changes: 446 additions & 0 deletions
446
builder/store/database/migrations/20241018113252_init_swift_runtime_framework.down.sql
Large diffs are not rendered by default.
Oops, something went wrong.
446 changes: 446 additions & 0 deletions
446
builder/store/database/migrations/20241018113252_init_swift_runtime_framework.up.sql
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# Build on the CUDA "devel" image instead of the "base" image.
FROM nvidia/cuda:12.1.1-devel-ubuntu22.04

# Build-time only: keeps apt/dpkg from prompting during the build without
# leaking the setting into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment: default shell, the proxy prefixes used by the Jupyter
# and Gradio front ends, the timezone, NCCL transport switches and the
# Hugging Face cache location.
ENV SHELL=/bin/bash \
    JUPYTERHUB_SERVICE_PREFIX=/proxy/ \
    GRADIO_ROOT_PATH=/proxy/7860/ \
    TZ=Asia/Shanghai \
    NCCL_IB_DISABLE=1 \
    NCCL_P2P_DISABLE=1 \
    HF_HOME=/workspace/.cache

# Basic utilities plus timezone setup. `apt-get update` and every install
# share one layer so a cached, stale package index is never reused, and the
# index is removed in the same layer so it does not end up in the image.
# NOTE(review): --no-install-recommends is intentionally NOT used; it would
# change the installed package set (e.g. compilers/headers pulled in as
# recommendations) — confirm before slimming this down.
RUN apt-get update && apt-get install -y \
        apt-utils \
        curl \
        git \
        git-lfs \
        python3-pip \
        supervisor \
        tzdata \
        unzip \
        vim \
        wget \
    && ln -snf "/usr/share/zoneinfo/${TZ}" /etc/localtime \
    && echo "${TZ}" > /etc/timezone \
    && dpkg-reconfigure -f noninteractive tzdata \
    && rm -rf /var/lib/apt/lists/*

# Make `python` resolve to python3 and point pip at the Tsinghua PyPI mirror.
RUN ln -sf /usr/bin/python3 /usr/bin/python \
    && pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple

# Install the appropriate torch version
#RUN pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu121
# NOTE(review): jupyterlab and deepspeed are not version-pinned, so rebuilds
# may pick up different releases.
RUN pip install --no-cache-dir jupyterlab numpy==1.26.4 \
    torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 \
    jupyter-server-proxy==4.4.0 deepspeed \
    gradio-client==1.4.0

# Directory that holds the ms-swift checkout and the CSGHub helper scripts.
WORKDIR /etc/csghub
#RUN git clone https://github.com/modelscope/ms-swift.git --branch v2.5.0 --single-branch
RUN git clone https://gitee.com/xzgan/ms-swift.git --branch v2.5.0 --single-branch
# Editable install of the checkout with the "llm" extra (path form avoids `cd`).
RUN pip install --no-cache-dir -e "./ms-swift[llm]"
# These packages are updated frequently, so they are installed in their own layer.
RUN pip install --no-cache-dir vllm==v0.6.3.post1 transformers==4.45.2 timm==1.0.11 evalscope==0.5.5

# Set up supervisord and copy the Jupyter config and helper scripts.
RUN mkdir -p /var/log/supervisord
COPY swift/supervisord.conf /etc/supervisor/conf.d/supervisord.conf
COPY swift/jupyter_notebook_config.py /root/.jupyter/jupyter_notebook_config.py
COPY swift/ /etc/csghub/
RUN chmod +x /etc/csghub/*.sh

# JupyterLab user settings: dark theme and line numbers in code cells.
RUN mkdir -p /root/.jupyter/lab/user-settings/@jupyterlab/apputils-extension && \
    echo '{"theme":"JupyterLab Dark"}' > /root/.jupyter/lab/user-settings/@jupyterlab/apputils-extension/themes.jupyterlab-settings && \
    mkdir -p /root/.jupyter/lab/user-settings/@jupyterlab/notebook-extension && \
    echo '{"codeCellConfig":{"lineNumbers":true }}' > /root/.jupyter/lab/user-settings/@jupyterlab/notebook-extension/tracker.jupyterlab-settings

# Fix gradio proxy issue: replace the installed gradio with the patched wheel.
RUN pip uninstall -y gradio \
    && pip install --no-cache-dir https://opencsg-public-resource.oss-cn-beijing.aliyuncs.com/csghub/gradio/gradio-5.1.0-py3-none-any.whl

# Default working directory of the running container.
WORKDIR /workspace/
# supervisord is the container entrypoint and starts the configured services.
ENTRYPOINT ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
EXPOSE 8000
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# Jupyter server configuration for the ms-swift image.
# `c` is the configuration object provided by Jupyter's config loader when it
# executes this file; it is not defined here.
import os

# Listen on all interfaces and never try to open a local browser.
c.ServerApp.ip = '0.0.0.0'
c.ServerApp.open_browser = False
c.ServerApp.allow_root = True
c.ServerApp.port_retries = 0
c.ServerApp.quit_button = False
c.ServerApp.answer_yes = True

# Access control is fully relaxed: empty token, XSRF check disabled, any origin
# accepted and embedding allowed from any frame ancestor.
# NOTE(review): presumably the server is only reachable through an
# authenticating proxy — confirm before exposing this port directly.
c.ServerApp.token = ""
c.ServerApp.allow_remote_access = True
c.ServerApp.disable_check_xsrf = True
c.ServerApp.allow_origin = '*'
c.ServerApp.trust_xheaders = True
c.ServerApp.tornado_settings = {
    "headers": {
        "Content-Security-Policy": "frame-ancestors 'self' *"
    }
}

# c.ServerApp.base_url = context_path

# opt-in the async version to file handler and checkpoints
c.ServerApp.checkpoints_class = "jupyter_server.services.contents.checkpoints.AsyncCheckpoints"

# Do not delete files to trash: https://github.com/jupyter/notebook/issues/3130
c.FileContentsManager.delete_to_trash = False

c.ContentsManager.allow_hidden = True

# improve the performance of autocompletion, disable Jedi in IPython (the LSP servers for Python use Jedi too)
c.Completer.use_jedi = False

# https://forums.fast.ai/t/jupyter-notebook-enhancements-tips-and-tricks/17064/22
# NOTE(review): these two use the NotebookApp section while everything else
# uses ServerApp — verify they are still picked up by the server in use.
c.NotebookApp.iopub_msg_rate_limit = 100000000
c.NotebookApp.iopub_data_rate_limit = 2147483647

# inject proxy js (it is hack)

# c.ServerProxy['non_service_rewrite_response'] = [proxy_local_server]
c.FileContentsManager.always_delete_dir = True
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
#!/bin/bash
#
# Memory watchdog: every 10 seconds compare the container's memory usage with
# its cgroup limit and, when usage comes within the reserved headroom of the
# limit, send SIGTERM to the process with the highest memory share.

# reserve 200M (bytes of headroom kept below the cgroup limit)
threshold=209715200

while true; do
    # Start every pass from a clean slate so stale readings are never reused.
    max_memory=""
    current_memory=""

    # cgroup v2: limit and usage live directly under /sys/fs/cgroup.
    # The existence check targets the memory file that is actually read.
    # NOTE(review): memory.current also counts page cache, whereas the v1
    # branch below only looks at rss — confirm this asymmetry is intended.
    if test -f "/sys/fs/cgroup/memory.max"; then
        max_memory=$(cat /sys/fs/cgroup/memory.max)
        current_memory=$(cat /sys/fs/cgroup/memory.current)
    fi

    # cgroup v1: limit from memory.limit_in_bytes, usage from the rss line of memory.stat.
    if test -f "/sys/fs/cgroup/memory/memory.limit_in_bytes"; then
        max_memory=$(cat /sys/fs/cgroup/memory/memory.limit_in_bytes)
        MEMORY_STAT_PATH="/sys/fs/cgroup/memory/memory.stat"
        current_memory=$(awk '$1 == "rss" {print $2}' "$MEMORY_STAT_PATH")
    fi

    # "max" means no limit is configured, so there is nothing to enforce.
    if [ "${max_memory}" == "max" ]; then
        sleep 86400
        continue
    fi

    # Neither cgroup layout was readable (or a value is not a plain integer):
    # skip this pass instead of feeding empty strings to the arithmetic below.
    if ! [[ "$max_memory" =~ ^[0-9]+$ && "$current_memory" =~ ^[0-9]+$ ]]; then
        echo "Unable to read cgroup memory limit/usage; skipping check." >&2
        sleep 10
        continue
    fi

    less_max_memory=$((max_memory - threshold))
    if [ "$current_memory" -gt "$less_max_memory" ]; then
        # Get the PID of the process with the highest memory usage
        # (first data row of ps output sorted by %mem, descending).
        pid=$(ps -eo pid,%mem --sort=-%mem | awk 'NR==2 {print $1}')

        # Kill the process; report only when a PID was found and the signal was sent.
        if [ -n "$pid" ] && kill "$pid"; then
            echo "Process with PID $pid killed due to memory exceeding the limit."
        fi
    fi

    sleep 10
done
Oops, something went wrong.