Merge main to develop (#17)
Signed-off-by: daijun1 <daijun1@eccom.com.cn>
Co-authored-by: x54-729 <45304952+x54-729@users.noreply.github.com>
Co-authored-by: ytxiong <45058324+yingtongxiong@users.noreply.github.com>
Co-authored-by: vansin <msnode@163.com>
Co-authored-by: Pryest <54388244+Pryest@users.noreply.github.com>
Co-authored-by: Yining Li <liyining0712@gmail.com>
Co-authored-by: Lyu Han <lvhan_028@163.com>
Co-authored-by: djsaber <60215276+djsaber@users.noreply.github.com>
Co-authored-by: daijun1 <daijun1@eccom.com.cn>
Co-authored-by: Sun Peng <sunpengsdu@gmail.com>
Co-authored-by: Yang Gao <Gary1546308416AL@gmail.com>
11 people authored Jan 18, 2024
1 parent 02c2d03 commit cba90e6
Showing 48 changed files with 209 additions and 801 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/demo_in_readme.yaml
@@ -1,5 +1,5 @@
name: demo-in-readme
-on:
+on:
pull_request:
branches:
- "main"
@@ -83,7 +83,7 @@ jobs:
source activate internlm-env-test
export PYTHONPATH=$PWD:$PYTHONPATH
sh ./ci_scripts/train/load_ckpt.sh 7B_load_new_ckpt ${GITHUB_RUN_ID}-${GITHUB_JOB}
-rsync -av --remove-source-files $GITHUB_WORKSPACE/llm_ckpts ${{env.WORKSPACE_PREFIX}}/ci_clean_bak
+rsync -av --remove-source-files $GITHUB_WORKSPACE/llm_ckpts ${{env.WORKSPACE_PREFIX}}/ci_clean_bak
- name: torchrun-train
run: |
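The `rsync` line in this workflow is a move-to-backup pattern: checkpoints written by the CI job are copied into a cleanup directory and the sources are deleted in the same pass. A minimal standalone sketch, with placeholder paths standing in for the CI variables:

```bash
# Placeholder paths; the real job uses $GITHUB_WORKSPACE/llm_ckpts and
# ${WORKSPACE_PREFIX}/ci_clean_bak.
SRC="$HOME/llm_ckpts"
DST="$HOME/ci_clean_bak"

mkdir -p "$DST"
# -a preserves modes and timestamps, -v lists transferred files, and
# --remove-source-files deletes each source file after it is copied,
# leaving only the empty source directory tree behind.
rsync -av --remove-source-files "$SRC" "$DST"
```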
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -50,4 +50,4 @@ repos:
[
'--rcfile=.pylintrc',
'--disable=C0114,C0415,W0212,W0235,W0238,W0621,C0103,R1735,C2801,E0402,C0412,W0719,R1728,W1514,W0718,W0105,W0707,C0209,W0703,W1203'
-]
+]
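The hook arguments above amount to a pylint invocation with a project rcfile plus an explicit disable list. To reproduce the check outside CI, something along these lines should work (assuming `pre-commit` and `pylint` are installed; the `internlm/` package path is a guess):

```bash
# Run all configured hooks across the repository, as CI would:
pre-commit run --all-files

# Or call pylint directly with the same rcfile and disable list:
pylint --rcfile=.pylintrc \
  --disable=C0114,C0415,W0212,W0235,W0238,W0621,C0103,R1735,C2801,E0402,C0412,W0719,R1728,W1514,W0718,W0105,W0707,C0209,W0703,W1203 \
  internlm/
```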
2 changes: 1 addition & 1 deletion .pylintrc
@@ -425,4 +425,4 @@ valid-metaclass-classmethod-first-arg=mcs
# Exceptions that will emit a warning when being caught. Defaults to
# "Exception"
overgeneral-exceptions=builtins.BaseException,
-builtins.Exception
+builtins.Exception
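`overgeneral-exceptions` names the exception classes pylint treats as too broad to catch, and the fully qualified `builtins.` form is what recent pylint versions expect. A quick hedged demonstration, run from the repository root (the warning is `broad-except`/W0703 on older pylint and `broad-exception-caught`/W0718 on newer ones):

```bash
cat > /tmp/demo_broad_except.py <<'EOF'
"""Demo: catching a class listed in overgeneral-exceptions."""

def risky():
    """Divide by zero and swallow the error."""
    try:
        return 1 / 0
    except Exception:  # flagged as an overgeneral exception
        return None
EOF
pylint --rcfile=.pylintrc /tmp/demo_broad_except.py
```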
2 changes: 1 addition & 1 deletion .readthedocs.yml → .readthedocs.yaml
@@ -3,7 +3,7 @@
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

# Required
-version: 2
+# version: 2

# Set the OS, Python version and other tools you might need
build:
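Read the Docs treats the top-level `version` key as required (the file's own `# Required` comment says as much), so it is worth inspecting the parsed result after editing. A hedged sketch, assuming a Python interpreter with PyYAML available:

```bash
python - <<'EOF'
import yaml

with open(".readthedocs.yaml") as f:
    cfg = yaml.safe_load(f)

# A commented-out `version:` line simply vanishes from the parsed mapping.
print("parsed config:", cfg)
print("version present:", cfg is not None and "version" in cfg)
EOF
```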
192 changes: 25 additions & 167 deletions README-ja-JP.md

Large diffs are not rendered by default.

289 changes: 27 additions & 262 deletions README-zh-Hans.md

Large diffs are not rendered by default.

270 changes: 19 additions & 251 deletions README.md

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion doc/code-docs/locales/en/LC_MESSAGES/example/20B_demo.po
@@ -46,4 +46,3 @@ msgstr "Training Results"
#: ../../source/example/20B_demo.rst:175
msgid "基于以上训练配置和启动命令,两节点 16GPU 下的模型训练部分日志展示如下:"
msgstr "Taking the configuration of the demo training on two nodes with 16 GPUs on slurm as an example, the training result log is shown below:"
-
1 change: 0 additions & 1 deletion doc/code-docs/locales/en/LC_MESSAGES/example/7B_demo.po
@@ -46,4 +46,3 @@ msgstr "Training Results"
#: ../../source/example/7B_demo.rst:173 33ec81f34e3c4340beacdb5254069d08
msgid "基于以上训练配置和启动命令,单节点 8GPU 下的模型训练部分日志展示如下:"
msgstr "Taking the configuration of the demo training on a single machine with 8 GPUs on slurm as an example, the training result log is shown below:"
-
1 change: 0 additions & 1 deletion doc/code-docs/locales/en/LC_MESSAGES/initialize.po
@@ -244,4 +244,3 @@ msgid ""
"A tuple of ``(trainer, train_dataloader, test_dataloader, lr_scheduler)``"
" where only ``trainer`` could not be None."
msgstr ""
-
1 change: 0 additions & 1 deletion doc/code-docs/locales/en/LC_MESSAGES/install.po
@@ -136,4 +136,3 @@ msgstr "For the local standard image built with dockerfile or pulled, use the fo
#: ../../../install.md:87 66613606256e4094a6be5ab2af1269ae
msgid "容器内默认目录即 `/InternLM`,根据[使用文档](./usage.md)即可启动训练。"
msgstr "The default directory in the container is `/InternLM`, please start training according to the [Usage](./usage.md)."
-
1 change: 0 additions & 1 deletion doc/code-docs/locales/en/LC_MESSAGES/parallel.po
@@ -453,4 +453,3 @@ msgstr ""
#: internlm.solver.optimizer.hybrid_zero_optim.HybridZeroOptimizer.step:7 of
msgid "Whether the gradient is success updated, and the gradient."
msgstr ""
-
1 change: 0 additions & 1 deletion doc/code-docs/locales/en/LC_MESSAGES/profiler.po
@@ -171,4 +171,3 @@ msgstr ""
#: internlm.utils.simple_memory_profiler.SimpleMemoryProfiler.step:1 of
msgid "Update the memory state of the optimizer state."
msgstr ""
-
1 change: 0 additions & 1 deletion doc/code-docs/locales/en/LC_MESSAGES/qa.po
@@ -21,4 +21,3 @@ msgstr ""
#: ../../source/qa.rst:2 e3b22a39640a40cfb527068a7f4bbfc9
msgid "问&答"
msgstr "Q&A"
-
1 change: 0 additions & 1 deletion doc/code-docs/locales/en/LC_MESSAGES/training.po
@@ -158,4 +158,3 @@ msgstr ""

#~ msgid "InternLM训练流程图"
#~ msgstr "InternLM training process"
-
1 change: 0 additions & 1 deletion doc/code-docs/locales/en/LC_MESSAGES/usage.po
@@ -385,4 +385,3 @@ msgstr ""
#~ msgstr ""
#~ "`load_model_only_folder` and `load_ckpt_folder` "
#~ "cannot be set at the same time."
-
2 changes: 1 addition & 1 deletion doc/code-docs/requirements.txt
@@ -8,4 +8,4 @@ numpy
torch
tqdm
pyecharts
-myst-parser
+myst-parser
8 changes: 3 additions & 5 deletions doc/code-docs/source/conf.py
@@ -9,11 +9,9 @@
import os
import sys

-import torch # noqa # pylint: disable=unused-import
-
-project = "InternLM"
-copyright = "2023, InternLM Team"
-author = "InternLM Team"
+project = "InternEvo"
+copyright = "2023, InternEvo Team"
+author = "InternEvo Team"

with open("../../../version.txt", "r") as f:
release = f.readline().rstrip()
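`conf.py` reads the release string from `version.txt` three directories up, i.e. the repository root, so the docs build only from a full checkout. A minimal local build sketch, with paths taken from the layout this diff shows (the output directory is an arbitrary choice):

```bash
# Install the documentation dependencies listed in this commit:
pip install -r doc/code-docs/requirements.txt

# Build the HTML docs from the Sphinx source tree:
sphinx-build -b html doc/code-docs/source doc/code-docs/build/html
```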
12 changes: 6 additions & 6 deletions doc/code-docs/source/example/7B_demo.rst
@@ -184,9 +184,9 @@
2023-09-05 11:47:44,652 INFO parallel_context.py:508 in set_device -- process rank 0 is bound to host:SH-IDC1-10-140-1-110 device: 0
2023-09-05 11:47:51,006 INFO launch.py:354 in launch -- Distributed environment is initialized, data parallel size: 8, pipeline parallel size: 1, tensor parallel size: 1
2023-09-05 11:49:09,855 INFO hybrid_zero_optim.py:294 in _partition_param_list -- Number of elements on ranks: [894509056, 944865280, 966909952, 966909952, 966909952, 944865280, 966909952, 670068736], rank:0
-2023-09-05T11:49:58.225+08:00 INFO [training_internlm.py, line 413, in record_current_batch_training_metrics] - pid=6794 : tflops=63.283263603947816 step=0 loss=11.641494750976562 tgs (tokens/gpu/second)=1424.93 lr=4.0000000000000003e-07 loss_scale=65536.0 grad_norm={'0_default': 66.51907327507652} micro_num=4 num_consumed_tokens=131072 inf_nan_skip_batches=0 num_samples_in_batch=19 largest_length=2048 largest_batch=6 smallest_batch=3 adam_beta2=0.95 fwd_bwd_time=6.87 acc=0.0 perplexity=112181.7188 acc/en=0.0 acc/cn=0.0 acc/code=0.0 tokens/en=120836 tokens/cn=0 tokens/code=0 loss_from_metric=11.6279 loss/en=11.6279 loss/cn=nan loss/code=nan
-2023-09-05T11:50:02.553+08:00 INFO [training_internlm.py, line 413, in record_current_batch_training_metrics] - pid=6794 : tflops=171.92140761933035 step=1 loss=11.546792984008789 tgs (tokens/gpu/second)=3871.11 lr=6.000000000000001e-07 loss_scale=65536.0 grad_norm={'0_default': 64.47430144542088} micro_num=4 num_consumed_tokens=262144 inf_nan_skip_batches=0 num_samples_in_batch=16 largest_length=2048 largest_batch=5 smallest_batch=3 adam_beta2=0.95 fwd_bwd_time=4.14 acc=0.0 perplexity=103779.1406 acc/en=0.0 acc/cn=0.0 acc/code=0.0 tokens/en=120572 tokens/cn=0 tokens/code=0 loss_from_metric=11.55 loss/en=11.55 loss/cn=nan loss/code=nan
-2023-09-05T11:50:06.504+08:00 INFO [training_internlm.py, line 413, in record_current_batch_training_metrics] - pid=6794 : tflops=186.0565203348341 step=2 loss=11.106071472167969 tgs (tokens/gpu/second)=4189.39 lr=8.000000000000001e-07 loss_scale=65536.0 grad_norm={'0_default': 62.520055376005146} micro_num=4 num_consumed_tokens=393216 inf_nan_skip_batches=0 num_samples_in_batch=16 largest_length=2048 largest_batch=6 smallest_batch=3 adam_beta2=0.95 fwd_bwd_time=3.82 acc=0.0001 perplexity=71139.6797 acc/en=0.0001 acc/cn=0.0 acc/code=0.0 tokens/en=122032 tokens/cn=0 tokens/code=0 loss_from_metric=11.1724 loss/en=11.1724 loss/cn=nan loss/code=nan
-2023-09-05T11:50:10.487+08:00 INFO [training_internlm.py, line 413, in record_current_batch_training_metrics] - pid=6794 : tflops=185.48897918112567 step=3 loss=10.444510459899902 tgs (tokens/gpu/second)=4176.61 lr=1.0000000000000002e-06 loss_scale=65536.0 grad_norm={'0_default': 57.91057980979166} micro_num=4 num_consumed_tokens=524288 inf_nan_skip_batches=0 num_samples_in_batch=18 largest_length=2048 largest_batch=6 smallest_batch=3 adam_beta2=0.95 fwd_bwd_time=3.83 acc=0.0705 perplexity=39851.1289 acc/en=0.0705 acc/cn=0.0 acc/code=0.0 tokens/en=121125 tokens/cn=0 tokens/code=0 loss_from_metric=10.5929 loss/en=10.5929 loss/cn=nan loss/code=nan
-2023-09-05T11:50:14.476+08:00 INFO [training_internlm.py, line 413, in record_current_batch_training_metrics] - pid=6794 : tflops=185.8751803758398 step=4 loss=9.798665046691895 tgs (tokens/gpu/second)=4185.31 lr=1.2000000000000002e-06 loss_scale=65536.0 grad_norm={'0_default': 48.1136933755285} micro_num=4 num_consumed_tokens=655360 inf_nan_skip_batches=0 num_samples_in_batch=14 largest_length=2048 largest_batch=4 smallest_batch=3 adam_beta2=0.95 fwd_bwd_time=3.82 acc=0.076 perplexity=18045.6699 acc/en=0.076 acc/cn=0.0 acc/code=0.0 tokens/en=121365 tokens/cn=0 tokens/code=0 loss_from_metric=9.8007 loss/en=9.8007 loss/cn=nan loss/code=nan
-2023-09-05T11:50:18.442+08:00 INFO [training_internlm.py, line 413, in record_current_batch_training_metrics] - pid=6794 : tflops=185.6236609556878 step=5 loss=9.215429306030273 tgs (tokens/gpu/second)=4179.64 lr=1.4000000000000001e-06 loss_scale=65536.0 grad_norm={'0_default': 36.95489557069029} micro_num=4 num_consumed_tokens=786432 inf_nan_skip_batches=0 num_samples_in_batch=14 largest_length=2048 largest_batch=4 smallest_batch=3 adam_beta2=0.95 fwd_bwd_time=3.82 acc=0.0767 perplexity=8999.0869 acc/en=0.0767 acc/cn=0.0 acc/code=0.0 tokens/en=121223 tokens/cn=0 tokens/code=0 loss_from_metric=9.1049 loss/en=9.1049 loss/cn=nan loss/code=nan
+2023-09-05T11:49:58.225+08:00 INFO [training_internlm.py, line 413, in record_current_batch_training_metrics] - pid=6794 : tflops=63.283263603947816 step=0 loss=11.641494750976562 tgs (tokens/gpu/second)=1424.93 lr=4.0000000000000003e-07 loss_scale=65536.0 grad_norm={'0_default': 66.51907327507652} micro_num=4 num_consumed_tokens=131072 inf_nan_skip_batches=0 num_samples_in_batch=19 largest_length=2048 largest_batch=6 smallest_batch=3 adam_beta2=0.95 fwd_bwd_time=6.87 acc=0.0 perplexity=112181.7188 acc/en=0.0 acc/cn=0.0 acc/code=0.0 tokens/en=120836 tokens/cn=0 tokens/code=0 loss_from_metric=11.6279 loss/en=11.6279 loss/cn=nan loss/code=nan
+2023-09-05T11:50:02.553+08:00 INFO [training_internlm.py, line 413, in record_current_batch_training_metrics] - pid=6794 : tflops=171.92140761933035 step=1 loss=11.546792984008789 tgs (tokens/gpu/second)=3871.11 lr=6.000000000000001e-07 loss_scale=65536.0 grad_norm={'0_default': 64.47430144542088} micro_num=4 num_consumed_tokens=262144 inf_nan_skip_batches=0 num_samples_in_batch=16 largest_length=2048 largest_batch=5 smallest_batch=3 adam_beta2=0.95 fwd_bwd_time=4.14 acc=0.0 perplexity=103779.1406 acc/en=0.0 acc/cn=0.0 acc/code=0.0 tokens/en=120572 tokens/cn=0 tokens/code=0 loss_from_metric=11.55 loss/en=11.55 loss/cn=nan loss/code=nan
+2023-09-05T11:50:06.504+08:00 INFO [training_internlm.py, line 413, in record_current_batch_training_metrics] - pid=6794 : tflops=186.0565203348341 step=2 loss=11.106071472167969 tgs (tokens/gpu/second)=4189.39 lr=8.000000000000001e-07 loss_scale=65536.0 grad_norm={'0_default': 62.520055376005146} micro_num=4 num_consumed_tokens=393216 inf_nan_skip_batches=0 num_samples_in_batch=16 largest_length=2048 largest_batch=6 smallest_batch=3 adam_beta2=0.95 fwd_bwd_time=3.82 acc=0.0001 perplexity=71139.6797 acc/en=0.0001 acc/cn=0.0 acc/code=0.0 tokens/en=122032 tokens/cn=0 tokens/code=0 loss_from_metric=11.1724 loss/en=11.1724 loss/cn=nan loss/code=nan
+2023-09-05T11:50:10.487+08:00 INFO [training_internlm.py, line 413, in record_current_batch_training_metrics] - pid=6794 : tflops=185.48897918112567 step=3 loss=10.444510459899902 tgs (tokens/gpu/second)=4176.61 lr=1.0000000000000002e-06 loss_scale=65536.0 grad_norm={'0_default': 57.91057980979166} micro_num=4 num_consumed_tokens=524288 inf_nan_skip_batches=0 num_samples_in_batch=18 largest_length=2048 largest_batch=6 smallest_batch=3 adam_beta2=0.95 fwd_bwd_time=3.83 acc=0.0705 perplexity=39851.1289 acc/en=0.0705 acc/cn=0.0 acc/code=0.0 tokens/en=121125 tokens/cn=0 tokens/code=0 loss_from_metric=10.5929 loss/en=10.5929 loss/cn=nan loss/code=nan
+2023-09-05T11:50:14.476+08:00 INFO [training_internlm.py, line 413, in record_current_batch_training_metrics] - pid=6794 : tflops=185.8751803758398 step=4 loss=9.798665046691895 tgs (tokens/gpu/second)=4185.31 lr=1.2000000000000002e-06 loss_scale=65536.0 grad_norm={'0_default': 48.1136933755285} micro_num=4 num_consumed_tokens=655360 inf_nan_skip_batches=0 num_samples_in_batch=14 largest_length=2048 largest_batch=4 smallest_batch=3 adam_beta2=0.95 fwd_bwd_time=3.82 acc=0.076 perplexity=18045.6699 acc/en=0.076 acc/cn=0.0 acc/code=0.0 tokens/en=121365 tokens/cn=0 tokens/code=0 loss_from_metric=9.8007 loss/en=9.8007 loss/cn=nan loss/code=nan
+2023-09-05T11:50:18.442+08:00 INFO [training_internlm.py, line 413, in record_current_batch_training_metrics] - pid=6794 : tflops=185.6236609556878 step=5 loss=9.215429306030273 tgs (tokens/gpu/second)=4179.64 lr=1.4000000000000001e-06 loss_scale=65536.0 grad_norm={'0_default': 36.95489557069029} micro_num=4 num_consumed_tokens=786432 inf_nan_skip_batches=0 num_samples_in_batch=14 largest_length=2048 largest_batch=4 smallest_batch=3 adam_beta2=0.95 fwd_bwd_time=3.82 acc=0.0767 perplexity=8999.0869 acc/en=0.0767 acc/cn=0.0 acc/code=0.0 tokens/en=121223 tokens/cn=0 tokens/code=0 loss_from_metric=9.1049 loss/en=9.1049 loss/cn=nan loss/code=nan
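Each log line carries per-step metrics as `key=value` pairs, so throughput can be checked with ordinary text tools. A hedged sketch that averages the `tgs (tokens/gpu/second)` field across steps (the log filename is a placeholder; the field name is taken from the sample lines above):

```bash
# Extract the tokens/gpu/second value from every step line and average it.
grep -o 'tokens/gpu/second)=[0-9.]*' train.log |
  cut -d= -f2 |
  awk '{ sum += $1; n++ } END { if (n) printf "steps=%d avg_tgs=%.2f\n", n, sum / n }'
```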
4 changes: 2 additions & 2 deletions doc/code-docs/source/initialize.rst
@@ -9,7 +9,7 @@ InternLM 的训练流程可以归纳为两个步骤：
* 初始化Logger、Checkpoint管理器、Monitor管理器、Profiler,对迭代训练的过程观察、预警、记录。

2. 迭代训练

* 根据配置文件定义的张量并行、流水线并行、数据并行的大小,加载训练引擎和调度器进行混合并行训练。
* 在迭代训练中,调用 Trainer API 进行梯度置零,前向传播计算损失并反向传播,参数更新。

@@ -105,4 +105,4 @@ InternLM 在配置文件中使用字段 ``model_type`` 和 ``model`` 来控制
Trainer 初始化
-------------------------

-.. autofunction:: internlm.initialize.initialize_trainer
+.. autofunction:: internlm.initialize.initialize_trainer
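The flow this file describes is config-driven: the tensor, pipeline, and data parallel sizes come from the config file passed at launch, and trainer initialization picks them up from there. A hedged launch sketch for the single-node 7B demo (the entry script and config path follow the repository's usage docs; adjust them to your checkout):

```bash
# One node, eight GPUs, torch launcher; the config file fixes the parallel
# sizes that initialize_trainer will use.
torchrun --nnodes=1 --nproc_per_node=8 train.py \
  --config ./configs/7B_sft.py --launcher torch
```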
2 changes: 1 addition & 1 deletion doc/code-docs/source/install.md
@@ -1,2 +1,2 @@
```{include} ../../install.md
-```
+```
2 changes: 1 addition & 1 deletion doc/code-docs/source/qa.rst
@@ -1,2 +1,2 @@
问&答
-=====
+=====
2 changes: 1 addition & 1 deletion doc/code-docs/source/training.rst
@@ -6,4 +6,4 @@ InternLM 的训练 API 由 ``internlm.core.trainer.Trainer`` 管理。在定义
有关详细用法,请参阅 Trainer API 文档和示例。

.. autoclass:: internlm.core.trainer.Trainer
-:members:
+:members:
2 changes: 1 addition & 1 deletion doc/code-docs/source/usage.md
@@ -1,4 +1,4 @@
```{include} ../../usage.md
:relative-docs: docs/
:relative-images:
-```
+```
20 changes: 10 additions & 10 deletions doc/en/install.md
@@ -25,18 +25,18 @@ export CXX=${GCC_HOME}/bin/c++
```

### Environment Installation
-Clone the project `internlm` and its dependent submodules from the github repository, as follows:
+Clone the project `InternEvo` and its dependent submodules from the github repository, as follows:
```bash
-git clone git@github.com:InternLM/InternLM.git --recurse-submodules
+git clone git@github.com:InternLM/InternEvo.git --recurse-submodules
```

It is recommended to build a Python-3.10 virtual environment using conda and install the required dependencies based on the `requirements/` files:
```bash
-conda create --name internlm-env python=3.10 -y
-conda activate internlm-env
-cd internlm
-pip install -r requirements/torch.txt
-pip install -r requirements/runtime.txt
+conda create --name internevo python=3.10 -y
+conda activate internevo
+cd InternEvo
+pip install -r requirements/torch.txt
+pip install -r requirements/runtime.txt
```

Install flash-attention (version v1.0.5):
@@ -62,10 +62,10 @@ cd ../../
Users can use the provided dockerfile combined with docker.Makefile to build their own images, or obtain images with InternLM runtime environment installed from https://hub.docker.com/r/internlm/internlm.

#### Image Configuration and Build
-The configuration and build of the Dockerfile are implemented through the docker.Makefile. To build the image, execute the following command in the root directory of InternLM:
+The configuration and build of the Dockerfile are implemented through the docker.Makefile. To build the image, execute the following command in the root directory of InternEvo:
``` bash
make -f docker.Makefile BASE_OS=centos7
-```
+```
In docker.Makefile, you can customize the basic image, environment version, etc., and the corresponding parameters can be passed directly through the command line. For BASE_OS, ubuntu20.04 and centos7 are respectively supported.

#### Pull Standard Image
@@ -83,4 +83,4 @@ For the local standard image built with dockerfile or pulled, use the following
```bash
docker run --gpus all -it -m 500g --cap-add=SYS_PTRACE --cap-add=IPC_LOCK --shm-size 20g --network=host --name myinternlm internlm/internlm:torch1.13.1-cuda11.7.1-flashatten1.0.5-centos7 bash
```
-The default directory in the container is `/InternLM`, please start training according to the [Usage](./usage.md).
+The default directory in the container is `/InternEvo`, please start training according to the [Usage](./usage.md).
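Since docker.Makefile variables can be overridden on the command line, switching the base OS is a one-flag change. A hedged sketch (the resulting image tag is an assumption, mirroring the centos7 example above):

```bash
# Build against Ubuntu instead of CentOS:
make -f docker.Makefile BASE_OS=ubuntu20.04

# Start a container from the freshly built image; the tag is assumed to
# follow the same naming pattern as the centos7 run example.
docker run --gpus all -it -m 500g --cap-add=SYS_PTRACE --cap-add=IPC_LOCK \
  --shm-size 20g --network=host --name myinternlm \
  internlm/internlm:torch1.13.1-cuda11.7.1-flashatten1.0.5-ubuntu20.04 bash
```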