load_internlm_model: change config to internlm2
x54-729 committed Jan 24, 2024
1 parent 51ef2d0 commit bb3a711
Showing 2 changed files with 52 additions and 27 deletions.
10 changes: 10 additions & 0 deletions tools/README.md
@@ -15,6 +15,14 @@
└── tokenizer.py # Tool for converting raw data into bin and meta files
```

# load_internlm_model.py

This file loads natively trained InternLM and InternLM2 models and runs an inference example. The `ckpt_dir` argument of the `initialize_internlm_model` function is the model path, `model_type` is the model type (currently `INTERNLM`, `INTERNLM2`, `INTERNLM_MoE`, and `LLAMA2` are supported), and `model_config` is the model configuration.

```bash
torchrun --nproc_per_node=1 tools/load_internlm_model.py
```
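
For reference, below is a minimal Python sketch of the inference flow this script implements, assembled from the diff in this commit. The checkpoint path is a placeholder, the `model_config` shown is an abbreviated subset of the full 7B InternLM2 configuration in `tools/load_internlm_model.py`, and the final print loop is illustrative:

```python
# Minimal sketch (not the script itself): load an InternLM2 checkpoint and stream
# a chat completion. Run from the repository root via torchrun, as shown above.
from sentencepiece import SentencePieceProcessor

from tools.interface import GenerationConfig
from tools.load_internlm_model import (
    initialize_internlm_model,
    internlm_interactive_generation,
)

model = initialize_internlm_model(
    model_type="INTERNLM2",
    ckpt_dir="local:/path/to/internlm2/ckpt",  # "local:" or "boto3:" prefixed path
    # Abbreviated 7B InternLM2 settings; the full dict is in tools/load_internlm_model.py.
    model_config=dict(
        vocab_size=92544,
        hidden_size=4096,
        num_layers=32,
        num_attention_heads=32,
        num_kv_attention_heads=8,
        mlp_ratio=3.5,
        norm_type="rmsnorm",
        dtype="torch.bfloat16",
        rope_base=1000000,
        no_bias=True,
    ),
    del_model_prefix=True,
)

# InternLM2 chat template: [UNUSED_TOKEN_146] maps to <|im_start|> (id 92543)
# and [UNUSED_TOKEN_145] maps to <|im_end|> (id 92542).
prompt = "[UNUSED_TOKEN_146]user\nhello[UNUSED_TOKEN_145]\n[UNUSED_TOKEN_146]assistant\n"
tokenizer = SentencePieceProcessor("tools/tokenizer_internlm2.model")

output_generator = internlm_interactive_generation(
    model=model,
    tokenizer=tokenizer,
    prompt=prompt,
    generation_config=GenerationConfig(),
    additional_eos_token_list=[92542],  # stop at <|im_end|>
)
for text in output_generator:
    print(text, end="", flush=True)
```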

# tokenizer.py

Generating the `bin` and `meta` files for raw data requires a `tokenizer`. The tokenizer model is loaded by specifying the path to its model parameters in `tools/tokenizer.py`. Currently, `tokenizer_internlm.model` is provided to generate tokens. To use a different model, simply modify the model parameter path in `tokenizer.py`.
@@ -66,6 +74,8 @@ $ python tools/tokenizer.py --text_input_path raw_data.txt --bin_output_path cn/

For files of type `json` and `jsonl`, the format of the `bin` and `meta` files is the same as for `txt`, so it is not repeated here.
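
For example, a `jsonl` file can be tokenized with the same flags shown above; the input and output paths here are placeholders:

```bash
$ python tools/tokenizer.py --text_input_path raw_data.jsonl --bin_output_path cn/output.bin
```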

# alpaca_tokenizer.py

# pal_inference.py

Runs inference on the [GSM8K](https://huggingface.co/datasets/gsm8k) dataset using the [PAL](https://github.com/reasoning-machines/pal) paradigm, letting the model write code and solve math problems by executing it with a Python interpreter. Usage is as follows:
69 changes: 42 additions & 27 deletions tools/load_internlm_model.py
@@ -2,18 +2,33 @@
import logging
import os
import re
import sys
from typing import Callable, Dict, List, Optional, Union

import torch

from internlm.apis.inference import SequenceGenerator
from internlm.core.context import ParallelMode
from internlm.core.context import global_context as gpc
from internlm.initialize.launch import launch_from_torch
from internlm.train import initialize_model
from internlm.utils.registry import MODEL_INITIALIZER
from internlm.utils.storage_manager import get_fns, init_storage_manager, llm_load
from tools.interface import GenerationConfig
sys.path.append(os.getcwd())

from internlm.apis.inference import ( # noqa: E402 # pylint: disable=C0413
SequenceGenerator,
)
from internlm.core.context import ParallelMode # noqa: E402 # pylint: disable=C0413
from internlm.core.context import ( # noqa: E402 # pylint: disable=C0413
global_context as gpc,
)
from internlm.initialize.launch import ( # noqa: E402 # pylint: disable=C0413
launch_from_torch,
)
from internlm.train import initialize_model # noqa: E402 # pylint: disable=C0413
from internlm.utils.registry import ( # noqa: E402 # pylint: disable=C0413
MODEL_INITIALIZER,
)
from internlm.utils.storage_manager import ( # noqa: E402 # pylint: disable=C0413
get_fns,
init_storage_manager,
llm_load,
)
from tools.interface import GenerationConfig # noqa: E402 # pylint: disable=C0413

logger = logging.getLogger(__file__)
logging.basicConfig(level=logging.INFO)
@@ -134,11 +149,11 @@ def initialize_internlm_model(
"""Initialize internlm model.
Args:
model_type (str): The types of models supported by internlm framework, such as "INTERNLM".
model_type (str): The types of models supported by internlm framework, such as "INTERNLM", "INTERNLM2".
ckpt_dir (str): Directory where model checkpoints are stored. Its format needs to be like this:
(a) local path, such as: "local:{your local path}";
(b) boto3 path, such as: "boto3:s3://{bucket name}.{ip}/{your ceph path}".
model_config (Optional[Union[Dict, str]], optional): Configuration of models. Defaults to None.
model_config (Optional[Union[Dict, str]], optional): Configuration of models.
del_model_prefix (bool, optional): Whether to remove the "model." string in the key in state_dict.
Defaults to False.
param_dtype (torch.dtype, optional): The dtype of the model at inference time. This value can be a string.
@@ -256,42 +271,42 @@ def internlm_interactive_generation(
>>> torchrun --master_port 12331 --nnodes=1 --node_rank=0 --nproc_per_node=1 tools/load_internlm_model.py
"""
model = initialize_internlm_model(
model_type="INTERNLM",
model_type="INTERNLM2",
ckpt_dir="[Please replace this with the directory where the internlm model weights are stored]",
# config for 7B INTERNLM2
model_config=dict(
checkpoint=False,
vocab_size=92544,
mlp_ratio=3.5,
num_attention_heads=32,
embed_split_hidden=True,
vocab_size=103168,
embed_grad_scale=1,
parallel_output=False,
hidden_size=4096,
num_layers=32,
mlp_ratio=8 / 3,
apply_post_layer_norm=False,
dtype="torch.bfloat16",
norm_type="rmsnorm",
layer_norm_epsilon=1e-5,
use_flash_attn=True,
num_kv_attention_heads=8,
adapt_hf=False,
rope_base=1000000,
norm_head=False,
checkpoint=1,
embed_split_hidden=True,
num_chunks=1,
use_dynamic_ntk_rope=True,
no_bias=True,
),
del_model_prefix=True,
)

from sentencepiece import SentencePieceProcessor

prompt = """<|User|>:{query}<eoh>\n<|Bot|>:"""
prompt = prompt.replace("{query}", "hello")
tokenizer = SentencePieceProcessor("tools/tokenizer_internlm.model") # pylint: disable=E1121
query = "hello"
# [UNUSED_TOKEN_146] -> <|im_start|>:92543
# [UNUSED_TOKEN_145] -> <|im_end|>:92542
prompt = f"""[UNUSED_TOEKN_146]user\n{query}[UNUSED_TOEKN_145]\n[UNUSED_TOEKN_146]assistant\n"""
tokenizer = SentencePieceProcessor("tools/tokenizer_internlm2.model") # pylint: disable=E1121

generation_config = GenerationConfig()
output_generator = internlm_interactive_generation(
model=model,
tokenizer=tokenizer,
prompt=prompt,
generation_config=generation_config,
additional_eos_token_list=[103028],
additional_eos_token_list=[92542],
)

for text in output_generator:
