From bb3a711f8a072b8cb4b692873d2b052f50304f00 Mon Sep 17 00:00:00 2001
From: x54-729
Date: Wed, 24 Jan 2024 19:09:10 +0800
Subject: [PATCH] load_internlm_model: change config to internlm2

---
 tools/README.md              | 10 ++++++
 tools/load_internlm_model.py | 69 ++++++++++++++++++++++--------------
 2 files changed, 52 insertions(+), 27 deletions(-)

diff --git a/tools/README.md b/tools/README.md
index 2b47b1f4..af3f30c8 100644
--- a/tools/README.md
+++ b/tools/README.md
@@ -15,6 +15,14 @@
 └── tokenizer.py # tool for converting raw data into bin and meta files
 ```
 
+# load_internlm_model.py
+
+This file loads natively trained InternLM and InternLM2 models and runs an inference example. The `ckpt_dir` argument of `initialize_internlm_model` is the model path, `model_type` is the model type (currently `INTERNLM`, `INTERNLM2`, `INTERNLM_MoE`, and `LLAMA2` are supported), and `model_config` is the model configuration.
+
+```bash
+torchrun --nproc_per_node=1 tools/load_internlm_model.py
+```
+
 # tokenizer.py
 
 Generating the `bin` and `meta` files for raw data requires a `tokenizer`. The tokenizer model is imported by specifying the model parameter path in `tools/tokenizer.py`. We currently provide `tokenizer_internlm.model` for generating tokens. To use a different model, modify the model parameter path in `tokenizer.py` directly.
 
@@ -66,6 +74,8 @@ $ python tools/tokenizer.py --text_input_path raw_data.txt --bin_output_path cn/
 
 The `bin` and `meta` file formats for `json` and `jsonl` files are the same as for `txt`, so they are not described again here.
 
+# alpaca_tokenizer.py
+
 # pal_inference.py
 
 Performs inference on the [GSM8K](https://huggingface.co/datasets/gsm8k) dataset with the [PAL](https://github.com/reasoning-machines/pal) paradigm, letting the model write code and solve math problems by executing it in a Python interpreter. Usage:

diff --git a/tools/load_internlm_model.py b/tools/load_internlm_model.py
index 98e6ad53..a483c9a7 100644
--- a/tools/load_internlm_model.py
+++ b/tools/load_internlm_model.py
@@ -2,18 +2,33 @@
 import logging
 import os
 import re
+import sys
 from typing import Callable, Dict, List, Optional, Union
 
 import torch
 
-from internlm.apis.inference import SequenceGenerator
-from internlm.core.context import ParallelMode
-from internlm.core.context import global_context as gpc
-from internlm.initialize.launch import launch_from_torch
-from internlm.train import initialize_model
-from internlm.utils.registry import MODEL_INITIALIZER
-from internlm.utils.storage_manager import get_fns, init_storage_manager, llm_load
-from tools.interface import GenerationConfig
+sys.path.append(os.getcwd())
+
+from internlm.apis.inference import (  # noqa: E402 # pylint: disable=C0413
+    SequenceGenerator,
+)
+from internlm.core.context import ParallelMode  # noqa: E402 # pylint: disable=C0413
+from internlm.core.context import (  # noqa: E402 # pylint: disable=C0413
+    global_context as gpc,
+)
+from internlm.initialize.launch import (  # noqa: E402 # pylint: disable=C0413
+    launch_from_torch,
+)
+from internlm.train import initialize_model  # noqa: E402 # pylint: disable=C0413
+from internlm.utils.registry import (  # noqa: E402 # pylint: disable=C0413
+    MODEL_INITIALIZER,
+)
+from internlm.utils.storage_manager import (  # noqa: E402 # pylint: disable=C0413
+    get_fns,
+    init_storage_manager,
+    llm_load,
+)
+from tools.interface import GenerationConfig  # noqa: E402 # pylint: disable=C0413
 
 logger = logging.getLogger(__file__)
 logging.basicConfig(level=logging.INFO)
@@ -134,11 +149,11 @@ def initialize_internlm_model(
     """Initialize internlm model.
 
     Args:
-        model_type (str): The types of models supported by internlm framework, such as "INTERNLM".
+        model_type (str): The types of models supported by internlm framework, such as "INTERNLM" or "INTERNLM2".
         ckpt_dir (str): Directory where model checkpoints are stored. Its format needs to be like this:
             (a) local path, such as: "local:{your local path}";
            (b) boto3 path, such as: "boto3:s3://{bucket name}.{ip}/{your ceph path}".
-        model_config (Optional[Union[Dict, str]], optional): Configuration of models. Defaults to None.
+        model_config (Optional[Union[Dict, str]], optional): Configuration of models.
         del_model_prefix (bool, optional): Whether to remove the "model." string in the key in state_dict.
             Defaults to False.
         param_dtype (torch.dtype, optional): The dtype of the model at inference time. This value can be a string.
@@ -256,34 +271,34 @@ def internlm_interactive_generation(
     >>> torchrun --master_port 12331 --nnodes=1 --node_rank=0 --nproc_per_node=1 tools/load_internlm_model.py
     """
     model = initialize_internlm_model(
-        model_type="INTERNLM",
+        model_type="INTERNLM2",
         ckpt_dir="[Please replace this with the directory where the internlm model weights are stored]",
+        # config for 7B INTERNLM2
         model_config=dict(
-            checkpoint=False,
+            vocab_size=92544,
+            mlp_ratio=3.5,
             num_attention_heads=32,
-            embed_split_hidden=True,
-            vocab_size=103168,
-            embed_grad_scale=1,
-            parallel_output=False,
             hidden_size=4096,
             num_layers=32,
-            mlp_ratio=8 / 3,
-            apply_post_layer_norm=False,
-            dtype="torch.bfloat16",
-            norm_type="rmsnorm",
-            layer_norm_epsilon=1e-5,
-            use_flash_attn=True,
+            num_kv_attention_heads=8,
+            adapt_hf=False,
+            rope_base=1000000,
+            norm_head=False,
+            checkpoint=1,
+            embed_split_hidden=True,
             num_chunks=1,
-            use_dynamic_ntk_rope=True,
+            no_bias=True,
         ),
         del_model_prefix=True,
     )
 
     from sentencepiece import SentencePieceProcessor
 
-    prompt = """<|User|>:{query}\n<|Bot|>:"""
-    prompt = prompt.replace("{query}", "hello")
-    tokenizer = SentencePieceProcessor("tools/tokenizer_internlm.model")  # pylint: disable=E1121
+    query = "hello"
+    # [UNUSED_TOKEN_146] -> <|im_start|>:92543
+    # [UNUSED_TOKEN_145] -> <|im_end|>:92542
+    prompt = f"""[UNUSED_TOKEN_146]user\n{query}[UNUSED_TOKEN_145]\n[UNUSED_TOKEN_146]assistant\n"""
+    tokenizer = SentencePieceProcessor("tools/tokenizer_internlm2.model")  # pylint: disable=E1121
 
     generation_config = GenerationConfig()
     output_generator = internlm_interactive_generation(
@@ -291,7 +306,7 @@ def internlm_interactive_generation(
         tokenizer=tokenizer,
         prompt=prompt,
         generation_config=generation_config,
-        additional_eos_token_list=[103028],
+        additional_eos_token_list=[92542],
     )
 
     for text in output_generator:
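For reference, the InternLM2 prompt format introduced by this patch (`[UNUSED_TOKEN_146]` as `<|im_start|>`, id 92543; `[UNUSED_TOKEN_145]` as `<|im_end|>`, id 92542) extends naturally to multi-turn chat. The sketch below is not part of the patch; the helper name `build_chat_prompt` and the `(role, content)` turn list are illustrative assumptions, shown only to make the template explicit.

```python
# Minimal sketch (not part of this patch): assembling an InternLM2-style chat
# prompt from (role, content) turns, using the token aliases noted in the diff:
# [UNUSED_TOKEN_146] -> <|im_start|> (id 92543), [UNUSED_TOKEN_145] -> <|im_end|> (id 92542).
IM_START = "[UNUSED_TOKEN_146]"
IM_END = "[UNUSED_TOKEN_145]"


def build_chat_prompt(turns):
    """Build a prompt from (role, content) pairs, e.g. [("user", "hello")].

    The final "assistant" header is left open so the model generates the reply;
    92542 (<|im_end|>) is then a sensible additional EOS token, as in the patch.
    """
    parts = [f"{IM_START}{role}\n{content}{IM_END}\n" for role, content in turns]
    parts.append(f"{IM_START}assistant\n")
    return "".join(parts)


# Reproduces the single-turn prompt used in the __main__ block above:
prompt = build_chat_prompt([("user", "hello")])
```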