From da064a809badd5086d61fd82accbb16ad93cde94 Mon Sep 17 00:00:00 2001
From: brian khuu
Date: Wed, 10 Jan 2024 10:52:42 +1100
Subject: [PATCH] convert.py: Outfile default name change and additional
 metadata support

---
 convert.py                  | 159 ++++++++++++++++++++++++++++++------
 gguf-py/gguf/constants.py   |   1 +
 gguf-py/gguf/gguf_writer.py |   6 ++
 llama.cpp                   |   2 +
 4 files changed, 144 insertions(+), 24 deletions(-)

diff --git a/convert.py b/convert.py
index 244eb75822fd89..44ad1206065d60 100755
--- a/convert.py
+++ b/convert.py
@@ -336,6 +336,39 @@ def load(model_plus: ModelPlus) -> Params:
 
         return params
 
+@dataclass
+class Metadata:
+    name: Optional[str] = None
+    author: Optional[str] = None
+    version: Optional[str] = None
+    url: Optional[str] = None
+    description: Optional[str] = None
+    licence: Optional[str] = None
+    source_url: Optional[str] = None
+    source_hf_repo: Optional[str] = None
+
+    @staticmethod
+    def load(metadata_path: Optional[Path]) -> "Metadata":
+        if metadata_path is None or not metadata_path.exists():
+            return Metadata()
+
+        with open(metadata_path, 'r') as file:
+            data = json.load(file)
+
+        # Create a new Metadata instance
+        metadata = Metadata()
+
+        # Assign values to the Metadata attributes if they exist in the JSON file
+        metadata.name = data.get("general.name")
+        metadata.author = data.get("general.author")
+        metadata.version = data.get("general.version")
+        metadata.url = data.get("general.url")
+        metadata.description = data.get("general.description")
+        metadata.licence = data.get("general.license")
+        metadata.source_url = data.get("general.source_url")
+        metadata.source_hf_repo = data.get("general.source_hf_repo")
+
+        return metadata
+
 
 #
 # vocab
@@ -1053,21 +1086,42 @@ class OutputFile:
     def __init__(self, fname_out: Path, endianess:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE):
         self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess)
 
-    def add_meta_arch(self, params: Params) -> None:
+    def add_meta_model(self, params: Params, metadata: Metadata) -> None:
+        # Metadata about the model and its provenance
         name = "LLaMA"
-
-        # TODO: better logic to determine model name
-        if params.n_ctx == 4096:
-            name = "LLaMA v2"
+        if metadata is not None and metadata.name is not None:
+            name = metadata.name
         elif params.path_model is not None:
-            name = str(params.path_model.parent).split('/')[-1]
-
-        self.gguf.add_name                (name)
-        self.gguf.add_vocab_size          (params.n_vocab)
-        self.gguf.add_context_length      (params.n_ctx)
-        self.gguf.add_embedding_length    (params.n_embd)
-        self.gguf.add_block_count         (params.n_layer)
-        self.gguf.add_feed_forward_length (params.n_ff)
+            name = str(params.path_model.parent).split("/")[-1]
+        elif params.n_ctx == 4096:
+            # Heuristic detection of LLaMA v2 model
+            name = "LLaMA v2"
+
+        self.gguf.add_name(name)
+
+        if metadata is not None:
+            if metadata.author is not None:
+                self.gguf.add_author(metadata.author)
+            if metadata.version is not None:
+                self.gguf.add_version(metadata.version)
+            if metadata.url is not None:
+                self.gguf.add_url(metadata.url)
+            if metadata.description is not None:
+                self.gguf.add_description(metadata.description)
+            if metadata.licence is not None:
+                self.gguf.add_licence(metadata.licence)
+            if metadata.source_url is not None:
+                self.gguf.add_source_url(metadata.source_url)
+            if metadata.source_hf_repo is not None:
+                self.gguf.add_source_hf_repo(metadata.source_hf_repo)
+
+    def add_meta_arch(self, params: Params) -> None:
+        # Metadata about the neural architecture itself
+        self.gguf.add_vocab_size(params.n_vocab)
+        self.gguf.add_context_length(params.n_ctx)
+        self.gguf.add_embedding_length(params.n_embd)
+        self.gguf.add_block_count(params.n_layer)
+        self.gguf.add_feed_forward_length(params.n_ff)
         self.gguf.add_rope_dimension_count(params.n_embd // params.n_head)
         self.gguf.add_head_count          (params.n_head)
         self.gguf.add_head_count_kv       (params.n_head_kv)
@@ -1170,13 +1224,14 @@ def close(self) -> None:
     @staticmethod
     def write_vocab_only(
         fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab,
-        endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE, pad_vocab: bool = False,
+        endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE, pad_vocab: bool = False, metadata: Metadata | None = None,
     ) -> None:
         check_vocab_size(params, vocab, pad_vocab=pad_vocab)
 
         of = OutputFile(fname_out, endianess=endianess)
 
         # meta data
+        of.add_meta_model(params, metadata)
         of.add_meta_arch(params)
         of.add_meta_vocab(vocab)
         of.add_meta_special_vocab(svocab)
@@ -1203,12 +1258,14 @@ def write_all(
         fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: BaseVocab, svocab: gguf.SpecialVocab,
         concurrency: int = DEFAULT_CONCURRENCY, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE, pad_vocab: bool = False,
+        metadata: Metadata | None = None,
     ) -> None:
         check_vocab_size(params, vocab, pad_vocab=pad_vocab)
 
         of = OutputFile(fname_out, endianess=endianess)
 
         # meta data
+        of.add_meta_model(params, metadata)
         of.add_meta_arch(params)
         if isinstance(vocab, Vocab):
             of.add_meta_vocab(vocab)
@@ -1244,6 +1301,37 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileT
     raise ValueError(f"Unexpected combination of types: {name_to_type}")
 
 
+def model_parameter_count(model: LazyModel) -> int:
+    total_model_parameters = 0
+    for lazy_tensor in model.values():
+        sum_weights_in_tensor = 1
+        for dim in lazy_tensor.shape:
+            sum_weights_in_tensor *= dim
+        total_model_parameters += sum_weights_in_tensor
+    return total_model_parameters
+
+
+def model_parameter_count_rounded_notation(model_params_count: int) -> str:
+    if model_params_count > 1e12:
+        # Trillions of parameters
+        scaled_model_params = model_params_count * 1e-12
+        scale_suffix = "T"
+    elif model_params_count > 1e9:
+        # Billions of parameters
+        scaled_model_params = model_params_count * 1e-9
+        scale_suffix = "B"
+    elif model_params_count > 1e6:
+        # Millions of parameters
+        scaled_model_params = model_params_count * 1e-6
+        scale_suffix = "M"
+    else:
+        # Thousands of parameters
+        scaled_model_params = model_params_count * 1e-3
+        scale_suffix = "K"
+
+    return f"{round(scaled_model_params)}{scale_suffix}"
+
+
 def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyModel:
     return {name: tensor.astype(output_type.type_for_tensor(name, tensor)) for (name, tensor) in model.items()}
 
@@ -1423,13 +1511,26 @@ def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) ->
         return vocab, special_vocab
 
 
-def default_outfile(model_paths: list[Path], file_type: GGMLFileType) -> Path:
-    namestr = {
-        GGMLFileType.AllF32:    "f32",
-        GGMLFileType.MostlyF16: "f16",
-        GGMLFileType.MostlyQ8_0:"q8_0",
+def default_outfile(model_paths: list[Path], file_type: GGMLFileType, params: Params, model_params_count: int, metadata: Metadata) -> Path:
+    quantization = {
+        GGMLFileType.AllF32:     "F32",
+        GGMLFileType.MostlyF16:  "F16",
+        GGMLFileType.MostlyQ8_0: "Q8_0",
     }[file_type]
-    ret = model_paths[0].parent / f"ggml-model-{namestr}.gguf"
+
+    parameters = model_parameter_count_rounded_notation(model_params_count)
+
+    version = ""
+    if metadata is not None and metadata.version is not None:
+        version = f"-{metadata.version}"
+
+    name = "ggml-model"
+    if metadata is not None and metadata.name is not None:
+        name = metadata.name
+    elif params.path_model is not None:
+        name = params.path_model.name
+
+    ret = model_paths[0].parent / f"{name}{version}-{parameters}-{quantization}.gguf"
     if ret in model_paths:
         sys.stderr.write(
             f"Error: Default output path ({ret}) would overwrite the input. "
@@ -1466,8 +1567,12 @@ def main(args_in: list[str] | None = None) -> None:
     parser.add_argument("--big-endian",   action="store_true",    help="model is executed on big endian machine")
     parser.add_argument("--pad-vocab",    action="store_true",    help="add pad tokens when model vocab expects more than tokenizer metadata provides")
     parser.add_argument("--skip-unknown", action="store_true",    help="skip unknown tensor names instead of failing")
+    parser.add_argument("--metadata",     type=Path,              help="specify the path for a metadata file")
 
     args = parser.parse_args(args_in)
+
+    metadata = Metadata.load(args.metadata)
+
     if args.no_vocab and args.vocab_only:
         raise ValueError("--vocab-only does not make sense with --no-vocab")
@@ -1481,6 +1586,9 @@ def main(args_in: list[str] | None = None) -> None:
     else:
         model_plus = ModelPlus(model = {}, paths = [args.model / 'dummy'], format = 'none', vocab = None)
 
+    model_params_count = model_parameter_count(model_plus.model)
+    print(f"model parameters count: {model_params_count} ({model_parameter_count_rounded_notation(model_params_count)})")
+
     if args.dump:
         do_dump_model(model_plus)
         return
@@ -1520,7 +1628,7 @@ def main(args_in: list[str] | None = None) -> None:
             raise ValueError("need --outfile if using --vocab-only")
         outfile = args.outfile
         OutputFile.write_vocab_only(outfile, params, vocab, special_vocab,
-                                    endianess=endianess, pad_vocab=args.pad_vocab)
+                                    endianess=endianess, pad_vocab=args.pad_vocab, metadata=metadata)
         print(f"Wrote {outfile}")
         return
@@ -1528,19 +1636,22 @@ def main(args_in: list[str] | None = None) -> None:
         vocab = model_plus.vocab
 
     print(f"Vocab info: {vocab}")
-    print(f"Special vocab info: {special_vocab}")
+    special_vocab = gguf.SpecialVocab(model_plus.paths[0].parent,
+                                      load_merges = True,
+                                      n_vocab = vocab.vocab_size)
+    print(f"Special vocab info: {special_vocab}")
 
     model   = model_plus.model
     model   = convert_model_names(model, params, args.skip_unknown)
     ftype   = pick_output_type(model, args.outtype)
     model   = convert_to_output_type(model, ftype)
-    outfile = args.outfile or default_outfile(model_plus.paths, ftype)
+    outfile = args.outfile or default_outfile(model_plus.paths, ftype, params, model_params_count, metadata)
 
     params.ftype = ftype
     print(f"Writing {outfile}, format {ftype}")
 
     OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab,
-                         concurrency=args.concurrency, endianess=endianess, pad_vocab=args.pad_vocab)
+                         concurrency=args.concurrency, endianess=endianess, pad_vocab=args.pad_vocab, metadata=metadata)
     print(f"Wrote {outfile}")
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 5214764a9ea98e..c44d8abeb857fe 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -24,6 +24,7 @@ class General:
         ALIGNMENT            = "general.alignment"
         NAME                 = "general.name"
         AUTHOR               = "general.author"
+        VERSION              = "general.version"
         URL                  = "general.url"
         DESCRIPTION          = "general.description"
         LICENSE              = "general.license"
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index 2ae6c814b52de1..f4c4407667bda0 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -296,6 +296,9 @@ def add_architecture(self) -> None:
     def add_author(self, author: str) -> None:
         self.add_string(Keys.General.AUTHOR, author)
 
+    def add_version(self, version: str) -> None:
+        self.add_string(Keys.General.VERSION, version)
+
     def add_tensor_data_layout(self, layout: str) -> None:
         self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)
 
@@ -305,6 +308,9 @@ def add_url(self, url: str) -> None:
     def add_description(self, description: str) -> None:
         self.add_string(Keys.General.DESCRIPTION, description)
 
+    def add_licence(self, licence: str) -> None:
+        self.add_string(Keys.General.LICENSE, licence)
+
     def add_source_url(self, url: str) -> None:
         self.add_string(Keys.General.SOURCE_URL, url)
 
diff --git a/llama.cpp b/llama.cpp
index 9a1c11043b94ad..217726184879f1 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -261,6 +261,7 @@ enum llm_kv {
     LLM_KV_GENERAL_ALIGNMENT,
     LLM_KV_GENERAL_NAME,
     LLM_KV_GENERAL_AUTHOR,
+    LLM_KV_GENERAL_VERSION,
     LLM_KV_GENERAL_URL,
     LLM_KV_GENERAL_DESCRIPTION,
     LLM_KV_GENERAL_LICENSE,
@@ -330,6 +331,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_GENERAL_ALIGNMENT,           "general.alignment"            },
     { LLM_KV_GENERAL_NAME,                "general.name"                 },
     { LLM_KV_GENERAL_AUTHOR,              "general.author"               },
+    { LLM_KV_GENERAL_VERSION,             "general.version"              },
     { LLM_KV_GENERAL_URL,                 "general.url"                  },
     { LLM_KV_GENERAL_DESCRIPTION,         "general.description"          },
     { LLM_KV_GENERAL_LICENSE,             "general.license"              },
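
Usage sketch (illustrative only; the file name and every value below are hypothetical):
the new --metadata flag expects a JSON file whose keys mirror the GGUF "general.*"
fields that Metadata.load() reads.

    $ cat metadata.json
    {
        "general.name": "ExampleModel",
        "general.author": "Example Author",
        "general.version": "v1.0",
        "general.url": "https://example.com/model",
        "general.description": "An example model used for demonstration.",
        "general.license": "Apache-2.0",
        "general.source_url": "https://example.com/model-source",
        "general.source_hf_repo": "example-org/example-model"
    }

    $ python3 convert.py path/to/model --outtype f16 --metadata metadata.json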
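Assuming the hypothetical model above has roughly seven billion parameters,
model_parameter_count_rounded_notation() yields "7B", so the conversion would
default to writing ExampleModel-v1.0-7B-F16.gguf next to the input, following
the f"{name}{version}-{parameters}-{quantization}.gguf" pattern in
default_outfile(), instead of the previous fixed ggml-model-f16.gguf.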