diff --git a/convert.py b/convert.py index 9110f15806c6b..1c30df6569b02 100755 --- a/convert.py +++ b/convert.py @@ -250,9 +250,14 @@ def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params: if config.get("rope_theta") == 1000000: # CodeLlama n_ctx = 16384 - elif config["norm_eps"] == 1e-05: + elif config["norm_eps"] in (1e-05, 1e-06): # LLaMA v2 n_ctx = 4096 + # For some reason FB writes -1 to vocab size for their LLAMA2 models + # simply remove this bogus value and let the return statement below + # figure it out + if config["vocab_size"] == -1: + del config["vocab_size"] else: # LLaMA v1 n_ctx = 2048