diff --git a/1_python/1_llm-prediction/parameters.md b/1_python/1_llm-prediction/parameters.md
index c29cff4..deff03b 100644
--- a/1_python/1_llm-prediction/parameters.md
+++ b/1_python/1_llm-prediction/parameters.md
@@ -23,7 +23,7 @@ Set inference-time parameters such as `temperature`, `maxTokens`, `topP` and mor
     ".complete()":
       language: python
       code: |
-        result = model.respond(chat, config={
+        result = model.complete(chat, config={
             "temperature": 0.6,
             "maxTokens": 50,
             "stop": ["\n\n"],
@@ -51,9 +51,9 @@ The `.model()` retrieves a handle to a model that has already been loaded, or lo
       language: python
       code: |
         import lmstudio as lms
-        model = lms.llm("qwen2.5-7b-instruct", config = {
-            contextLength: 8192,
-            gpuOffload: 0.5,
+        model = lms.llm("qwen2.5-7b-instruct", config={
+            "contextLength": 8192,
+            "gpuOffload": 0.5,
         })

     "Python (scoped resource API)":
@@ -63,10 +63,11 @@ The `.model()` retrieves a handle to a model that has already been loaded, or lo
         with lms.Client() as client:
             model = client.llm.model(
                 "qwen2.5-7b-instruct",
-                config = {
-                    contextLength: 8192,
-                    gpuOffload: 0.5,
-                })
+                config={
+                    "contextLength": 8192,
+                    "gpuOffload": 0.5,
+                }
+            )

 ```

@@ -83,9 +84,9 @@ The `.load_new_instance()` method creates a new model instance and loads it with
       code: |
         import lmstudio as lms
         client = lms.get_default_client()
-        model = client.llm.load_new_instance("qwen2.5-7b-instruct", config = {
-            contextLength: 8192,
-            gpuOffload: 0.5,
+        model = client.llm.load_new_instance("qwen2.5-7b-instruct", config={
+            "contextLength": 8192,
+            "gpuOffload": 0.5,
         })

     "Python (scoped resource API)":
@@ -95,10 +96,11 @@ The `.load_new_instance()` method creates a new model instance and loads it with
         with lms.Client() as client:
             model = client.llm.load_new_instance(
                 "qwen2.5-7b-instruct",
-                config = {
-                    contextLength: 8192,
-                    gpuOffload: 0.5,
-                })
+                config={
+                    "contextLength": 8192,
+                    "gpuOffload": 0.5,
+                }
+            )

 ```

diff --git a/1_python/2_agent/act.md b/1_python/2_agent/act.md
index 1c84076..01cc34c 100644
--- a/1_python/2_agent/act.md
+++ b/1_python/2_agent/act.md
@@ -99,7 +99,7 @@ The following code creates a conversation loop with an LLM agent that can create
                 return "Error: {exc!r}"
             return "File created."

-        def print_content(fragment):
+        def print_fragment(fragment, *args):
             print(fragment.content, end="", flush=True)

         model = lms.llm()
@@ -117,8 +117,9 @@ The following code creates a conversation loop with an LLM agent that can create
             print("Bot: ", end="", flush=True)
             model.act(
                 chat,
+                [create_file],
                 on_message=chat.append,
-                on_fragment=print_fragment,
+                on_prediction_fragment=print_fragment,
             )
             print()

diff --git a/1_python/3_embedding/index.md b/1_python/3_embedding/index.md
index 6ddecbb..d0c55fb 100644
--- a/1_python/3_embedding/index.md
+++ b/1_python/3_embedding/index.md
@@ -26,7 +26,7 @@ To convert a string to a vector representation, pass it to the `embed` method on
       code: |
         import lmstudio as lms

-        model = lms.embedding.model("nomic-embed-text-v1.5")
+        model = lms.embedding_model("nomic-embed-text-v1.5")

         embedding = model.embed("Hello, world!")

diff --git a/1_python/4_tokenization/index.md b/1_python/4_tokenization/index.md
index a16b624..1ed3f8b 100644
--- a/1_python/4_tokenization/index.md
+++ b/1_python/4_tokenization/index.md
@@ -19,7 +19,7 @@ You can tokenize a string with a loaded LLM or embedding model using the SDK. In
         model = lms.llm()

-        tokens = llm.tokenize("Hello, world!")
+        tokens = model.tokenize("Hello, world!")

         print(tokens) # Array of token IDs.

 ```
@@ -33,7 +33,7 @@ If you only care about the number of tokens, simply check the length of the resu
     "Python (convenience API)":
       language: python
      code: |
-        token_count = len(llm.tokenize("Hello, world!"))
+        token_count = len(model.tokenize("Hello, world!"))
         print("Token count:", token_count)

 ```
@@ -71,7 +71,7 @@ You can determine if a given conversation fits into a model's context by doing t
             ]
         })

-        print("Fits", does_chat_fit_in_context(model, chat))
+        print("Fits in context:", does_chat_fit_in_context(model, chat))

 ```

diff --git a/1_python/5_manage-models/list-downloaded.md b/1_python/5_manage-models/list-downloaded.md
index f8c0fda..c69f1e1 100644
--- a/1_python/5_manage-models/list-downloaded.md
+++ b/1_python/5_manage-models/list-downloaded.md
@@ -21,7 +21,7 @@ downloaded model reference to be converted in the full SDK handle for a loaded m
         llm_only = lms.list_downloaded_models("llm")
         embedding_only = lms.list_downloaded_models("embedding")

-        for model in downloaded_models:
+        for model in downloaded:
             print(model)

     "Python (scoped resource API)":
@@ -34,7 +34,7 @@ downloaded model reference to be converted in the full SDK handle for a loaded m
         llm_only = client.llm.list_downloaded()
         embedding_only = client.embedding.list_downloaded()

-        for model in downloaded_models:
+        for model in downloaded:
             print(model)

 ```
diff --git a/1_python/5_manage-models/list-loaded.md b/1_python/5_manage-models/list-loaded.md
index a2699cc..0f71c92 100644
--- a/1_python/5_manage-models/list-loaded.md
+++ b/1_python/5_manage-models/list-loaded.md
@@ -23,6 +23,8 @@ This will give you results equivalent to using [`lms ps`](../../cli/ps) in the C
         llm_only = lms.list_loaded_models("llm")
         embedding_only = lms.list_loaded_models("embedding")

+        print(all_loaded_models)
+
     Python (scoped resource API):
       language: python
       code: |
@@ -33,4 +35,6 @@ This will give you results equivalent to using [`lms ps`](../../cli/ps) in the C
         llm_only = client.llm.list_loaded()
         embedding_only = client.embedding.list_loaded()

+        print(all_loaded_models)
+
 ```
diff --git a/1_python/5_manage-models/loading.md b/1_python/5_manage-models/loading.md
index ca09f07..871b178 100644
--- a/1_python/5_manage-models/loading.md
+++ b/1_python/5_manage-models/loading.md
@@ -86,7 +86,7 @@ This allows you to have multiple instances of the same or different models loade
       code: |
         import lmstudio as lms

-        client = lms.get_default_client
+        client = lms.get_default_client()
         llama = client.llm.load_new_instance("llama-3.2-1b-instruct")
         another_llama = client.llm.load_new_instance("llama-3.2-1b-instruct", "second-llama")

diff --git a/1_python/6_model-info/get-context-length.md b/1_python/6_model-info/get-context-length.md
index 9df723e..4590a76 100644
--- a/1_python/6_model-info/get-context-length.md
+++ b/1_python/6_model-info/get-context-length.md
@@ -54,6 +54,6 @@ You can determine if a given conversation fits into a model's context by doing t
             ]
         })

-        print("Fits", does_chat_fit_in_context(model, chat))
+        print("Fits in context:", does_chat_fit_in_context(model, chat))

 ```