From 9c21c7cf0fc4f085c2996bdafc06a4bec5833be5 Mon Sep 17 00:00:00 2001 From: Jude Niroshan Date: Wed, 2 Oct 2024 20:55:46 +0200 Subject: [PATCH] add missing mandatory parameters for generate_data --- pipeline.yaml | 3 ++- sdg/components.py | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pipeline.yaml b/pipeline.yaml index 3d86a0d..aa013e8 100644 --- a/pipeline.yaml +++ b/pipeline.yaml @@ -1221,7 +1221,8 @@ deploymentSpec: \ `empty`\n # it allows generating from the whole repo, see:\n # https://github.com/instructlab/sdg/blob/c6a9e74a1618b1077cd38e713b8aaed8b7c0c8ce/src/instructlab/sdg/utils/taxonomy.py#L230\n\ \ generate_data(\n client=client,\n num_instructions_to_generate=num_instructions_to_generate,\n\ \ output_dir=sdg.path,\n taxonomy=taxonomy.path,\n \ - \ taxonomy_base=taxonomy_base,\n model_name=model,\n )\n\n" + \ taxonomy_base=taxonomy_base,\n model_name=model,\n chunk_word_count=1000,\n\ + \ server_ctx_size=4096\n )\n\n" image: quay.io/tcoufal/ilab-sdg:latest pipelineInfo: description: InstructLab pipeline diff --git a/sdg/components.py b/sdg/components.py index d7ece52..caa1e45 100644 --- a/sdg/components.py +++ b/sdg/components.py @@ -60,4 +60,6 @@ def sdg_op( taxonomy=taxonomy.path, taxonomy_base=taxonomy_base, model_name=model, + chunk_word_count=1000, + server_ctx_size=4096, )