From 08267af836c55a945abf93677a478b0c0dd25c52 Mon Sep 17 00:00:00 2001
From: Samuel Bushi
Date: Tue, 4 Feb 2025 02:20:48 +0000
Subject: [PATCH 1/3] fix: eval plugin docs

---
 docs/plugin-authoring-evaluator.md | 34 ++++++++++++++++------------------
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/docs/plugin-authoring-evaluator.md b/docs/plugin-authoring-evaluator.md
index 6b8c58172..9ee8d218b 100644
--- a/docs/plugin-authoring-evaluator.md
+++ b/docs/plugin-authoring-evaluator.md
@@ -61,23 +61,22 @@ function getDeliciousnessPrompt(ai: Genkit) {
     output: {
       schema: DeliciousnessDetectionResponseSchema,
-    }
-  },
-  `You are a food critic. Assess whether the provided output sounds delicious, giving only "yes" (delicious), "no" (not delicious), or "maybe" (undecided) as the verdict.
+    },
+    prompt: `You are a food critic. Assess whether the provided output sounds delicious, giving only "yes" (delicious), "no" (not delicious), or "maybe" (undecided) as the verdict.

-  Examples:
-  Output: Chicken parm sandwich
-  Response: { "reason": "A classic and beloved dish.", "verdict": "yes" }
+    Examples:
+    Output: Chicken parm sandwich
+    Response: { "reason": "A classic and beloved dish.", "verdict": "yes" }

-  Output: Boston Logan Airport tarmac
-  Response: { "reason": "Not edible.", "verdict": "no" }
+    Output: Boston Logan Airport tarmac
+    Response: { "reason": "Not edible.", "verdict": "no" }

-  Output: A juicy piece of gossip
-  Response: { "reason": "Metaphorically 'tasty' but not food.", "verdict": "maybe" }
+    Output: A juicy piece of gossip
+    Response: { "reason": "Metaphorically 'tasty' but not food.", "verdict": "maybe" }

-  New Output: {% verbatim %}{{ responseToTest }} {% endverbatim %}
-  Response:
-  `
-  );
+    New Output: {% verbatim %}{{ responseToTest }} {% endverbatim %}
+    Response:
+    `
+  });
 }
 ```
@@ -91,7 +90,7 @@
 responsibility of the evaluator to validate that all fields required for
 evaluation are present.

 ```ts
-import { ModelArgument, z } from 'genkit';
+import { Genkit, ModelArgument, z } from 'genkit';
 import { BaseEvalDataPoint, Score } from 'genkit/evaluator';

 /**
@@ -100,6 +99,7 @@ import { BaseEvalDataPoint, Score } from 'genkit/evaluator';
 export async function deliciousnessScore<
   CustomModelOptions extends z.ZodTypeAny,
 >(
+  ai: Genkit,
   judgeLlm: ModelArgument<CustomModelOptions>,
   dataPoint: BaseEvalDataPoint,
   judgeConfig?: CustomModelOptions
@@ -141,8 +141,7 @@
 The final step is to write a function that defines the `EvaluatorAction`.

 ```ts
-import { Genkit, z } from 'genkit';
-import { BaseEvalDataPoint, EvaluatorAction } from 'genkit/evaluator';
+import { EvaluatorAction } from 'genkit/evaluator';

 /**
  * Create the Deliciousness evaluator action.
  */
 export function createDeliciousnessEvaluator<
@@ -162,7 +161,7 @@ export function createDeliciousnessEvaluator<
     isBilled: true,
   },
   async (datapoint: BaseEvalDataPoint) => {
-    const score = await deliciousnessScore(judge, datapoint, judgeConfig);
+    const score = await deliciousnessScore(ai, judge, datapoint, judgeConfig);
     return {
       testCaseId: datapoint.testCaseId,
       evaluation: score,
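The hunks above complete the LLM-based evaluator. For orientation, here is a minimal sketch of how the patched `createDeliciousnessEvaluator` factory could be called; the app setup and the choice of `gemini15Flash` as the judge are illustrative assumptions, not part of this patch:

```ts
import { genkit } from 'genkit';
import { googleAI, gemini15Flash } from '@genkit-ai/googleai';

// Hypothetical wiring: instantiate Genkit, then create the evaluator action
// with Gemini 1.5 Flash standing in as the judge LLM.
const ai = genkit({ plugins: [googleAI()] });

const deliciousnessEvaluator = createDeliciousnessEvaluator(
  ai,
  gemini15Flash,
  {} // judge config; empty here, but model options could be passed through
);
```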
@@ -245,7 +244,6 @@
 As with the LLM-based evaluator, define the scoring function. In this case,
 the scoring function does not need a judge LLM.

 ```ts
-import { EvalResponses } from 'genkit';
 import { BaseEvalDataPoint, Score } from 'genkit/evaluator';

 const US_PHONE_REGEX =

From feda2a1c84d4b1090a81a07157dd077bde5432c3 Mon Sep 17 00:00:00 2001
From: ssbushi <66321939+ssbushi@users.noreply.github.com>
Date: Tue, 4 Feb 2025 19:25:29 +0000
Subject: [PATCH 2/3] fix: evaluation.md (#1817)

---
 docs/evaluation.md | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/docs/evaluation.md b/docs/evaluation.md
index 385a6da04..74a378178 100644
--- a/docs/evaluation.md
+++ b/docs/evaluation.md
@@ -42,8 +42,8 @@ This section explains how to perform inference-based evaluation using Genkit.

 ### Setup

-1. Use an existing Genkit app or create a new one by following our [Getting
-started](get-started) guide.
+1. Use an existing Genkit app or create a new one by following our [Get
+started](get-started.md) guide.
 2. Add the following code to define a simple RAG application to evaluate. For
 this guide, we use a dummy retriever that always returns the same documents.

 ```js
@@ -52,7 +52,6 @@ import { genkit, z, Document } from "genkit";
 import {
   googleAI,
   gemini15Flash,
-  gemini15Pro,
 } from "@genkit-ai/googleai";

 // Initialize Genkit
@@ -164,7 +163,7 @@ to open the Datasets page.
    c. Repeat steps (a) and (b) a couple more times to add more examples. This
 guide adds the following example inputs to the dataset:

-    ```
+    ```none {:.devsite-disable-click-to-copy}
     "Can I give milk to my cats?"
     "From which animals did dogs evolve?"
     ```
@@ -174,8 +173,8 @@

 ### Run evaluation and view results

-To start evaluating the flow, click the `Evaluations` tab in the Dev UI and
-click the **Run new evaluation** button to get started.
+To start evaluating the flow, click the **Run new evaluation** button on your
+dataset page. You can also start a new evaluation from the `Evaluations` tab.

 1. Select the `Flow` radio button to evaluate a flow.

@@ -234,7 +233,7 @@ and is only enforced if a schema is specified on the target flow.
 control for advanced use cases (e.g. providing model parameters, message
 history, tools, etc). You can find the full schema for `GenerateRequest` in
 our [API reference
-docs](https://js.api.genkit.dev/interfaces/genkit._.GenerateRequest.html).
+docs](https://js.api.genkit.dev/interfaces/genkit._.GenerateRequest.html){: .external}.

 Note: Schema validation is a helper tool for editing examples, but it is
 possible to save an example with invalid schema. These examples may fail when
@@ -245,7 +244,7 @@ running an evaluation.

 ### Genkit evaluators

 Genkit includes a small number of native evaluators, inspired by
-[RAGAS](https://docs.ragas.io/en/stable/), to help you get started:
+[RAGAS](https://docs.ragas.io/en/stable/){: .external}, to help you get started:

 * Faithfulness -- Measures the factual consistency of the generated answer
 against the given context
@@ -257,7 +256,7 @@ harm, or exploit

 ### Evaluator plugins

 Genkit supports additional evaluators through plugins, like the Vertex Rapid
-Evaluators, which you access via the [VertexAI
+Evaluators, which you can access via the [VertexAI
 Plugin](./plugins/vertex-ai#evaluators).

 ## Advanced use
@@ -309,7 +308,7 @@ field and an optional `reference` field, like below:

 If your flow requires auth, you may specify it using the `--auth` argument:

 ```posix-terminal
-genkit eval:flow qaFlow --input testInputs.json --auth "{\"email_verified\": true}"
+genkit eval:flow qaFlow --input testInputs.json --auth '{"auth":{"email_verified":true}}'
 ```

@@ -317,7 +316,7 @@ By default, the `eval:flow` and `eval:run` commands use all available metrics
 for evaluation. To run on a subset of the configured evaluators, use the
 `--evaluators` flag and provide a comma-separated list of evaluators by name:

 ```posix-terminal
-genkit eval:flow qaFlow --input testInputs.json --evaluators=genkit/faithfulness,genkit/answer_relevancy
+genkit eval:flow qaFlow --input testInputs.json --evaluators=genkitEval/maliciousness,genkitEval/answer_relevancy
 ```

 You can view the results of your evaluation run in the Dev UI at
 `localhost:4000/evaluate`.
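The `genkitEval/...` identifiers passed to `--evaluators` above are provided by Genkit's evaluator plugin, which must be configured in the app before these metrics are available. A minimal sketch, assuming the `@genkit-ai/evaluator` package; the judge and embedder choices here are illustrative, not part of this patch:

```ts
import { genkit } from 'genkit';
import { genkitEval, GenkitMetric } from '@genkit-ai/evaluator';
import { googleAI, gemini15Flash, textEmbedding004 } from '@genkit-ai/googleai';

const ai = genkit({
  plugins: [
    googleAI(),
    // Registers evaluators under names like `genkitEval/maliciousness` and
    // `genkitEval/answer_relevancy`, matching the --evaluators flag above.
    genkitEval({
      judge: gemini15Flash, // judge LLM used by the LLM-scored metrics
      metrics: [GenkitMetric.MALICIOUSNESS, GenkitMetric.ANSWER_RELEVANCY],
      embedder: textEmbedding004, // answer relevancy also needs an embedder
    }),
  ],
});
```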
@@ -385,6 +384,8 @@
 First, as a preparatory step, introduce an auxiliary step in our `qaFlow`
 example:

 ```js
+import { run } from '@genkit-ai/core';
+
 export const qaFlow = ai.defineFlow({
     name: 'qaFlow',
     inputSchema: z.string(),
@@ -409,7 +410,7 @@ export const qaFlow = ai.defineFlow({
     const llmResponse = await ai.generate({
       model: gemini15Flash,
       prompt: `Answer this question with the given context ${query}`,
-      docs: factDocs,
+      docs: factDocsModified,
     });
     return llmResponse.text;
   }
@@ -483,7 +484,8 @@ Here is an example flow that uses a PDF file to generate potential user
 questions.

 ```ts
-import { genkit, run, z } from "genkit";
+import { genkit, z } from "genkit";
+import { run } from "@genkit-ai/core";
 import { googleAI, gemini15Flash } from "@genkit-ai/googleai";
 import { chunk } from "llm-chunk"; // npm i llm-chunk
 import path from "path";

From cc7e2dad7d98c65d34f4ab5af6aae6ec49ba4ae4 Mon Sep 17 00:00:00 2001
From: Samuel Bushi
Date: Tue, 4 Feb 2025 19:45:46 +0000
Subject: [PATCH 3/3] format

---
 docs/evaluation.md | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/docs/evaluation.md b/docs/evaluation.md
index 9a7ec9363..e16443553 100644
--- a/docs/evaluation.md
+++ b/docs/evaluation.md
@@ -383,8 +383,6 @@
 First, as a preparatory step, introduce an auxiliary step in our `qaFlow`
 example:

 ```js
-import { run } from '@genkit-ai/core';
-
 export const qaFlow = ai.defineFlow({
     name: 'qaFlow',
     inputSchema: z.string(),
@@ -394,9 +392,8 @@ export const qaFlow = ai.defineFlow({
     const factDocs = await ai.retrieve({
       retriever: dummyRetriever,
       query,
-      options: { k: 2 },
     });
-    const factDocsModified = await run('factModified', async () => {
+    const factDocsModified = await ai.run('factModified', async () => {
       // Let us use only facts that are considered silly. This is a
       // hypothetical step for demo purposes, you may perform any
       // arbitrary task inside a step and reference it in custom
@@ -484,7 +481,6 @@ questions.

 ```ts
 import { genkit, z } from "genkit";
-import { run } from "@genkit-ai/core";
 import { googleAI, gemini15Flash } from "@genkit-ai/googleai";
 import { chunk } from "llm-chunk"; // npm i llm-chunk
 import path from "path";
@@ -517,9 +513,9 @@ export const synthesizeQuestions = ai.defineFlow(
   async (filePath) => {
     filePath = path.resolve(filePath);
     // `extractText` loads the PDF and extracts its contents as text.
-    const pdfTxt = await run("extract-text", () => extractText(filePath));
+    const pdfTxt = await ai.run("extract-text", () => extractText(filePath));

-    const chunks = await run("chunk-it", async () =>
+    const chunks = await ai.run("chunk-it", async () =>
       chunk(pdfTxt, chunkingConfig)
     );
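Since `synthesizeQuestions` is meant to produce evaluation inputs, a natural companion is a small exporter that writes its output in the `eval:flow --input` format shown earlier. A sketch under two assumptions: the flow resolves to an array of question strings, and the input file takes an array of `{ "input": ... }` entries (the helper below is hypothetical):

```ts
import { writeFileSync } from 'fs';

// Hypothetical helper: call the flow above and save its questions as
// [{ "input": ... }] entries, the shape consumed by
// `genkit eval:flow qaFlow --input testInputs.json`.
async function exportTestInputs(pdfPath: string) {
  const questions = await synthesizeQuestions(pdfPath);
  const samples = questions.map((input: string) => ({ input }));
  writeFileSync('testInputs.json', JSON.stringify(samples, null, 2));
}
```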