-
Notifications
You must be signed in to change notification settings - Fork 89
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
js[patch]: simple evaluator args #1264
Changes from all commits
d076dce
d8cbab7
0335961
9f6771a
637fd6e
355ced5
f9d8632
f21cffc
9662c07
55a8aa6
fade984
40c12dd
973ba3d
8d3bbdc
1b62e2f
cf81392
b0bde01
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -69,16 +69,29 @@ async function loadTraces( | |
return results; | ||
} | ||
|
||
/** @deprecated Use ComparativeEvaluatorNew instead: (args: { runs, example, inputs, outputs, referenceOutputs }) => ... */ | ||
export type _ComparativeEvaluatorLegacy = ( | ||
runs: Run[], | ||
example: Example | ||
) => ComparisonEvaluationResultRow | Promise<ComparisonEvaluationResultRow>; | ||
|
||
export type _ComparativeEvaluator = (args: { | ||
runs?: Run[]; | ||
example?: Example; | ||
inputs?: Record<string, any>; | ||
outputs?: Record<string, any>[]; | ||
referenceOutputs?: Record<string, any>; | ||
}) => ComparisonEvaluationResultRow | Promise<ComparisonEvaluationResultRow>; | ||
|
||
export type ComparativeEvaluator = | ||
| _ComparativeEvaluatorLegacy | ||
| _ComparativeEvaluator; | ||
|
||
export interface EvaluateComparativeOptions { | ||
/** | ||
* A list of evaluators to use for comparative evaluation. | ||
*/ | ||
evaluators: Array< | ||
( | ||
runs: Run[], | ||
example: Example | ||
) => ComparisonEvaluationResultRow | Promise<ComparisonEvaluationResultRow> | ||
>; | ||
evaluators: Array<ComparativeEvaluator>; | ||
/** | ||
* Randomize the order of outputs for each evaluation | ||
* @default false | ||
|
@@ -306,16 +319,20 @@ export async function evaluateComparative( | |
async function evaluateAndSubmitFeedback( | ||
runs: Run[], | ||
example: Example, | ||
evaluator: ( | ||
runs: Run[], | ||
example: Example | ||
) => ComparisonEvaluationResultRow | Promise<ComparisonEvaluationResultRow> | ||
evaluator: ComparativeEvaluator | ||
) { | ||
const expectedRunIds = new Set(runs.map((r) => r.id)); | ||
const result = await evaluator( | ||
options.randomizeOrder ? shuffle(runs) : runs, | ||
example | ||
); | ||
// Check if evaluator expects an object parameter | ||
const result = | ||
evaluator.length === 1 | ||
? await (evaluator as _ComparativeEvaluator)({ | ||
runs: options.randomizeOrder ? shuffle(runs) : runs, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Worth supporting evaluator(runs)? |
||
example, | ||
inputs: example.inputs, | ||
outputs: runs.map((run) => run.outputs || {}), | ||
referenceOutputs: example.outputs || {}, | ||
}) | ||
: await (evaluator as _ComparativeEvaluatorLegacy)(runs, example); | ||
|
||
for (const [runId, score] of Object.entries(result.scores)) { | ||
// validate if the run id | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should probably also update the types for |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -95,7 +95,21 @@ export type RunEvaluatorLike = | |
run: Run, | ||
example?: Example | ||
) => Promise<EvaluationResult | EvaluationResults>) | ||
| ((run: Run, example?: Example) => EvaluationResult | EvaluationResults); | ||
| ((run: Run, example?: Example) => EvaluationResult | EvaluationResults) | ||
| ((args: { | ||
run?: Run; | ||
example?: Example; | ||
inputs?: Record<string, any>; | ||
outputs?: Record<string, any>; | ||
referenceOutputs?: Record<string, any>; | ||
}) => EvaluationResult | EvaluationResults) | ||
| ((args: { | ||
run?: Run; | ||
example?: Example; | ||
inputs?: Record<string, any>; | ||
outputs?: Record<string, any>; | ||
referenceOutputs?: Record<string, any>; | ||
}) => Promise<EvaluationResult | EvaluationResults>); | ||
|
||
/** | ||
* Wraps an evaluator function + implements the RunEvaluator interface. | ||
|
@@ -110,7 +124,18 @@ export class DynamicRunEvaluator<Func extends (...args: any[]) => any> | |
langSmithRunAndExample: { run: Run; example: Example }; | ||
}) => { | ||
const { run, example } = input.langSmithRunAndExample; | ||
return evaluator(run, example); | ||
|
||
return evaluator( | ||
{ | ||
...run, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we just do the arity check for both of these? Is there a world where there are some crazies out there declaring evaluators like this: async (run) => {
} I would prefer the shim in both places unless we want to drop support for the two arg syntax in the long run, in which case we should deprecate the other signature: export type RunEvaluatorLike =
/** @deprecated NOTE */
| ((
run: Run,
example?: Example
) => Promise<EvaluationResult | EvaluationResults>)
/** @deprecated NOTE */
| ((run: Run, example?: Example) => EvaluationResult | EvaluationResults)
| ((args: {
run?: Run;
example?: Example;
inputs?: Record<string, any>;
outputs?: Record<string, any>;
referenceOutputs?: Record<string, any>;
}) => EvaluationResult | EvaluationResults)
| ((args: {
run?: Run;
example?: Example;
inputs?: Record<string, any>;
outputs?: Record<string, any>;
referenceOutputs?: Record<string, any>;
}) => Promise<EvaluationResult | EvaluationResults>); |
||
run, | ||
example, | ||
inputs: example?.inputs, | ||
outputs: run?.outputs, | ||
referenceOutputs: example?.outputs, | ||
}, | ||
example | ||
); | ||
}) as Func; | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't need the
Promise.resolve
, more standard to just make the outside functionasync
: