import * as ls from "langsmith/vitest";
import { myLLMApp } from "./app";
// Test suite for `myLLMApp`, registered through the `langsmith/vitest`
// wrappers (`ls.describe` / `ls.test` / `ls.expect`). Each case supplies
// `inputs` and `referenceOutputs`, which are handed to the test callback.
ls.describe("LLM Application Tests", () => {
  // Case 1: the app must produce a defined `answer`; the answer's length is
  // additionally logged as feedback under the "answer_length" key.
  ls.test(
    "Should answer general knowledge questions",
    {
      inputs: { query: "What is the capital of France?" },
      referenceOutputs: { answer: "Paris" },
    },
    async ({ inputs }) => {
      const { answer } = await myLLMApp(inputs.query);

      ls.expect(answer).toBeDefined();
      ls.logFeedback({
        key: "answer_length",
        score: answer.length,
      });

      return { answer };
    }
  );

  // Case 2: scores the whole response with a custom evaluator through the
  // `evaluatedBy` matcher and asserts on the resulting score.
  ls.test(
    "Should use evaluator matcher",
    {
      inputs: { query: "Tell me about AI" },
      referenceOutputs: { answer: "AI is artificial intelligence" },
    },
    async ({ inputs }) => {
      const response = await myLLMApp(inputs.query);

      /**
       * Placeholder relevance evaluator.
       *
       * Currently ignores both arguments and always reports a score of 0.8,
       * so the `toBeGreaterThan(0.7)` assertion below cannot fail until real
       * relevance logic is filled in.
       *
       * @param {{ outputs: unknown, referenceOutputs: unknown }} args
       * @returns {Promise<{ key: string, score: number }>}
       */
      const relevanceEvaluator = async ({ outputs, referenceOutputs }) => {
        // Your relevance logic
        return {
          key: "relevance",
          score: 0.8,
        };
      };

      await ls
        .expect(response)
        .evaluatedBy(relevanceEvaluator)
        .toBeGreaterThan(0.7);

      return response;
    }
  );

  // Case 3: table-driven; the same callback runs once per example below and
  // compares the app's answer against that example's reference answer.
  ls.test.each([
    {
      inputs: { query: "What is 2+2?" },
      referenceOutputs: { answer: "4" },
    },
    {
      inputs: { query: "What is 3+3?" },
      referenceOutputs: { answer: "6" },
    },
  ])(
    "Should handle math questions",
    async ({ inputs, referenceOutputs }) => {
      const response = await myLLMApp(inputs.query);

      // NOTE(review): confirm the meaning of `threshold` for this matcher.
      // If it is a maximum allowed relative edit distance (rather than a
      // minimum similarity), 0.9 is very permissive — verify against the
      // langsmith matcher documentation before relying on this assertion.
      await ls
        .expect(response.answer)
        .toBeRelativeCloseTo(referenceOutputs.answer, { threshold: 0.9 });

      return response;
    }
  );
});