From 05bcdb8688530bb5ed58ad0377dbeb507f0ebab8 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 9 Nov 2023 15:42:12 -0800 Subject: [PATCH] (docs) add tutorial on using litellm with autoeval --- docs/my-website/docs/tutorials/eval_suites.md | 67 +++++++++++++++++++ docs/my-website/sidebars.js | 3 +- 2 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 docs/my-website/docs/tutorials/eval_suites.md diff --git a/docs/my-website/docs/tutorials/eval_suites.md b/docs/my-website/docs/tutorials/eval_suites.md new file mode 100644 index 0000000000..58ca93c571 --- /dev/null +++ b/docs/my-website/docs/tutorials/eval_suites.md @@ -0,0 +1,67 @@ +# Evaluate LLMs - AutoEvals, MLflow + +## Using LiteLLM with AutoEvals +AutoEvals is a tool for quickly and easily evaluating AI model outputs using best practices. +https://github.com/braintrustdata/autoevals + +## Prerequisites +```shell +pip install litellm +``` +```shell +pip install autoevals +``` + +### Quick Start +```python +from autoevals.llm import * +import autoevals + +# litellm completion call +import litellm +question = "which country has the highest population" +response = litellm.completion( + model = "gpt-3.5-turbo", + messages = [ + { + "role": "user", + "content": question + } + ], +) + +# use the auto eval Factuality() evaluator +evaluator = Factuality() +openai.api_key = "" # set your openai api key for evaluator +result = evaluator( + output=response.choices[0]["message"]["content"], # response from litellm.completion() + expected="India", # expected output + input=question # question passed to litellm.completion +) + +print(result) +``` + +#### Output of Evaluation - from AutoEvals +```shell +Score( + name='Factuality', + score=0, + metadata= + {'rationale': "The expert answer is 'India'.\nThe submitted answer is 'As of 2021, China has the highest population in the world with an estimated 1.4 billion people.'\nThe submitted answer mentions China as the country with the highest population, while 
the expert answer mentions India.\nThere is a disagreement between the submitted answer and the expert answer.", + 'choice': 'D' + }, + error=None +) +``` + + + + + + + + + + + diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index b85b3970b2..efecfda7aa 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -94,9 +94,10 @@ const sidebars = { label: 'Tutorials', items: [ 'tutorials/azure_openai', + "tutorials/lm_evaluation_harness", + "tutorials/eval_suites", 'tutorials/oobabooga', "tutorials/gradio_integration", - "tutorials/lm_evaluation_harness", 'tutorials/huggingface_codellama', 'tutorials/huggingface_tutorial', 'tutorials/TogetherAI_liteLLM',