# litellm/cookbook/benchmark/eval_suites_mlflow_autoevals/auto_evals.py
# 2023-12-25 14:11:20 +05:30
# 34 lines, 770 B, Python
import sys, os
import traceback
from dotenv import load_dotenv
load_dotenv()
import litellm
from litellm import embedding, completion, completion_cost
from autoevals.llm import *
###################
import litellm
# Ask the model a question via litellm, then grade its answer with autoevals.

# Step 1: get a completion from the LLM.
question = "which country has the highest population"
messages = [{"role": "user", "content": question}]
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=messages,
)
print(response)

# Step 2: score the answer for factual accuracy against the expected value.
print("calling evaluator")
model_answer = response.choices[0]["message"]["content"]
evaluator = Factuality()
result = evaluator(
    output=model_answer,  # the LLM's answer
    expected="India",     # ground-truth answer
    input=question,       # the original question
)
print(result)