"""Run a litellm chat completion, then grade the answer with autoevals' Factuality evaluator."""
import sys, os
import traceback

from dotenv import load_dotenv

# Load API credentials (e.g. OPENAI_API_KEY) from a local .env file
# before any litellm call is made.
load_dotenv()

import litellm
# NOTE(review): embedding/completion/completion_cost appear unused in this
# script; kept to avoid breaking any external code that re-imports from here.
from litellm import embedding, completion, completion_cost
# Was `from autoevals.llm import *` — only Factuality is used, so import it
# explicitly (also removes the duplicate `import litellm` the original had).
from autoevals.llm import Factuality

# litellm completion call
question = "which country has the highest population"
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": question}],
)
print(response)

# Use the autoevals Factuality() evaluator to grade the model's answer
# against the expected answer.
print("calling evaluator")
evaluator = Factuality()
result = evaluator(
    # Documented litellm access path for the answer text is attribute-style,
    # not dict indexing on the ModelResponse.
    output=response.choices[0].message.content,  # response from litellm.completion()
    expected="India",  # expected output
    input=question,  # question passed to litellm.completion
)
print(result)