diff --git a/docs/my-website/docs/tutorials/compare_llms.md b/docs/my-website/docs/tutorials/compare_llms.md new file mode 100644 index 000000000..c4c73b174 --- /dev/null +++ b/docs/my-website/docs/tutorials/compare_llms.md @@ -0,0 +1,123 @@ +import Image from '@theme/IdealImage'; + +# Comparing LLMs on a Test Set using LiteLLM + + +
LiteLLM allows you to use any LLM as a drop-in replacement for
`gpt-3.5-turbo`

This notebook walks through how you can compare gpt-3.5-turbo and claude-2 on a
given test set using litellm

## Output at the end of this tutorial:

+ +
+ +
+ +``` python +!pip install litellm +``` + +
+ +
+ +``` python +from litellm import completion +import litellm + +# init your test set questions +questions = [ + "how do i call completion() using LiteLLM", + "does LiteLLM support VertexAI", + "how do I set my keys on replicate llama2?", +] + + +# set your prompt +prompt = """ +You are a coding assistant helping users using litellm. +litellm is a light package to simplify calling OpenAI, Azure, Cohere, Anthropic, Huggingface API Endpoints. It manages: + +""" +``` + +
+ +
+ +``` python +import os +os.environ['OPENAI_API_KEY'] = "" +os.environ['ANTHROPIC_API_KEY'] = "" +``` + +
+ +
+ +
+ +
+ +## Calling gpt-3.5-turbo and claude-2 on the same questions + +## LiteLLM `completion()` allows you to call all LLMs in the same format + +
+ +
``` python
results = [] # for storing results

models = ['gpt-3.5-turbo', 'claude-2'] # define what models you're testing, see: https://docs.litellm.ai/docs/completion/supported
for question in questions:
    row = [question]
    for model in models:
        print("Calling:", model, "question:", question)
        response = completion( # using litellm.completion
            model=model,
            messages=[
                {'role': 'system', 'content': prompt},
                {'role': 'user', 'content': question}
            ]
        )
        answer = response.choices[0].message['content']
        row.append(answer)
        print("Calling:", model, "answer:", answer)

    results.append(row) # save results

```
+ +
+ +## Visualizing Results + +
+ +
+ +``` python +# Create a table to visualize results +import pandas as pd + +columns = ['Question'] + models +df = pd.DataFrame(results, columns=columns) + +df +``` +## Output Table + + +
diff --git a/docs/my-website/img/compare_llms.png b/docs/my-website/img/compare_llms.png new file mode 100644 index 000000000..704489b03 Binary files /dev/null and b/docs/my-website/img/compare_llms.png differ diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 764b01522..78ad0e180 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -51,7 +51,7 @@ const sidebars = { items: [ 'tutorials/huggingface_tutorial', 'tutorials/TogetherAI_liteLLM', - 'tutorials/fallbacks', + 'tutorials/compare_llms', 'tutorials/finetuned_chat_gpt', 'tutorials/text_completion', 'tutorials/litellm_Test_Multiple_Providers',